65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
159 Builder.restoreIP(IP);
169 Kernel->getFnAttribute(
"target-features").getValueAsString();
170 if (Features.
count(
"+wavefrontsize64"))
185 bool HasSimdModifier) {
187 switch (ClauseKind) {
188 case OMP_SCHEDULE_Default:
189 case OMP_SCHEDULE_Static:
190 return HasChunks ? OMPScheduleType::BaseStaticChunked
191 : OMPScheduleType::BaseStatic;
192 case OMP_SCHEDULE_Dynamic:
193 return OMPScheduleType::BaseDynamicChunked;
194 case OMP_SCHEDULE_Guided:
195 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
196 : OMPScheduleType::BaseGuidedChunked;
197 case OMP_SCHEDULE_Auto:
199 case OMP_SCHEDULE_Runtime:
200 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
201 : OMPScheduleType::BaseRuntime;
209 bool HasOrderedClause) {
210 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
211 OMPScheduleType::None &&
212 "Must not have ordering nor monotonicity flags already set");
215 ? OMPScheduleType::ModifierOrdered
216 : OMPScheduleType::ModifierUnordered;
217 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
220 if (OrderingScheduleType ==
221 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
222 return OMPScheduleType::OrderedGuidedChunked;
223 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
224 OMPScheduleType::ModifierOrdered))
225 return OMPScheduleType::OrderedRuntime;
227 return OrderingScheduleType;
233 bool HasSimdModifier,
bool HasMonotonic,
234 bool HasNonmonotonic,
bool HasOrderedClause) {
235 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
236 OMPScheduleType::None &&
237 "Must not have monotonicity flags already set");
238 assert((!HasMonotonic || !HasNonmonotonic) &&
239 "Monotonic and Nonmonotonic are contradicting each other");
242 return ScheduleType | OMPScheduleType::ModifierMonotonic;
243 }
else if (HasNonmonotonic) {
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
254 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
255 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
261 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
269 bool HasSimdModifier,
bool HasMonotonicModifier,
270 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
276 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
277 HasNonmonotonicModifier, HasOrderedClause);
292 assert(!Br->isConditional() &&
293 "BB's terminator must be an unconditional branch (or degenerate)");
296 Br->setSuccessor(0,
Target);
301 NewBr->setDebugLoc(
DL);
307 "Target BB must not have PHI nodes");
327 NewBr->setDebugLoc(
DL);
335 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
339 Builder.SetInsertPoint(Old);
343 Builder.SetCurrentDebugLocation(
DebugLoc);
352 spliceBB(IP, New, CreateBranch,
DL);
353 New->replaceSuccessorsPhiUsesWith(Old, New);
362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
364 Builder.SetInsertPoint(Builder.GetInsertBlock());
367 Builder.SetCurrentDebugLocation(
DebugLoc);
376 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
378 Builder.SetInsertPoint(Builder.GetInsertBlock());
381 Builder.SetCurrentDebugLocation(
DebugLoc);
388 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
395 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
397 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
398 const Twine &Name =
"",
bool AsPtr =
true) {
399 Builder.restoreIP(OuterAllocaIP);
402 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
406 FakeVal = FakeValAddr;
409 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
414 Builder.restoreIP(InnerAllocaIP);
418 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
/// Flags recorded for the OpenMP `requires` directive. These are combined as
/// a bitmask in OpenMPIRBuilderConfig::RequiresFlags (see the constructor
/// below, which translates the per-clause booleans into these bits).
enum OpenMPOffloadingRequiresDirFlags {
  /// No `requires` information has been registered yet (sentinel value).
  OMP_REQ_UNDEFINED = 0x000,
  /// `requires` flags were registered, but no relevant clause was present.
  OMP_REQ_NONE = 0x001,
  /// The `reverse_offload` clause was used.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// The `unified_address` clause was used.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// The `unified_shared_memory` clause was used.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// The `dynamic_allocators` clause was used.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
};
452OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
453 : RequiresFlags(OMP_REQ_UNDEFINED) {}
455OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
456 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
457 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
458 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
459 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
460 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
461 RequiresFlags(OMP_REQ_UNDEFINED) {
462 if (HasRequiresReverseOffload)
463 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
464 if (HasRequiresUnifiedAddress)
465 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
466 if (HasRequiresUnifiedSharedMemory)
467 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
468 if (HasRequiresDynamicAllocators)
469 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
472bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
473 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
476bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
477 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
480bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
481 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
484bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
485 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
488int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
489 return hasRequiresFlags() ? RequiresFlags
490 :
static_cast<int64_t
>(OMP_REQ_NONE);
493void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
495 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
497 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
500void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
502 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
504 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
514void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
516 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
518 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
525void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
529 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
531 constexpr const size_t MaxDim = 3;
533 Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
535 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
538 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
539 Value *NumThreads3D =
540 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
542 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
544 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
546 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
548 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
550 ArgsVector = {Version,
552 KernelArgs.RTArgs.BasePointersArray,
553 KernelArgs.RTArgs.PointersArray,
554 KernelArgs.RTArgs.SizesArray,
555 KernelArgs.RTArgs.MapTypesArray,
556 KernelArgs.RTArgs.MapNamesArray,
557 KernelArgs.RTArgs.MappersArray,
558 KernelArgs.NumIterations,
562 KernelArgs.DynCGGroupMem};
570 auto FnAttrs =
Attrs.getFnAttrs();
571 auto RetAttrs =
Attrs.getRetAttrs();
573 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
578 bool Param =
true) ->
void {
579 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
580 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
581 if (HasSignExt || HasZeroExt) {
582 assert(AS.getNumAttributes() == 1 &&
583 "Currently not handling extension attr combined with others.");
585 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
588 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
595#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
596#include "llvm/Frontend/OpenMP/OMPKinds.def"
600#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
602 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
603 addAttrSet(RetAttrs, RetAttrSet, false); \
604 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
605 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
606 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
608#include "llvm/Frontend/OpenMP/OMPKinds.def"
622#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
624 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
626 Fn = M.getFunction(Str); \
628#include "llvm/Frontend/OpenMP/OMPKinds.def"
634#define OMP_RTL(Enum, Str, ...) \
636 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
638#include "llvm/Frontend/OpenMP/OMPKinds.def"
642 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
652 LLVMContext::MD_callback,
654 2, {-1, -1},
true)}));
660 addAttributes(FnID, *Fn);
667 assert(Fn &&
"Failed to create OpenMP runtime function");
675 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
679void OpenMPIRBuilder::initialize() { initializeTypes(M); }
690 for (
auto Inst =
Block->getReverseIterator()->begin();
691 Inst !=
Block->getReverseIterator()->end();) {
704void OpenMPIRBuilder::finalize(
Function *Fn) {
708 for (OutlineInfo &OI : OutlineInfos) {
711 if (Fn && OI.getFunction() != Fn) {
716 ParallelRegionBlockSet.
clear();
718 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
728 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
737 ".omp_par", ArgsInZeroAddressSpace);
741 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 assert(Extractor.isEligible() &&
743 "Expected OpenMP outlining to be possible!");
745 for (
auto *V : OI.ExcludeArgsFromAggregate)
746 Extractor.excludeArgFromAggregate(V);
748 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
752 if (TargetCpuAttr.isStringAttribute())
755 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
756 if (TargetFeaturesAttr.isStringAttribute())
757 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
760 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
762 "OpenMP outlined functions should not return a value!");
767 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
774 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
781 "Expected instructions to add in the outlined region entry");
783 End = ArtificialEntry.
rend();
788 if (
I.isTerminator()) {
790 if (OI.EntryBB->getTerminator())
791 OI.EntryBB->getTerminator()->adoptDbgRecords(
792 &ArtificialEntry,
I.getIterator(),
false);
796 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
799 OI.EntryBB->moveBefore(&ArtificialEntry);
806 if (OI.PostOutlineCB)
807 OI.PostOutlineCB(*OutlinedFn);
811 OutlineInfos = std::move(DeferredOutlines);
832 for (
Function *
F : ConstantAllocaRaiseCandidates)
835 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
836 [](EmitMetadataErrorKind Kind,
837 const TargetRegionEntryInfo &EntryInfo) ->
void {
838 errs() <<
"Error of kind: " << Kind
839 <<
" when emitting offload entries and metadata during "
840 "OMPIRBuilder finalization \n";
843 if (!OffloadInfoManager.empty())
844 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
846 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
847 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
849 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
855bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
857OpenMPIRBuilder::~OpenMPIRBuilder() {
858 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
866 ConstantInt::get(I32Ty,
Value), Name);
878 UsedArray.
resize(List.size());
879 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
883 if (UsedArray.
empty())
890 GV->setSection(
"llvm.metadata");
894OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
896 auto *Int8Ty = Builder.getInt8Ty();
899 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
907 unsigned Reserve2Flags) {
909 LocFlags |= OMP_IDENT_FLAG_KMPC;
912 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
917 ConstantInt::get(
Int32, Reserve2Flags),
918 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
920 size_t SrcLocStrArgIdx = 4;
921 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
925 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
932 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
933 if (
GV.getInitializer() == Initializer)
938 M, OpenMPIRBuilder::Ident,
941 M.getDataLayout().getDefaultGlobalsAddressSpace());
953 SrcLocStrSize = LocStr.
size();
954 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
962 if (
GV.isConstant() &&
GV.hasInitializer() &&
963 GV.getInitializer() == Initializer)
966 SrcLocStr = Builder.CreateGlobalString(
967 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
975 unsigned Line,
unsigned Column,
981 Buffer.
append(FunctionName);
983 Buffer.
append(std::to_string(Line));
985 Buffer.
append(std::to_string(Column));
988 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
992OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
993 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
994 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1002 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1004 if (
DIFile *DIF = DIL->getFile())
1005 if (std::optional<StringRef> Source = DIF->getSource())
1010 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1011 DIL->getColumn(), SrcLocStrSize);
1014Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1016 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1017 Loc.IP.getBlock()->getParent());
1020Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1021 return Builder.CreateCall(
1022 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1023 "omp_global_thread_num");
1026OpenMPIRBuilder::InsertPointOrErrorTy
1027OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1028 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 if (!updateToLocation(
Loc))
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1055 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1057 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1058 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1063 bool UseCancelBarrier =
1064 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1067 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1068 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1069 : OMPRTL___kmpc_barrier),
1072 if (UseCancelBarrier && CheckCancelFlag)
1073 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1076 return Builder.saveIP();
1079OpenMPIRBuilder::InsertPointOrErrorTy
1080OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1082 omp::Directive CanceledDirective) {
1083 if (!updateToLocation(
Loc))
1087 auto *UI = Builder.CreateUnreachable();
1092 Builder.SetInsertPoint(ThenTI);
1094 Value *CancelKind =
nullptr;
1095 switch (CanceledDirective) {
1096#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1097 case DirectiveEnum: \
1098 CancelKind = Builder.getInt32(Value); \
1100#include "llvm/Frontend/OpenMP/OMPKinds.def"
1106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1108 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1110 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1111 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1112 if (CanceledDirective == OMPD_parallel) {
1114 Builder.restoreIP(IP);
1115 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1116 omp::Directive::OMPD_unknown,
1125 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1129 Builder.SetInsertPoint(UI->getParent());
1130 UI->eraseFromParent();
1132 return Builder.saveIP();
1135OpenMPIRBuilder::InsertPointOrErrorTy
1136OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1137 omp::Directive CanceledDirective) {
1138 if (!updateToLocation(
Loc))
1142 auto *UI = Builder.CreateUnreachable();
1143 Builder.SetInsertPoint(UI);
1145 Value *CancelKind =
nullptr;
1146 switch (CanceledDirective) {
1147#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1148 case DirectiveEnum: \
1149 CancelKind = Builder.getInt32(Value); \
1151#include "llvm/Frontend/OpenMP/OMPKinds.def"
1157 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1158 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1159 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1162 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1163 if (CanceledDirective == OMPD_parallel) {
1165 Builder.restoreIP(IP);
1166 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1167 omp::Directive::OMPD_unknown,
1176 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1180 Builder.SetInsertPoint(UI->getParent());
1181 UI->eraseFromParent();
1183 return Builder.saveIP();
1186OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1187 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1190 if (!updateToLocation(
Loc))
1193 Builder.restoreIP(AllocaIP);
1194 auto *KernelArgsPtr =
1195 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1196 updateToLocation(
Loc);
1200 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1201 Builder.CreateAlignedStore(
1203 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1207 NumThreads, HostPtr, KernelArgsPtr};
1209 Return = Builder.CreateCall(
1210 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1213 return Builder.saveIP();
1216OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1217 const LocationDescription &
Loc,
Value *OutlinedFnID,
1218 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1219 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1221 if (!updateToLocation(
Loc))
1234 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1238 Value *Return =
nullptr;
1242 getKernelArgsVector(Args, Builder, ArgsVector);
1257 Builder.restoreIP(emitTargetKernel(
1258 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1259 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1266 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1268 auto CurFn = Builder.GetInsertBlock()->getParent();
1269 emitBlock(OffloadFailedBlock, CurFn);
1270 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1272 return AfterIP.takeError();
1273 Builder.restoreIP(*AfterIP);
1274 emitBranch(OffloadContBlock);
1275 emitBlock(OffloadContBlock, CurFn,
true);
1276 return Builder.saveIP();
1279Error OpenMPIRBuilder::emitCancelationCheckImpl(
1280 Value *CancelFlag, omp::Directive CanceledDirective,
1281 FinalizeCallbackTy ExitCB) {
1282 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1283 "Unexpected cancellation!");
1288 if (Builder.GetInsertPoint() == BB->
end()) {
1294 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1296 Builder.SetInsertPoint(BB);
1302 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1303 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1308 Builder.SetInsertPoint(CancellationBlock);
1310 if (
Error Err = ExitCB(Builder.saveIP()))
1312 auto &FI = FinalizationStack.back();
1313 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1317 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1336 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1339 "Expected at least tid and bounded tid as arguments");
1340 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1343 assert(CI &&
"Expected call instruction to outlined function");
1344 CI->
getParent()->setName(
"omp_parallel");
1346 Builder.SetInsertPoint(CI);
1347 Type *PtrTy = OMPIRBuilder->VoidPtr;
1351 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1355 Value *Args = ArgsAlloca;
1359 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1360 Builder.restoreIP(CurrentIP);
1363 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1365 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1367 Builder.CreateStore(V, StoreAddress);
1371 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1372 : Builder.getInt32(1);
1375 Value *Parallel51CallArgs[] = {
1379 NumThreads ? NumThreads : Builder.getInt32(-1),
1380 Builder.getInt32(-1),
1384 Builder.getInt64(NumCapturedVars)};
1387 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1389 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1392 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1395 Builder.SetInsertPoint(PrivTID);
1397 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1404 I->eraseFromParent();
1421 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1424 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1427 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1435 F->addMetadata(LLVMContext::MD_callback,
1444 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1447 "Expected at least tid and bounded tid as arguments");
1448 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1451 CI->
getParent()->setName(
"omp_parallel");
1452 Builder.SetInsertPoint(CI);
1455 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1459 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1461 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1468 auto PtrTy = OMPIRBuilder->VoidPtr;
1469 if (IfCondition && NumCapturedVars == 0) {
1474 Builder.CreateCall(RTLFn, RealArgs);
1477 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1480 Builder.SetInsertPoint(PrivTID);
1482 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1489 I->eraseFromParent();
1493OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1494 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1495 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1496 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1497 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1500 if (!updateToLocation(
Loc))
1504 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1505 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1506 Value *ThreadID = getOrCreateThreadID(Ident);
1512 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1516 if (NumThreads && !Config.isTargetDevice()) {
1519 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1521 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1524 if (ProcBind != OMP_PROC_BIND_default) {
1528 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1530 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1533 BasicBlock *InsertBB = Builder.GetInsertBlock();
1538 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1546 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1547 Builder.restoreIP(NewOuter);
1548 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1550 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1553 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1556 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1560 PointerType ::get(M.getContext(), 0),
1561 "zero.addr.ascast");
1582 auto FiniCBWrapper = [&](InsertPointTy IP) {
1587 Builder.restoreIP(IP);
1589 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1593 "Unexpected insertion point for finalization call!");
1597 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1602 InsertPointTy InnerAllocaIP = Builder.saveIP();
1605 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1609 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1611 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1629 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1632 assert(BodyGenCB &&
"Expected body generation callback!");
1633 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1634 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1637 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1640 if (Config.isTargetDevice()) {
1642 OI.PostOutlineCB = [=, ToBeDeletedVec =
1643 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1645 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1646 ThreadID, ToBeDeletedVec);
1650 OI.PostOutlineCB = [=, ToBeDeletedVec =
1651 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1653 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1657 OI.OuterAllocaBB = OuterAllocaBlock;
1658 OI.EntryBB = PRegEntryBB;
1659 OI.ExitBB = PRegExitBB;
1663 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1674 ".omp_par", ArgsInZeroAddressSpace);
1679 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1681 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1686 return GV->getValueType() == OpenMPIRBuilder::Ident;
1691 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1694 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1697 if (&V == TIDAddr || &V == ZeroAddr) {
1698 OI.ExcludeArgsFromAggregate.push_back(&V);
1703 for (
Use &U : V.uses())
1705 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1715 if (!V.getType()->isPointerTy()) {
1719 Builder.restoreIP(OuterAllocaIP);
1721 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1725 Builder.SetInsertPoint(InsertBB,
1727 Builder.CreateStore(&V,
Ptr);
1730 Builder.restoreIP(InnerAllocaIP);
1731 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1734 Value *ReplacementValue =
nullptr;
1737 ReplacementValue = PrivTID;
1739 InsertPointOrErrorTy AfterIP =
1740 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1742 return AfterIP.takeError();
1743 Builder.restoreIP(*AfterIP);
1745 InnerAllocaIP.getBlock(),
1746 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1748 assert(ReplacementValue &&
1749 "Expected copy/create callback to set replacement value!");
1750 if (ReplacementValue == &V)
1755 UPtr->set(ReplacementValue);
1780 for (
Value *Output : Outputs)
1783 assert(Outputs.empty() &&
1784 "OpenMP outlining should not produce live-out values!");
1786 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1788 for (
auto *BB : Blocks)
1795 auto FiniInfo = FinalizationStack.pop_back_val();
1797 assert(FiniInfo.DK == OMPD_parallel &&
1798 "Unexpected finalization stack state!");
1802 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1803 if (
Error Err = FiniCB(PreFiniIP))
1807 addOutlineInfo(std::move(OI));
1809 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1810 UI->eraseFromParent();
1815void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1818 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1819 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1821 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1824void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1825 if (!updateToLocation(
Loc))
1830void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1834 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1835 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1836 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1839 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1843void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1844 if (!updateToLocation(
Loc))
1846 emitTaskwaitImpl(
Loc);
1849void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1852 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1853 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1855 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1857 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1861void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1862 if (!updateToLocation(
Loc))
1864 emitTaskyieldImpl(
Loc);
// Fragment of a static helper that materializes an array of kmp_depend_info
// records (".dep.arr.addr") for a task's `Dependencies` list: it allocas the
// array in the entry block, then per dependency stores the base address
// (ptrtoint of DepVal), the length (type store size of DepValueType), and the
// flags (DepKind) into the corresponding struct fields.
// NOTE(review): corrupted extraction — the function signature line itself
// (name, return type, remaining parameters), the early-return body for the
// empty() case, the DepArrayTy declaration, several GEP arguments, and the
// closing braces are all missing. Restore from upstream before compiling.
1873 OpenMPIRBuilder &OMPBuilder,
1876 if (Dependencies.
empty())
1896 Type *DependInfo = OMPBuilder.DependInfo;
1897 Module &M = OMPBuilder.M;
1899 Value *DepArray =
nullptr;
// Temporarily move the insertion point to the entry-block terminator so the
// alloca is created in the entry block, then restore it.
1900 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1901 Builder.SetInsertPoint(
1902 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1905 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1907 Builder.restoreIP(OldIP);
// Fill one DependInfo record per dependency.
1909 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1911 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
// BaseAddr field <- pointer value of the dependency.
1913 Value *Addr = Builder.CreateStructGEP(
1915 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1916 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1917 Builder.CreateStore(DepValPtr, Addr);
// Len field <- store size of the dependency's value type.
1920 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1921 Builder.CreateStore(
1922 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
// Flags field <- the dependency kind, as an i8.
1925 Value *Flags = Builder.CreateStructGEP(
1927 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1928 Builder.CreateStore(
1929 ConstantInt::get(Builder.getInt8Ty(),
1930 static_cast<unsigned int>(Dep.DepKind)),
// Generates an OpenMP `task` region. Splits off task.exit / task.body /
// task.alloca blocks, runs the user's BodyGenCB, and registers an OutlineInfo
// whose PostOutlineCB replaces the outlined-function call with the
// __kmpc_omp_task_alloc / (__kmpc_omp_task | if0 | with_deps) runtime
// protocol, encoding Tied/Final/Mergeable/Priority into the task flags.
// NOTE(review): corrupted extraction — embedded original line numbers, many
// missing interior lines (per the jumps in the numbering: parameter tail,
// error paths, several declarations such as OI, StaleCI, Task, Shareds,
// Zero, and all closing braces). Restore from upstream before compiling.
1936OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1937 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1938 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1942 if (!updateToLocation(
Loc))
1943 return InsertPointTy();
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Carve the current block into alloca / body / exit regions for outlining.
1964 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1965 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1967 splitBB(Builder,
true,
"task.alloca");
1969 InsertPointTy TaskAllocaIP =
1970 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1971 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1972 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1976 OI.EntryBB = TaskAllocaBB;
1977 OI.OuterAllocaBB = AllocaIP.getBlock();
1978 OI.ExitBB = TaskExitBB;
1983 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
// The heavy lifting happens after outlining: rewrite the stale call into the
// task-runtime protocol.
1985 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1986 Mergeable, Priority, EventHandle, TaskAllocaBB,
1987 ToBeDeleted](
Function &OutlinedFn)
mutable {
1990 "there must be a single user for the outlined function");
1995 bool HasShareds = StaleCI->
arg_size() > 1;
1996 Builder.SetInsertPoint(StaleCI);
2001 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2005 Value *ThreadID = getOrCreateThreadID(Ident);
// Task flags: bit 0 = tied; 2 selected when Final is true; 4 = mergeable;
// 32 set for the completion-event (detachable) case.
2017 Value *Flags = Builder.getInt32(Tied);
2020 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2021 Flags = Builder.CreateOr(FinalFlag, Flags);
2025 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2027 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2033 Value *TaskSize = Builder.getInt64(
2034 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2039 Value *SharedsSize = Builder.getInt64(0);
2043 assert(ArgStructAlloca &&
2044 "Unable to find the alloca instruction corresponding to arguments "
2045 "for extracted function");
2048 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2049 "arguments for extracted function");
2051 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
// Allocate the task object via the runtime.
2056 CallInst *TaskData = Builder.CreateCall(
2057 TaskAllocFn, {Ident, ThreadID, Flags,
2058 TaskSize, SharedsSize,
// Detachable task: create and publish the completion event handle.
2065 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2066 OMPRTL___kmpc_task_allow_completion_event);
2068 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2070 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2071 Builder.getPtrTy(0));
2072 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2073 Builder.CreateStore(EventVal, EventHandleAddr);
// Copy the captured shareds into the runtime-allocated task storage.
2079 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2080 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2098 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2101 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
// Priority is written into the kmp_cmplrdata_t slot of the task struct.
2102 Value *PriorityData = Builder.CreateInBoundsGEP(
2103 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2106 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2108 Builder.CreateStore(Priority, CmplrData);
// if(IfCondition): split into then/else; the else path runs the task
// undeferred via task_begin_if0 / direct call / task_complete_if0.
2133 splitBB(Builder,
true,
"if.end");
2135 Builder.GetInsertPoint()->
getParent()->getTerminator();
2136 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2137 Builder.SetInsertPoint(IfTerminator);
2140 Builder.SetInsertPoint(ElseTI);
2142 if (Dependencies.size()) {
2144 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2147 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2148 ConstantInt::get(Builder.getInt32Ty(), 0),
2152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2154 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2155 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2158 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2160 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2162 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2163 Builder.SetInsertPoint(ThenTI);
// Deferred path: enqueue via __kmpc_omp_task(_with_deps).
2166 if (Dependencies.size()) {
2168 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2171 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2172 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2177 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2178 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
// Inside the outlined function, reroute uses of the shareds pointer to a
// load of the runtime-provided second argument.
2183 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2185 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2187 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2191 I->eraseFromParent();
2194 addOutlineInfo(std::move(OI));
2195 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2197 return Builder.saveIP();
// Generates an OpenMP `taskgroup` region: __kmpc_taskgroup before the body,
// the user's BodyGenCB in between, and __kmpc_end_taskgroup in the split-off
// "taskgroup.exit" block.
// NOTE(review): corrupted extraction — SrcLocStrSize declaration, the error
// propagation after BodyGenCB, the TaskgroupFn/EndTaskgroupFn declaration
// lines, and closing braces are missing. Restore from upstream before
// compiling.
2200OpenMPIRBuilder::InsertPointOrErrorTy
2201OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2202 InsertPointTy AllocaIP,
2203 BodyGenCallbackTy BodyGenCB) {
2204 if (!updateToLocation(
Loc))
2205 return InsertPointTy();
2208 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2209 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2210 Value *ThreadID = getOrCreateThreadID(Ident);
2214 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2217 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
// Body is generated between the begin/end runtime calls.
2218 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2221 Builder.SetInsertPoint(TaskgroupExitBB);
2224 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2225 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2227 return Builder.saveIP();
// Generates an OpenMP `sections` construct by lowering it to a canonical
// static workshare loop over the section indices: a switch on the induction
// variable dispatches to one case block per SectionCB. Cancellation is
// supported via dummy branches patched after the loop is finalized.
// NOTE(review): corrupted extraction — many interior lines are missing
// (SectionCBs parameter, the FiniCBWrapper body, switch/Continue/CaseEndBr
// declarations, the createCanonicalLoop call result, LoopFini lookup, and
// all closing braces). Restore from upstream before compiling.
2230OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2231 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2233 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2236 if (!updateToLocation(
Loc))
// Wrapper records a dummy cancellation branch to be patched later.
2242 auto FiniCBWrapper = [&](InsertPointTy IP) {
2251 CancellationBranches.
push_back(DummyBranch);
2255 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
// Loop body: switch over the induction variable, one case per section.
2273 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2274 Builder.restoreIP(CodeGenIP);
2276 splitBBWithSuffix(Builder,
false,
".sections.after");
2280 unsigned CaseNumber = 0;
2281 for (
auto SectionCB : SectionCBs) {
2283 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2284 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2285 Builder.SetInsertPoint(CaseBB);
2287 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
// Canonical loop from 0 to the number of sections with step 1.
2299 Value *LB = ConstantInt::get(I32Ty, 0);
2300 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2301 Value *
ST = ConstantInt::get(I32Ty, 1);
2303 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2307 InsertPointOrErrorTy WsloopIP =
2308 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2309 WorksharingLoopType::ForStaticLoop, !IsNowait);
2311 return WsloopIP.takeError();
2312 InsertPointTy AfterIP = *WsloopIP;
2315 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
// Pop and run the finalization callback pushed above.
2318 auto FiniInfo = FinalizationStack.pop_back_val();
2319 assert(FiniInfo.DK == OMPD_sections &&
2320 "Unexpected finalization stack state!");
2321 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2322 Builder.restoreIP(AfterIP);
2324 splitBBWithSuffix(Builder,
true,
"sections.fini");
2325 if (
Error Err = CB(Builder.saveIP()))
2327 AfterIP = {FiniBB, FiniBB->
begin()};
// Patch the dummy cancellation branches recorded by FiniCBWrapper.
2331 for (
BranchInst *DummyBranch : CancellationBranches) {
// Generates a single `section` inside a sections construct, emitted as an
// inlined region (EmitOMPInlinedRegion) under directive OMPD_sections, with a
// finalization wrapper that re-anchors the finalization insert point.
// NOTE(review): corrupted extraction — the guard's early return, most of the
// FiniCBWrapper body, the EmitOMPInlinedRegion trailing arguments, and the
// closing braces are missing; the final `return OpenMPIRBuilder::
// InsertPointTy(...)` appears to belong to the wrapper lambda but its
// context is cut. Restore from upstream before compiling.
2339OpenMPIRBuilder::InsertPointOrErrorTy
2340OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2341 BodyGenCallbackTy BodyGenCB,
2342 FinalizeCallbackTy FiniCB) {
2343 if (!updateToLocation(
Loc))
2346 auto FiniCBWrapper = [&](InsertPointTy IP) {
2357 Builder.restoreIP(IP);
2358 auto *CaseBB =
Loc.IP.getBlock();
2362 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2366 Directive OMPD = Directive::OMPD_sections;
2369 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2377 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
// Returns the hardware thread id within the block by calling the
// __kmpc_get_hardware_thread_id_in_block device runtime function.
// NOTE(review): corrupted extraction — the call's closing argument list and
// the function's closing brace are missing. Restore from upstream before
// compiling.
2380Value *OpenMPIRBuilder::getGPUThreadID() {
2381 return Builder.CreateCall(
2382 getOrCreateRuntimeFunction(M,
2383 OMPRTL___kmpc_get_hardware_thread_id_in_block),
// Returns the device warp size by calling __kmpc_get_warp_size with no
// arguments.
// NOTE(review): corrupted extraction — the closing brace is missing. Restore
// from upstream before compiling.
2387Value *OpenMPIRBuilder::getGPUWarpSize() {
2388 return Builder.CreateCall(
2389 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
// Computes the warp id as thread-id >> log2(warp size), using the configured
// grid value GV_Warp_Size.
// NOTE(review): corrupted extraction — the closing brace is missing. Restore
// from upstream before compiling.
2392Value *OpenMPIRBuilder::getNVPTXWarpID() {
2393 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2394 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Computes the lane id as thread-id & (warp-size - 1), building the mask from
// log2(warp size).
// NOTE(review): corrupted extraction — "~0" and its "u" suffix are split
// across lines ("~0" / "u >> ..."), the CreateAnd call is truncated, and the
// closing brace is missing. Restore from upstream before compiling.
2397Value *OpenMPIRBuilder::getNVPTXLaneID() {
2398 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2399 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2400 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2401 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
// Casts `From` to `ToType`: identity when types match, bitcast for
// equal-sized types, integer cast for integer pairs, and otherwise a
// store/load round trip through an alloca placed at AllocaIP.
// NOTE(review): corrupted extraction — the remaining parameters (ToType),
// the `return From;` of the identity case, the integer-pair condition before
// the CreateIntCast, and the closing brace are missing. Restore from
// upstream before compiling.
2405Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2408 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2409 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2410 assert(FromSize > 0 &&
"From size must be greater than zero");
2411 assert(ToSize > 0 &&
"To size must be greater than zero");
2412 if (FromType == ToType)
2414 if (FromSize == ToSize)
2415 return Builder.CreateBitCast(From, ToType);
2417 return Builder.CreateIntCast(From, ToType,
true);
// Fallback: spill to an alloca of the target type and reload.
2418 InsertPointTy SaveIP = Builder.saveIP();
2419 Builder.restoreIP(AllocaIP);
2420 Value *CastItem = Builder.CreateAlloca(ToType);
2421 Builder.restoreIP(SaveIP);
2423 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2424 CastItem, Builder.getPtrTy(0));
2425 Builder.CreateStore(From, ValCastItem);
2426 return Builder.CreateLoad(ToType, CastItem);
// Emits a call to __kmpc_shuffle_int32 or __kmpc_shuffle_int64 (chosen by the
// element's store size, which must be <= 8 bytes), casting the element to the
// shuffle's integer type on the way in and back on the way out.
// NOTE(review): corrupted extraction — the remaining parameters (Element,
// ElementType, Offset), the CastTy declaration, and the closing brace are
// missing. Restore from upstream before compiling.
2429Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2433 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2434 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2438 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2440 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2441 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2442 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2443 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2444 Value *WarpSizeCast =
2445 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2446 Value *ShuffleCall =
2447 Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2448 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// Shuffles an element of `ElemType` from a remote lane into DstAddr, in
// integer chunks of decreasing width (8, 4, 2, 1 bytes). Multi-chunk
// remainders are handled with a precond/then/exit loop that advances Ptr and
// ElemPtr until fewer than IntSize bytes remain.
// NOTE(review): corrupted extraction — the remaining parameters (DstAddr,
// Offset, the loop's IntType/Ptr declarations), the PreCondBB/ThenBB/ExitBB
// creations, the PHI incoming edges, and closing braces are missing. Restore
// from upstream before compiling.
2451void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2454 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2465 Type *IndexTy = Builder.getIndexTy(
2466 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2467 Value *ElemPtr = DstAddr;
// Try chunk widths 8, 4, 2, 1 until the remaining size is consumed.
2469 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2473 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2474 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2476 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2477 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2478 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
// More than one chunk of this width: loop over the chunks.
2481 if ((
Size / IntSize) > 1) {
2482 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2483 SrcAddrGEP, Builder.getPtrTy());
2488 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2489 emitBlock(PreCondBB, CurFunc);
2491 Builder.CreatePHI(
Ptr->getType(), 2);
2494 Builder.CreatePHI(ElemPtr->
getType(), 2);
// Continue while at least IntSize bytes remain before PtrEnd.
2498 Value *PtrDiff = Builder.CreatePtrDiff(
2499 Builder.getInt8Ty(), PtrEnd,
2500 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2501 Builder.CreateCondBr(
2502 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2504 emitBlock(ThenBB, CurFunc);
2505 Value *Res = createRuntimeShuffleFunction(
2507 Builder.CreateAlignedLoad(
2508 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2510 Builder.CreateAlignedStore(Res, ElemPtr,
2511 M.getDataLayout().getPrefTypeAlign(ElemType));
2513 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2514 Value *LocalElemPtr =
2515 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2518 emitBranch(PreCondBB);
2519 emitBlock(ExitBB, CurFunc);
// Single chunk of this width: shuffle, truncate if narrower, store, advance.
2521 Value *Res = createRuntimeShuffleFunction(
2522 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2525 Res = Builder.CreateTrunc(Res, ElemType);
2526 Builder.CreateStore(Res, ElemPtr);
2527 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2529 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Copies a reduction list element-by-element between two bases. For
// RemoteLaneToThread, each destination element is a fresh alloca and the
// value is shuffled in from a remote lane; for ThreadCopy, the destination
// pointer is loaded from the destination list. Scalars/complex values are
// copied field-wise; aggregates via memcpy.
// NOTE(review): corrupted extraction — the ReductionInfos/SrcBase/DestBase
// parameters, the switch statement header over Action, several break/brace
// lines, and the memcpy's trailing arguments are missing. Restore from
// upstream before compiling.
2535void OpenMPIRBuilder::emitReductionListCopy(
2536 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2538 CopyOptionsTy CopyOptions) {
2539 Type *IndexTy = Builder.getIndexTy(
2540 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2541 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2545 for (
auto En :
enumerate(ReductionInfos)) {
2546 const ReductionInfo &RI = En.value();
2547 Value *SrcElementAddr =
nullptr;
2548 Value *DestElementAddr =
nullptr;
2549 Value *DestElementPtrAddr =
nullptr;
2551 bool ShuffleInElement =
false;
2554 bool UpdateDestListPtr =
false;
// Source element address comes from the source reduction list.
2557 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2558 ReductionArrayTy, SrcBase,
2559 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2560 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2564 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2565 ReductionArrayTy, DestBase,
2566 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// RemoteLaneToThread: destination is a private alloca; element will be
// shuffled in and the dest list pointer updated.
2568 case CopyAction::RemoteLaneToThread: {
2569 InsertPointTy CurIP = Builder.saveIP();
2570 Builder.restoreIP(AllocaIP);
2571 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2572 ".omp.reduction.element");
2574 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2575 DestElementAddr = DestAlloca;
2577 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2578 DestElementAddr->
getName() +
".ascast");
2579 Builder.restoreIP(CurIP);
2580 ShuffleInElement =
true;
2581 UpdateDestListPtr =
true;
// ThreadCopy: destination address comes from the destination list.
2584 case CopyAction::ThreadCopy: {
2586 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2593 if (ShuffleInElement) {
2594 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2595 RemoteLaneOffset, ReductionArrayTy);
2597 switch (RI.EvaluationKind) {
2598 case EvalKind::Scalar: {
2599 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2601 Builder.CreateStore(Elem, DestElementAddr);
// Complex values are copied as separate real/imag fields.
2604 case EvalKind::Complex: {
2605 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2606 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2607 Value *SrcReal = Builder.CreateLoad(
2608 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2609 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2610 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2611 Value *SrcImg = Builder.CreateLoad(
2612 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2614 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2615 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2616 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2617 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2618 Builder.CreateStore(SrcReal, DestRealPtr);
2619 Builder.CreateStore(SrcImg, DestImgPtr);
2622 case EvalKind::Aggregate: {
2623 Value *SizeVal = Builder.getInt64(
2624 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2625 Builder.CreateMemCpy(
2626 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2627 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the private alloca's address into the destination list.
2639 if (UpdateDestListPtr) {
2640 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2641 DestElementAddr, Builder.getPtrTy(),
2642 DestElementAddr->
getName() +
".ascast");
2643 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
// Fragment of the inter-warp copy helper: builds the
// "_omp_reduction_inter_warp_copy_func" function, which moves per-warp
// partial reduction values through a shared transfer-medium global
// ("__openmp_nvptx_data_transfer_temporary_storage"): each warp's lane 0
// writes its element into the medium, a barrier synchronizes, then the first
// NumWarps threads read the values back into the reduce list. Elements wider
// than 4 bytes are processed in 4/2/1-byte chunks with a counter loop.
// NOTE(review): corrupted extraction — the function's signature line (name,
// return type, leading parameters such as Loc and ReductionInfos) precedes
// this chunk and is missing, as are the FuncTy/Function creation details,
// the TransferMedium global's creation body, block creations
// (PrecondBB/BodyBB/ExitBB/ThenBB/ElseBB/MergeBB/W0*), and closing braces.
// Restore from upstream before compiling.
2650 AttributeList FuncAttrs) {
2651 InsertPointTy SavedIP = Builder.saveIP();
2654 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2658 "_omp_reduction_inter_warp_copy_func", &M);
2663 Builder.SetInsertPoint(EntryBB);
// Shared scratch global used as the inter-warp transfer medium.
2681 "__openmp_nvptx_data_transfer_temporary_storage";
2682 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2683 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2685 if (!TransferMedium) {
2694 Value *GPUThreadID = getGPUThreadID();
2696 Value *LaneID = getNVPTXLaneID();
2698 Value *WarpID = getNVPTXWarpID();
2700 InsertPointTy AllocaIP =
2701 InsertPointTy(Builder.GetInsertBlock(),
2702 Builder.GetInsertBlock()->getFirstInsertionPt());
// Spill the two arguments (reduce list, num warps) into address-space-cast
// allocas.
2705 Builder.restoreIP(AllocaIP);
2706 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2707 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2709 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2710 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2711 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2712 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2713 NumWarpsAlloca, Builder.getPtrTy(0),
2714 NumWarpsAlloca->
getName() +
".ascast");
2715 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2716 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2718 InsertPointTy CodeGenIP =
2720 Builder.restoreIP(CodeGenIP);
2723 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
// Per reduction element, copy through the transfer medium in shrinking
// chunk sizes (4, 2, 1 bytes).
2725 for (
auto En :
enumerate(ReductionInfos)) {
2730 const ReductionInfo &RI = En.value();
2731 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2732 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2735 unsigned NumIters = RealTySize / TySize;
2738 Value *Cnt =
nullptr;
2739 Value *CntAddr =
nullptr;
// Multi-iteration case keeps a loop counter in a private alloca.
2743 CodeGenIP = Builder.saveIP();
2744 Builder.restoreIP(AllocaIP);
2746 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2748 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2749 CntAddr->
getName() +
".ascast");
2750 Builder.restoreIP(CodeGenIP);
2757 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2758 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2760 Value *
Cmp = Builder.CreateICmpULT(
2761 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2762 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2763 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
// Barrier before warp masters write into the medium.
2767 InsertPointOrErrorTy BarrierIP1 =
2768 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2769 omp::Directive::OMPD_unknown,
2773 return BarrierIP1.takeError();
// Only lane 0 of each warp writes its element to the medium.
2779 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2780 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2781 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2784 auto *RedListArrayTy =
2786 Type *IndexTy = Builder.getIndexTy(
2787 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2789 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2790 {ConstantInt::get(IndexTy, 0),
2791 ConstantInt::get(IndexTy, En.index())});
2793 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2795 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2799 Value *MediumPtr = Builder.CreateInBoundsGEP(
2800 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2803 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2805 Builder.CreateStore(Elem, MediumPtr,
2807 Builder.CreateBr(MergeBB);
2810 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2811 Builder.CreateBr(MergeBB);
// Barrier between the write phase and the read-back phase.
2814 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2815 InsertPointOrErrorTy BarrierIP2 =
2816 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2817 omp::Directive::OMPD_unknown,
2821 return BarrierIP2.takeError();
// The first NumWarps threads read the medium back into the reduce list.
2828 Value *NumWarpsVal =
2829 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2831 Value *IsActiveThread =
2832 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2833 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2835 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2839 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2840 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2842 Value *TargetElemPtrPtr =
2843 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2844 {ConstantInt::get(IndexTy, 0),
2845 ConstantInt::get(IndexTy, En.index())});
2846 Value *TargetElemPtrVal =
2847 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2848 Value *TargetElemPtr = TargetElemPtrVal;
2851 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2854 Value *SrcMediumValue =
2855 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2856 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2857 Builder.CreateBr(W0MergeBB);
2859 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2860 Builder.CreateBr(W0MergeBB);
2862 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
// Increment the chunk-loop counter and continue.
2865 Cnt = Builder.CreateNSWAdd(
2866 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2867 Builder.CreateStore(Cnt, CntAddr,
false);
2869 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2870 emitBranch(PrecondBB);
2871 emitBlock(ExitBB, CurFn);
2873 RealTySize %= TySize;
2877 Builder.CreateRetVoid();
2878 Builder.restoreIP(SavedIP);
// Builds "_omp_reduction_shuffle_and_reduce_func": shuffles a remote lane's
// reduce list into a local copy, conditionally reduces it into the local
// list (the condition encodes algorithm versions 0/1/2 against LaneId and
// RemoteLaneOffset), and for algorithm 1 copies the result back when
// LaneId >= RemoteLaneOffset.
// NOTE(review): corrupted extraction — the parameter list head, the
// FuncTy/Function creation and argument extraction lines, the
// RedListArrayTy declaration, the AllocaIP setup, block creations
// (ThenBB/ElseBB/MergeBB/Cpy*), and closing braces are missing. Restore
// from upstream before compiling.
2883Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2885 AttributeList FuncAttrs) {
2889 {Builder.getPtrTy(), Builder.getInt16Ty(),
2890 Builder.getInt16Ty(), Builder.getInt16Ty()},
2894 "_omp_reduction_shuffle_and_reduce_func", &M);
2904 Builder.SetInsertPoint(EntryBB);
// Spill the four arguments (reduce list, lane id, remote lane offset, algo
// version) into address-space-cast allocas, plus a remote reduce list.
2915 Type *ReduceListArgType = ReduceListArg->
getType();
2917 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
2918 Value *ReduceListAlloca = Builder.CreateAlloca(
2919 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2920 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2921 LaneIDArg->
getName() +
".addr");
2922 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
2923 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2924 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2925 AlgoVerArg->
getName() +
".addr");
2931 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
2932 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2934 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2935 ReduceListAlloca, ReduceListArgType,
2936 ReduceListAlloca->
getName() +
".ascast");
2937 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2938 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2939 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2940 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2941 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2942 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2943 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2944 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2945 RemoteReductionListAlloca, Builder.getPtrTy(),
2946 RemoteReductionListAlloca->
getName() +
".ascast");
2948 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2949 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
2950 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
2951 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
2953 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
2954 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
2955 Value *RemoteLaneOffset =
2956 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
2957 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
// Shuffle the remote lane's list into the local remote-reduce-list copy.
2964 emitReductionListCopy(
2965 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
2966 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
// CondReduce = (AlgoVer==0) || (AlgoVer==1 && LaneId < RemoteLaneOffset)
//           || (AlgoVer==2 && (LaneId&1)==0 && RemoteLaneOffset > 0).
2989 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2990 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2991 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2992 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2993 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2994 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2995 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2996 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2997 Value *RemoteOffsetComp =
2998 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2999 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3000 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3001 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
// When the condition holds, reduce the remote list into the local one.
3007 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3008 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3009 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3010 ReduceList, Builder.getPtrTy());
3011 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3012 RemoteListAddrCast, Builder.getPtrTy());
3013 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3014 ->addFnAttr(Attribute::NoUnwind);
3015 Builder.CreateBr(MergeBB);
3017 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3018 Builder.CreateBr(MergeBB);
3020 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
// Algorithm 1 copy-back: when LaneId >= RemoteLaneOffset, overwrite the
// local list with the remote copy.
3024 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3025 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3026 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3031 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3033 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3034 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3035 ReductionInfos, RemoteListAddrCast, ReduceList);
3036 Builder.CreateBr(CpyMergeBB);
3038 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3039 Builder.CreateBr(CpyMergeBB);
3041 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3043 Builder.CreateRetVoid();
// Builds "_omp_reduction_list_to_global_copy_func": for each reduction
// element, copies the value from the local reduce list into the global
// reductions buffer slot selected by the Idx argument. Scalars/complex
// values are copied field-wise; aggregates via memcpy.
// NOTE(review): corrupted extraction — parameter list head, FuncTy/Function
// creation and argument extraction, the IdxArgAlloca name suffix, the
// RedListArrayTy initializer, the BufferVD declaration, several
// break/brace lines, and the final return are missing. Restore from
// upstream before compiling.
3048Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3050 AttributeList FuncAttrs) {
3051 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3054 Builder.getVoidTy(),
3055 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3059 "_omp_reduction_list_to_global_copy_func", &M);
3066 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments (buffer, idx, reduce list) into
// address-space-cast allocas.
3075 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3076 BufferArg->
getName() +
".addr");
3077 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3079 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3080 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3081 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3082 BufferArgAlloca, Builder.getPtrTy(),
3083 BufferArgAlloca->
getName() +
".ascast");
3084 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3085 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3086 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3087 ReduceListArgAlloca, Builder.getPtrTy(),
3088 ReduceListArgAlloca->
getName() +
".ascast");
3090 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3091 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3092 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3094 Value *LocalReduceList =
3095 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3096 Value *BufferArgVal =
3097 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3098 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3099 Type *IndexTy = Builder.getIndexTy(
3100 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Per element: local list entry -> corresponding global buffer field.
3101 for (
auto En :
enumerate(ReductionInfos)) {
3102 const ReductionInfo &RI = En.value();
3103 auto *RedListArrayTy =
3106 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3107 RedListArrayTy, LocalReduceList,
3108 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3110 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3114 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3115 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3116 ReductionsBufferTy, BufferVD, 0, En.index());
3118 switch (RI.EvaluationKind) {
3119 case EvalKind::Scalar: {
3120 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3121 Builder.CreateStore(TargetElement, GlobVal);
// Complex values are copied as separate real/imag fields.
3124 case EvalKind::Complex: {
3125 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3126 RI.ElementType, ElemPtr, 0, 0,
".realp");
3127 Value *SrcReal = Builder.CreateLoad(
3128 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3129 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3130 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3131 Value *SrcImg = Builder.CreateLoad(
3132 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3134 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3135 RI.ElementType, GlobVal, 0, 0,
".realp");
3136 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3137 RI.ElementType, GlobVal, 0, 1,
".imagp");
3138 Builder.CreateStore(SrcReal, DestRealPtr);
3139 Builder.CreateStore(SrcImg, DestImgPtr);
3142 case EvalKind::Aggregate: {
3144 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3145 Builder.CreateMemCpy(
3146 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3147 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3153 Builder.CreateRetVoid();
3154 Builder.restoreIP(OldIP);
// Builds "_omp_reduction_list_to_global_reduce_func": constructs a local
// reduce list whose entries point at the selected global buffer fields, then
// calls ReduceFn(globalPointingList, incomingReduceList) so the reduction
// combines into the global buffer.
// NOTE(review): corrupted extraction — parameter list head, FuncTy/Function
// creation and argument extraction, the IdxArgAlloca name suffix, the
// RedListArrayTy initializer, the BufferVD declaration, and closing
// braces/return are missing. Restore from upstream before compiling.
3158Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3160 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3161 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3164 Builder.getVoidTy(),
3165 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3169 "_omp_reduction_list_to_global_reduce_func", &M);
3176 Builder.SetInsertPoint(EntryBlock);
// Spill the three arguments plus a local reduce-list array.
3185 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3186 BufferArg->
getName() +
".addr");
3187 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3189 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3190 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3191 auto *RedListArrayTy =
3196 Value *LocalReduceList =
3197 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3199 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3200 BufferArgAlloca, Builder.getPtrTy(),
3201 BufferArgAlloca->
getName() +
".ascast");
3202 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3203 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3204 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3205 ReduceListArgAlloca, Builder.getPtrTy(),
3206 ReduceListArgAlloca->
getName() +
".ascast");
3207 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3208 LocalReduceList, Builder.getPtrTy(),
3209 LocalReduceList->
getName() +
".ascast");
3211 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3212 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3213 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3215 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3216 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3217 Type *IndexTy = Builder.getIndexTy(
3218 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Point each local-list entry at the corresponding global buffer field.
3219 for (
auto En :
enumerate(ReductionInfos)) {
3220 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3221 RedListArrayTy, LocalReduceListAddrCast,
3222 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3224 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3226 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3227 ReductionsBufferTy, BufferVD, 0, En.index());
3228 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Combine the incoming list into the global-pointing list via ReduceFn.
3233 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3234 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3235 ->addFnAttr(Attribute::NoUnwind);
3236 Builder.CreateRetVoid();
3237 Builder.restoreIP(OldIP);
3241Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3243 AttributeList FuncAttrs) {
3244 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3247 Builder.getVoidTy(),
3248 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3252 "_omp_reduction_global_to_list_copy_func", &M);
3259 Builder.SetInsertPoint(EntryBlock);
3268 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3269 BufferArg->
getName() +
".addr");
3270 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3272 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3273 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3274 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3275 BufferArgAlloca, Builder.getPtrTy(),
3276 BufferArgAlloca->
getName() +
".ascast");
3277 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3278 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3279 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3280 ReduceListArgAlloca, Builder.getPtrTy(),
3281 ReduceListArgAlloca->
getName() +
".ascast");
3282 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3283 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3284 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3286 Value *LocalReduceList =
3287 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3288 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3289 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3290 Type *IndexTy = Builder.getIndexTy(
3291 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3292 for (
auto En :
enumerate(ReductionInfos)) {
3293 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3294 auto *RedListArrayTy =
3297 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3298 RedListArrayTy, LocalReduceList,
3299 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3301 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3304 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3305 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3306 ReductionsBufferTy, BufferVD, 0, En.index());
3308 switch (RI.EvaluationKind) {
3309 case EvalKind::Scalar: {
3310 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3311 Builder.CreateStore(TargetElement, ElemPtr);
3314 case EvalKind::Complex: {
3315 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3316 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3317 Value *SrcReal = Builder.CreateLoad(
3318 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3319 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3320 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3321 Value *SrcImg = Builder.CreateLoad(
3322 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3324 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3325 RI.ElementType, ElemPtr, 0, 0,
".realp");
3326 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3327 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3328 Builder.CreateStore(SrcReal, DestRealPtr);
3329 Builder.CreateStore(SrcImg, DestImgPtr);
3332 case EvalKind::Aggregate: {
3334 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3335 Builder.CreateMemCpy(
3336 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3337 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3344 Builder.CreateRetVoid();
3345 Builder.restoreIP(OldIP);
3349Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3351 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3352 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3355 Builder.getVoidTy(),
3356 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3360 "_omp_reduction_global_to_list_reduce_func", &M);
3367 Builder.SetInsertPoint(EntryBlock);
3376 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3377 BufferArg->
getName() +
".addr");
3378 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3380 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3381 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3387 Value *LocalReduceList =
3388 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3390 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3391 BufferArgAlloca, Builder.getPtrTy(),
3392 BufferArgAlloca->
getName() +
".ascast");
3393 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3394 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3395 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3396 ReduceListArgAlloca, Builder.getPtrTy(),
3397 ReduceListArgAlloca->
getName() +
".ascast");
3398 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3399 LocalReduceList, Builder.getPtrTy(),
3400 LocalReduceList->
getName() +
".ascast");
3402 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3403 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3404 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3406 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3407 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3408 Type *IndexTy = Builder.getIndexTy(
3409 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3410 for (
auto En :
enumerate(ReductionInfos)) {
3411 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3412 RedListArrayTy, ReductionList,
3413 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3416 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3417 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3418 ReductionsBufferTy, BufferVD, 0, En.index());
3419 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3424 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3425 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3426 ->addFnAttr(Attribute::NoUnwind);
3427 Builder.CreateRetVoid();
3428 Builder.restoreIP(OldIP);
3432std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3433 std::string Suffix =
3434 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3435 return (Name + Suffix).
str();
3440 ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3442 {Builder.getPtrTy(), Builder.getPtrTy()},
3444 std::string
Name = getReductionFuncName(ReducerName);
3452 Builder.SetInsertPoint(EntryBB);
3456 Value *LHSArrayPtr =
nullptr;
3457 Value *RHSArrayPtr =
nullptr;
3464 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3466 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3467 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3468 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3469 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3470 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3471 Builder.CreateStore(Arg0, LHSAddrCast);
3472 Builder.CreateStore(Arg1, RHSAddrCast);
3473 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3474 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3477 Type *IndexTy = Builder.getIndexTy(
3478 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3480 for (
auto En :
enumerate(ReductionInfos)) {
3481 const ReductionInfo &RI = En.value();
3482 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3483 RedArrayTy, RHSArrayPtr,
3484 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3485 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3486 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3487 RHSI8Ptr, RI.PrivateVariable->getType(),
3488 RHSI8Ptr->
getName() +
".ascast");
3490 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3491 RedArrayTy, LHSArrayPtr,
3492 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3493 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3494 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3495 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3497 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3501 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3502 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3504 InsertPointOrErrorTy AfterIP =
3505 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3507 return AfterIP.takeError();
3508 if (!Builder.GetInsertBlock())
3509 return ReductionFunc;
3510 Builder.CreateStore(Reduced, LHSPtr);
3514 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3515 for (
auto En :
enumerate(ReductionInfos)) {
3516 unsigned Index = En.index();
3517 const ReductionInfo &RI = En.value();
3518 Value *LHSFixupPtr, *RHSFixupPtr;
3519 Builder.restoreIP(RI.ReductionGenClang(
3520 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3525 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3530 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3536 Builder.CreateRetVoid();
3537 return ReductionFunc;
3543 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3545 assert(RI.Variable &&
"expected non-null variable");
3546 assert(RI.PrivateVariable &&
"expected non-null private variable");
3547 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3548 "expected non-null reduction generator callback");
3551 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3552 "expected variables and their private equivalents to have the same "
3555 assert(RI.Variable->getType()->isPointerTy() &&
3556 "expected variables to be pointers");
3560OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3561 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3563 bool IsNoWait,
bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
3564 std::optional<omp::GV> GridValue,
unsigned ReductionBufNum,
3565 Value *SrcLocInfo) {
3566 if (!updateToLocation(
Loc))
3567 return InsertPointTy();
3568 Builder.restoreIP(CodeGenIP);
3575 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3576 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3579 if (ReductionInfos.
size() == 0)
3580 return Builder.saveIP();
3583 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3589 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3593 AttributeList FuncAttrs;
3594 AttrBuilder AttrBldr(Ctx);
3596 AttrBldr.addAttribute(Attr);
3597 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3598 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3600 CodeGenIP = Builder.saveIP();
3602 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3603 ReductionInfos, ReductionGenCBKind, FuncAttrs);
3604 if (!ReductionResult)
3606 Function *ReductionFunc = *ReductionResult;
3607 Builder.restoreIP(CodeGenIP);
3610 if (GridValue.has_value())
3611 Config.setGridValue(GridValue.value());
3626 CodeGenIP = Builder.saveIP();
3627 Builder.restoreIP(AllocaIP);
3628 Value *ReductionListAlloca =
3629 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3630 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3631 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3632 Builder.restoreIP(CodeGenIP);
3633 Type *IndexTy = Builder.getIndexTy(
3634 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3635 for (
auto En :
enumerate(ReductionInfos)) {
3636 const ReductionInfo &RI = En.value();
3637 Value *ElemPtr = Builder.CreateInBoundsGEP(
3638 RedArrayTy, ReductionList,
3639 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3641 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3642 Builder.CreateStore(CastElem, ElemPtr);
3644 CodeGenIP = Builder.saveIP();
3646 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3648 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3652 Builder.restoreIP(CodeGenIP);
3654 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
3656 unsigned MaxDataSize = 0;
3658 for (
auto En :
enumerate(ReductionInfos)) {
3659 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3660 if (
Size > MaxDataSize)
3662 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3664 Value *ReductionDataSize =
3665 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
3666 if (!IsTeamsReduction) {
3667 Value *SarFuncCast =
3668 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
3670 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
3671 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3673 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3674 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3675 Res = Builder.CreateCall(Pv2Ptr, Args);
3677 CodeGenIP = Builder.saveIP();
3679 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3680 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3681 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3682 Function *LtGCFunc = emitListToGlobalCopyFunction(
3683 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3684 Function *LtGRFunc = emitListToGlobalReduceFunction(
3685 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3686 Function *GtLCFunc = emitGlobalToListCopyFunction(
3687 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3688 Function *GtLRFunc = emitGlobalToListReduceFunction(
3689 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3690 Builder.restoreIP(CodeGenIP);
3692 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3693 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3695 Value *Args3[] = {SrcLocInfo,
3696 KernelTeamsReductionPtr,
3697 Builder.getInt32(ReductionBufNum),
3707 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3708 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3709 Res = Builder.CreateCall(TeamsReduceFn, Args3);
3715 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3716 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3722 emitBlock(ThenBB, CurFunc);
3725 for (
auto En :
enumerate(ReductionInfos)) {
3726 const ReductionInfo &RI = En.value();
3729 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3731 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3732 Value *LHSPtr, *RHSPtr;
3733 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3734 &LHSPtr, &RHSPtr, CurFunc));
3747 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
LHS,
"final.lhs");
3748 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
RHS,
"final.rhs");
3750 InsertPointOrErrorTy AfterIP =
3751 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
3753 return AfterIP.takeError();
3754 Builder.CreateStore(Reduced,
LHS,
false);
3757 emitBlock(ExitBB, CurFunc);
3758 if (ContinuationBlock) {
3759 Builder.CreateBr(ContinuationBlock);
3760 Builder.SetInsertPoint(ContinuationBlock);
3762 Config.setEmitLLVMUsed();
3764 return Builder.saveIP();
3773 ".omp.reduction.func", &M);
3783 Builder.SetInsertPoint(ReductionFuncBlock);
3784 Value *LHSArrayPtr =
nullptr;
3785 Value *RHSArrayPtr =
nullptr;
3796 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3798 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3799 Value *LHSAddrCast =
3800 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3801 Value *RHSAddrCast =
3802 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3803 Builder.CreateStore(Arg0, LHSAddrCast);
3804 Builder.CreateStore(Arg1, RHSAddrCast);
3805 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3806 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3808 LHSArrayPtr = ReductionFunc->
getArg(0);
3809 RHSArrayPtr = ReductionFunc->
getArg(1);
3812 unsigned NumReductions = ReductionInfos.
size();
3815 for (
auto En :
enumerate(ReductionInfos)) {
3816 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3817 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3818 RedArrayTy, LHSArrayPtr, 0, En.index());
3819 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3820 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3821 LHSI8Ptr, RI.Variable->
getType());
3822 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3823 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3824 RedArrayTy, RHSArrayPtr, 0, En.index());
3825 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3826 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3827 RHSI8Ptr, RI.PrivateVariable->
getType());
3828 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3830 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3831 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3833 return AfterIP.takeError();
3835 Builder.restoreIP(*AfterIP);
3837 if (!Builder.GetInsertBlock())
3841 if (!IsByRef[En.index()])
3842 Builder.CreateStore(Reduced, LHSPtr);
3844 Builder.CreateRetVoid();
3848OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3849 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3851 bool IsNoWait,
bool IsTeamsReduction) {
3854 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3855 IsNoWait, IsTeamsReduction);
3859 if (!updateToLocation(
Loc))
3860 return InsertPointTy();
3862 if (ReductionInfos.
size() == 0)
3863 return Builder.saveIP();
3872 unsigned NumReductions = ReductionInfos.
size();
3874 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
3875 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
3877 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3879 for (
auto En :
enumerate(ReductionInfos)) {
3880 unsigned Index = En.index();
3881 const ReductionInfo &RI = En.value();
3882 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
3883 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3884 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
3889 Type *IndexTy = Builder.getIndexTy(
3890 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3891 Function *
Func = Builder.GetInsertBlock()->getParent();
3894 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3895 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
3896 return RI.AtomicReductionGen;
3898 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
3900 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3902 Value *ThreadId = getOrCreateThreadID(Ident);
3903 Constant *NumVariables = Builder.getInt32(NumReductions);
3905 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3906 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3908 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3909 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
3910 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3911 : RuntimeFunction::OMPRTL___kmpc_reduce);
3913 Builder.CreateCall(ReduceFunc,
3914 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3915 ReductionFunc, Lock},
3926 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
3927 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3928 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3933 Builder.SetInsertPoint(NonAtomicRedBlock);
3934 for (
auto En :
enumerate(ReductionInfos)) {
3935 const ReductionInfo &RI = En.value();
3939 Value *RedValue = RI.Variable;
3940 if (!IsByRef[En.index()]) {
3941 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3942 "red.value." +
Twine(En.index()));
3944 Value *PrivateRedValue =
3945 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3946 "red.private.value." +
Twine(En.index()));
3948 InsertPointOrErrorTy AfterIP =
3949 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3951 return AfterIP.takeError();
3952 Builder.restoreIP(*AfterIP);
3954 if (!Builder.GetInsertBlock())
3955 return InsertPointTy();
3957 if (!IsByRef[En.index()])
3958 Builder.CreateStore(Reduced, RI.Variable);
3960 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3961 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3962 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3963 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3964 Builder.CreateBr(ContinuationBlock);
3969 Builder.SetInsertPoint(AtomicRedBlock);
3970 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3971 for (
const ReductionInfo &RI : ReductionInfos) {
3972 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3973 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3975 return AfterIP.takeError();
3976 Builder.restoreIP(*AfterIP);
3977 if (!Builder.GetInsertBlock())
3978 return InsertPointTy();
3980 Builder.CreateBr(ContinuationBlock);
3982 Builder.CreateUnreachable();
3993 if (!Builder.GetInsertBlock())
3994 return InsertPointTy();
3996 Builder.SetInsertPoint(ContinuationBlock);
3997 return Builder.saveIP();
4000OpenMPIRBuilder::InsertPointOrErrorTy
4001OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4002 BodyGenCallbackTy BodyGenCB,
4003 FinalizeCallbackTy FiniCB) {
4004 if (!updateToLocation(
Loc))
4007 Directive OMPD = Directive::OMPD_master;
4009 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4010 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4011 Value *ThreadId = getOrCreateThreadID(Ident);
4014 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4015 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4017 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4018 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4020 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4024OpenMPIRBuilder::InsertPointOrErrorTy
4025OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4026 BodyGenCallbackTy BodyGenCB,
4028 if (!updateToLocation(
Loc))
4031 Directive OMPD = Directive::OMPD_masked;
4033 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4034 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4035 Value *ThreadId = getOrCreateThreadID(Ident);
4037 Value *ArgsEnd[] = {Ident, ThreadId};
4039 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4040 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4042 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4043 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
4045 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4055 Call->setDoesNotThrow();
4067OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4068 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4070 bool IsInclusive, ScanInfo *ScanRedInfo) {
4071 if (ScanRedInfo->OMPFirstScanLoop) {
4072 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4073 ScanVarsType, ScanRedInfo);
4077 if (!updateToLocation(
Loc))
4082 if (ScanRedInfo->OMPFirstScanLoop) {
4084 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4085 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4086 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4087 Type *DestTy = ScanVarsType[i];
4088 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4089 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4091 Builder.CreateStore(Src, Val);
4094 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4095 emitBlock(ScanRedInfo->OMPScanDispatch,
4096 Builder.GetInsertBlock()->getParent());
4098 if (!ScanRedInfo->OMPFirstScanLoop) {
4099 IV = ScanRedInfo->IV;
4102 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4103 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4104 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4105 Type *DestTy = ScanVarsType[i];
4107 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4108 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4109 Builder.CreateStore(Src, ScanVars[i]);
4115 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4116 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4117 ScanRedInfo->OMPAfterScanBlock);
4119 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4120 ScanRedInfo->OMPBeforeScanBlock);
4122 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4123 Builder.GetInsertBlock()->getParent());
4124 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4125 return Builder.saveIP();
4128Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4132 Builder.restoreIP(AllocaIP);
4134 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4136 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4137 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
4141 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4142 InsertPointTy CodeGenIP) ->
Error {
4143 Builder.restoreIP(CodeGenIP);
4145 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4146 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4150 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4151 AllocSpan,
nullptr,
"arr");
4152 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
4160 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4162 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4163 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4166 return AfterIP.takeError();
4167 Builder.restoreIP(*AfterIP);
4168 BasicBlock *InputBB = Builder.GetInsertBlock();
4170 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4171 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4173 return AfterIP.takeError();
4174 Builder.restoreIP(*AfterIP);
4179Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4181 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4182 InsertPointTy CodeGenIP) ->
Error {
4183 Builder.restoreIP(CodeGenIP);
4184 for (ReductionInfo RedInfo : ReductionInfos) {
4185 Value *PrivateVar = RedInfo.PrivateVariable;
4186 Value *OrigVar = RedInfo.Variable;
4187 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4188 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4190 Type *SrcTy = RedInfo.ElementType;
4191 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4193 Value *Src = Builder.CreateLoad(SrcTy, Val);
4195 Builder.CreateStore(Src, OrigVar);
4196 Builder.CreateFree(Buff);
4204 if (ScanRedInfo->OMPScanFinish->getTerminator())
4205 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4207 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4210 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4211 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4214 return AfterIP.takeError();
4215 Builder.restoreIP(*AfterIP);
4216 BasicBlock *InputBB = Builder.GetInsertBlock();
4218 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4219 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4221 return AfterIP.takeError();
4222 Builder.restoreIP(*AfterIP);
4226OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4227 const LocationDescription &
Loc,
4229 ScanInfo *ScanRedInfo) {
4231 if (!updateToLocation(
Loc))
4233 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4234 InsertPointTy CodeGenIP) ->
Error {
4235 Builder.restoreIP(CodeGenIP);
4241 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
4243 Builder.GetInsertBlock()->getModule(),
4247 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4250 Builder.GetInsertBlock()->getModule(),
4253 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4256 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4257 Builder.SetInsertPoint(InputBB);
4258 Builder.CreateBr(LoopBB);
4259 emitBlock(LoopBB, CurFn);
4260 Builder.SetInsertPoint(LoopBB);
4262 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4264 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4265 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4267 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4275 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4276 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4277 emitBlock(InnerLoopBB, CurFn);
4278 Builder.SetInsertPoint(InnerLoopBB);
4279 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4281 for (ReductionInfo RedInfo : ReductionInfos) {
4282 Value *ReductionVal = RedInfo.PrivateVariable;
4283 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4284 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4285 Type *DestTy = RedInfo.ElementType;
4286 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4288 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4289 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4291 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4292 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4293 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4295 InsertPointOrErrorTy AfterIP =
4296 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4298 return AfterIP.takeError();
4299 Builder.CreateStore(Result, LHSPtr);
4302 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4303 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4304 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4305 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4306 emitBlock(InnerExitBB, CurFn);
4308 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4311 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4312 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4314 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4324 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4325 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4328 return AfterIP.takeError();
4329 Builder.restoreIP(*AfterIP);
4330 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4333 return AfterIP.takeError();
4334 Builder.restoreIP(*AfterIP);
4335 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
4342Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4345 ScanInfo *ScanRedInfo) {
4353 ScanRedInfo->OMPFirstScanLoop =
true;
4354 Error Err = InputLoopGen();
4364 ScanRedInfo->OMPFirstScanLoop =
false;
4365 Error Err = ScanLoopGen(Builder.saveIP());
4372void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4373 Function *
Fun = Builder.GetInsertBlock()->getParent();
4374 ScanRedInfo->OMPScanDispatch =
4376 ScanRedInfo->OMPAfterScanBlock =
4378 ScanRedInfo->OMPBeforeScanBlock =
4380 ScanRedInfo->OMPScanLoopExit =
4383CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4407 Builder.SetCurrentDebugLocation(
DL);
4409 Builder.SetInsertPoint(Preheader);
4410 Builder.CreateBr(Header);
4412 Builder.SetInsertPoint(Header);
4413 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4414 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4415 Builder.CreateBr(
Cond);
4417 Builder.SetInsertPoint(
Cond);
4419 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4420 Builder.CreateCondBr(Cmp, Body, Exit);
4422 Builder.SetInsertPoint(Body);
4423 Builder.CreateBr(Latch);
4425 Builder.SetInsertPoint(Latch);
4426 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4427 "omp_" + Name +
".next",
true);
4428 Builder.CreateBr(Header);
4431 Builder.SetInsertPoint(Exit);
4432 Builder.CreateBr(After);
4435 LoopInfos.emplace_front();
4436 CanonicalLoopInfo *CL = &LoopInfos.front();
4438 CL->Header = Header;
4450OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4451 LoopBodyGenCallbackTy BodyGenCB,
4456 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4457 NextBB, NextBB, Name);
4461 if (updateToLocation(
Loc)) {
4465 spliceBB(Builder, After,
false);
4466 Builder.CreateBr(CL->getPreheader());
4471 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4481 ScanInfos.emplace_front();
4482 ScanInfo *
Result = &ScanInfos.front();
4487OpenMPIRBuilder::createCanonicalScanLoops(
4488 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4489 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4490 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4491 LocationDescription ComputeLoc =
4492 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4493 updateToLocation(ComputeLoc);
4497 Value *TripCount = calculateCanonicalLoopTripCount(
4498 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4499 ScanRedInfo->Span = TripCount;
4500 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4501 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4503 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4504 Builder.restoreIP(CodeGenIP);
4505 ScanRedInfo->IV =
IV;
4506 createScanBBs(ScanRedInfo);
4507 BasicBlock *InputBlock = Builder.GetInsertBlock();
4511 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4512 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4513 Builder.GetInsertBlock()->getParent());
4514 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4515 emitBlock(ScanRedInfo->OMPScanLoopExit,
4516 Builder.GetInsertBlock()->getParent());
4517 Builder.CreateBr(ContinueBlock);
4518 Builder.SetInsertPoint(
4519 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4520 return BodyGenCB(Builder.saveIP(),
IV);
4523 const auto &&InputLoopGen = [&]() ->
Error {
4525 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4526 ComputeIP, Name,
true, ScanRedInfo);
4530 Builder.restoreIP((*LoopInfo)->getAfterIP());
4533 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4535 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4536 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4540 Builder.restoreIP((*LoopInfo)->getAfterIP());
4541 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4544 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
4550Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4552 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4562 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4563 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4565 updateToLocation(
Loc);
4582 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4583 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4584 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4585 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4586 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4587 ZeroCmp = Builder.CreateICmp(
4590 Span = Builder.CreateSub(Stop, Start,
"",
true);
4591 ZeroCmp = Builder.CreateICmp(
4595 Value *CountIfLooping;
4596 if (InclusiveStop) {
4597 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
4600 Value *CountIfTwo = Builder.CreateAdd(
4601 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4603 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4606 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4607 "omp_" + Name +
".tripcount");
4611 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4612 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4613 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4614 ScanInfo *ScanRedInfo) {
4615 LocationDescription ComputeLoc =
4616 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4618 Value *TripCount = calculateCanonicalLoopTripCount(
4619 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4621 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4622 Builder.restoreIP(CodeGenIP);
4623 Value *Span = Builder.CreateMul(
IV, Step);
4624 Value *IndVar = Builder.CreateAdd(Span, Start);
4626 ScanRedInfo->IV = IndVar;
4627 return BodyGenCB(Builder.saveIP(), IndVar);
4629 LocationDescription LoopLoc =
4632 : LocationDescription(Builder.saveIP(),
4633 Builder.getCurrentDebugLocation());
4634 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4643 OpenMPIRBuilder &OMPBuilder) {
4644 unsigned Bitwidth = Ty->getIntegerBitWidth();
4646 return OMPBuilder.getOrCreateRuntimeFunction(
4647 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4649 return OMPBuilder.getOrCreateRuntimeFunction(
4650 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4659 OpenMPIRBuilder &OMPBuilder) {
4660 unsigned Bitwidth = Ty->getIntegerBitWidth();
4662 return OMPBuilder.getOrCreateRuntimeFunction(
4663 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4665 return OMPBuilder.getOrCreateRuntimeFunction(
4666 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4670OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4671 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4673 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4675 "Require dedicated allocate IP");
4678 Builder.restoreIP(CLI->getPreheaderIP());
4679 Builder.SetCurrentDebugLocation(
DL);
4682 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4683 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4687 Type *IVTy =
IV->getType();
4689 LoopType == WorksharingLoopType::DistributeForStaticLoop
4693 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4696 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4699 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4700 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4701 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4702 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4703 CLI->setLastIter(PLastIter);
4709 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4711 Constant *One = ConstantInt::get(IVTy, 1);
4712 Builder.CreateStore(Zero, PLowerBound);
4713 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4714 Builder.CreateStore(UpperBound, PUpperBound);
4715 Builder.CreateStore(One, PStride);
4717 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4720 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4721 ? OMPScheduleType::OrderedDistribute
4724 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4729 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4730 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4731 Value *PDistUpperBound =
4732 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4733 Args.push_back(PDistUpperBound);
4736 Builder.CreateCall(StaticInit, Args);
4737 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4738 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4739 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4740 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4741 CLI->setTripCount(TripCount);
4748 Builder.SetInsertPoint(CLI->getBody(),
4749 CLI->getBody()->getFirstInsertionPt());
4750 Builder.SetCurrentDebugLocation(
DL);
4751 return Builder.CreateAdd(OldIV, LowerBound);
4755 Builder.SetInsertPoint(CLI->getExit(),
4756 CLI->getExit()->getTerminator()->getIterator());
4757 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4761 InsertPointOrErrorTy BarrierIP =
4762 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4763 omp::Directive::OMPD_for,
false,
4766 return BarrierIP.takeError();
4769 InsertPointTy AfterIP = CLI->getAfterIP();
4775OpenMPIRBuilder::InsertPointOrErrorTy
4776OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4777 CanonicalLoopInfo *CLI,
4778 InsertPointTy AllocaIP,
4781 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4782 assert(ChunkSize &&
"Chunk size is required");
4784 LLVMContext &Ctx = CLI->getFunction()->getContext();
4786 Value *OrigTripCount = CLI->getTripCount();
4787 Type *IVTy =
IV->getType();
4789 "Max supported tripcount bitwidth is 64 bits");
4791 :
Type::getInt64Ty(Ctx);
4794 Constant *One = ConstantInt::get(InternalIVTy, 1);
4800 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4803 Builder.restoreIP(AllocaIP);
4804 Builder.SetCurrentDebugLocation(
DL);
4805 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4806 Value *PLowerBound =
4807 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4808 Value *PUpperBound =
4809 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4810 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4811 CLI->setLastIter(PLastIter);
4814 Builder.restoreIP(CLI->getPreheaderIP());
4815 Builder.SetCurrentDebugLocation(
DL);
4818 Value *CastedChunkSize =
4819 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4820 Value *CastedTripCount =
4821 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4823 Constant *SchedulingType = ConstantInt::get(
4824 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4825 Builder.CreateStore(Zero, PLowerBound);
4826 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4827 Builder.CreateStore(OrigUpperBound, PUpperBound);
4828 Builder.CreateStore(One, PStride);
4833 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4834 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4835 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4836 Builder.CreateCall(StaticInit,
4838 SchedulingType, PLastIter,
4839 PLowerBound, PUpperBound,
4844 Value *FirstChunkStart =
4845 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4846 Value *FirstChunkStop =
4847 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4848 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4850 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4851 Value *NextChunkStride =
4852 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4855 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4856 Value *DispatchCounter;
4861 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4862 {Builder.saveIP(),
DL},
4863 [&](InsertPointTy BodyIP,
Value *Counter) {
4864 DispatchCounter = Counter;
4867 FirstChunkStart, CastedTripCount, NextChunkStride,
4873 BasicBlock *DispatchBody = DispatchCLI->getBody();
4874 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4875 BasicBlock *DispatchExit = DispatchCLI->getExit();
4876 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4877 DispatchCLI->invalidate();
4885 Builder.restoreIP(CLI->getPreheaderIP());
4886 Builder.SetCurrentDebugLocation(
DL);
4889 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4890 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4891 Value *IsLastChunk =
4892 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4893 Value *CountUntilOrigTripCount =
4894 Builder.CreateSub(CastedTripCount, DispatchCounter);
4895 Value *ChunkTripCount = Builder.CreateSelect(
4896 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4897 Value *BackcastedChunkTC =
4898 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4899 CLI->setTripCount(BackcastedChunkTC);
4904 Value *BackcastedDispatchCounter =
4905 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4907 Builder.restoreIP(CLI->getBodyIP());
4908 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4913 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4917 InsertPointOrErrorTy AfterIP =
4918 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4921 return AfterIP.takeError();
4939 unsigned Bitwidth = Ty->getIntegerBitWidth();
4940 Module &M = OMPBuilder->M;
4942 case WorksharingLoopType::ForStaticLoop:
4944 return OMPBuilder->getOrCreateRuntimeFunction(
4945 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4947 return OMPBuilder->getOrCreateRuntimeFunction(
4948 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4950 case WorksharingLoopType::DistributeStaticLoop:
4952 return OMPBuilder->getOrCreateRuntimeFunction(
4953 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4955 return OMPBuilder->getOrCreateRuntimeFunction(
4956 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4958 case WorksharingLoopType::DistributeForStaticLoop:
4960 return OMPBuilder->getOrCreateRuntimeFunction(
4961 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4963 return OMPBuilder->getOrCreateRuntimeFunction(
4964 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4967 if (Bitwidth != 32 && Bitwidth != 64) {
4981 Module &M = OMPBuilder->M;
4990 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4991 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4992 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4993 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4994 Builder.CreateCall(RTLFn, RealArgs);
4997 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
4998 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4999 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5000 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5003 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5004 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5005 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5006 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5008 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5010 Builder.CreateCall(RTLFn, RealArgs);
5014 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5019 Value *TripCount = CLI->getTripCount();
5025 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5026 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5031 Builder.restoreIP({Preheader, Preheader->
end()});
5034 Builder.CreateBr(CLI->getExit());
5037 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5040 CleanUpInfo.EntryBB = CLI->getHeader();
5041 CleanUpInfo.ExitBB = CLI->getExit();
5042 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5050 "Expected unique undroppable user of outlined function");
5052 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5054 "Expected outlined function call to be located in loop preheader");
5056 if (OutlinedFnCallInstruction->
arg_size() > 1)
5063 LoopBodyArg, TripCount, OutlinedFn);
5065 for (
auto &ToBeDeletedItem : ToBeDeleted)
5066 ToBeDeletedItem->eraseFromParent();
5070OpenMPIRBuilder::InsertPointTy
5071OpenMPIRBuilder::applyWorkshareLoopTarget(
DebugLoc DL, CanonicalLoopInfo *CLI,
5072 InsertPointTy AllocaIP,
5075 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5076 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5079 OI.OuterAllocaBB = CLI->getPreheader();
5085 OI.OuterAllocaBB = AllocaIP.getBlock();
5088 OI.EntryBB = CLI->getBody();
5089 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5090 "omp.prelatch",
true);
5093 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5097 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5099 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5110 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5121 CLI->getPreheader(),
5130 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5136 CLI->getIndVar()->user_end());
5139 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5140 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5146 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5153 OI.PostOutlineCB = [=, ToBeDeletedVec =
5154 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5158 addOutlineInfo(std::move(OI));
5159 return CLI->getAfterIP();
5162OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5163 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5164 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5165 bool HasSimdModifier,
bool HasMonotonicModifier,
5166 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5168 if (Config.isTargetDevice())
5169 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType);
5171 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5172 HasNonmonotonicModifier, HasOrderedClause);
5174 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5175 OMPScheduleType::ModifierOrdered;
5176 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5177 case OMPScheduleType::BaseStatic:
5178 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5180 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5181 NeedsBarrier, ChunkSize);
5183 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5185 case OMPScheduleType::BaseStaticChunked:
5187 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5188 NeedsBarrier, ChunkSize);
5190 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
5193 case OMPScheduleType::BaseRuntime:
5194 case OMPScheduleType::BaseAuto:
5195 case OMPScheduleType::BaseGreedy:
5196 case OMPScheduleType::BaseBalanced:
5197 case OMPScheduleType::BaseSteal:
5198 case OMPScheduleType::BaseGuidedSimd:
5199 case OMPScheduleType::BaseRuntimeSimd:
5201 "schedule type does not support user-defined chunk sizes");
5203 case OMPScheduleType::BaseDynamicChunked:
5204 case OMPScheduleType::BaseGuidedChunked:
5205 case OMPScheduleType::BaseGuidedIterativeChunked:
5206 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5207 case OMPScheduleType::BaseStaticBalancedChunked:
5208 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5209 NeedsBarrier, ChunkSize);
5222 unsigned Bitwidth = Ty->getIntegerBitWidth();
5224 return OMPBuilder.getOrCreateRuntimeFunction(
5225 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5227 return OMPBuilder.getOrCreateRuntimeFunction(
5228 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5238 unsigned Bitwidth = Ty->getIntegerBitWidth();
5240 return OMPBuilder.getOrCreateRuntimeFunction(
5241 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5243 return OMPBuilder.getOrCreateRuntimeFunction(
5244 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5253 unsigned Bitwidth = Ty->getIntegerBitWidth();
5255 return OMPBuilder.getOrCreateRuntimeFunction(
5256 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5258 return OMPBuilder.getOrCreateRuntimeFunction(
5259 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5263OpenMPIRBuilder::InsertPointOrErrorTy
5264OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5265 InsertPointTy AllocaIP,
5267 bool NeedsBarrier,
Value *Chunk) {
5268 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5270 "Require dedicated allocate IP");
5272 "Require valid schedule type");
5274 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5275 OMPScheduleType::ModifierOrdered;
5278 Builder.SetCurrentDebugLocation(
DL);
5281 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5282 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5286 Type *IVTy =
IV->getType();
5291 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5293 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5294 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5295 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5296 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5297 CLI->setLastIter(PLastIter);
5305 Constant *One = ConstantInt::get(IVTy, 1);
5306 Builder.CreateStore(One, PLowerBound);
5307 Value *UpperBound = CLI->getTripCount();
5308 Builder.CreateStore(UpperBound, PUpperBound);
5309 Builder.CreateStore(One, PStride);
5315 InsertPointTy AfterIP = CLI->getAfterIP();
5323 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5326 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5329 Builder.CreateCall(DynamicInit,
5330 {SrcLoc, ThreadNum, SchedulingType, One,
5331 UpperBound, One, Chunk});
5340 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5341 PLowerBound, PUpperBound, PStride});
5342 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5345 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5346 Builder.CreateCondBr(MoreWork, Header, Exit);
5352 PI->setIncomingBlock(0, OuterCond);
5353 PI->setIncomingValue(0, LowerBound);
5358 Br->setSuccessor(0, OuterCond);
5363 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5364 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5371 assert(BI->getSuccessor(1) == Exit);
5372 BI->setSuccessor(1, OuterCond);
5376 Builder.SetInsertPoint(&Latch->
back());
5378 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5383 Builder.SetInsertPoint(&
Exit->back());
5384 InsertPointOrErrorTy BarrierIP =
5385 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5386 omp::Directive::OMPD_for,
false,
5389 return BarrierIP.takeError();
5408 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5413 if (BBsToErase.
count(UseInst->getParent()))
5420 while (BBsToErase.
remove_if(HasRemainingUses)) {
5430 InsertPointTy ComputeIP) {
5431 assert(
Loops.size() >= 1 &&
"At least one loop required");
5432 size_t NumLoops =
Loops.size();
5436 return Loops.front();
5438 CanonicalLoopInfo *Outermost =
Loops.front();
5439 CanonicalLoopInfo *Innermost =
Loops.back();
5440 BasicBlock *OrigPreheader = Outermost->getPreheader();
5441 BasicBlock *OrigAfter = Outermost->getAfter();
5448 Loop->collectControlBlocks(OldControlBBs);
5451 Builder.SetCurrentDebugLocation(
DL);
5452 if (ComputeIP.isSet())
5453 Builder.restoreIP(ComputeIP);
5455 Builder.restoreIP(Outermost->getPreheaderIP());
5459 Value *CollapsedTripCount =
nullptr;
5460 for (CanonicalLoopInfo *L :
Loops) {
5462 "All loops to collapse must be valid canonical loops");
5463 Value *OrigTripCount =
L->getTripCount();
5464 if (!CollapsedTripCount) {
5465 CollapsedTripCount = OrigTripCount;
5470 CollapsedTripCount = Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5474 CanonicalLoopInfo *
Result =
5475 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5476 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5482 Builder.restoreIP(
Result->getBodyIP());
5486 NewIndVars.
resize(NumLoops);
5487 for (
int i = NumLoops - 1; i >= 1; --i) {
5488 Value *OrigTripCount =
Loops[i]->getTripCount();
5490 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5491 NewIndVars[i] = NewIndVar;
5493 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5496 NewIndVars[0] = Leftover;
5507 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5514 ContinueBlock =
nullptr;
5515 ContinuePred = NextSrc;
5522 for (
size_t i = 0; i < NumLoops - 1; ++i)
5523 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5526 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5529 for (
size_t i = NumLoops - 1; i > 0; --i)
5530 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5533 ContinueWith(
Result->getLatch(),
nullptr);
5540 for (
size_t i = 0; i < NumLoops; ++i)
5541 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5546 for (CanonicalLoopInfo *L :
Loops)
5555std::vector<CanonicalLoopInfo *>
5559 "Must pass as many tile sizes as there are loops");
5560 int NumLoops =
Loops.size();
5561 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5563 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5564 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5565 Function *
F = OutermostLoop->getBody()->getParent();
5566 BasicBlock *InnerEnter = InnermostLoop->getBody();
5567 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5573 Loop->collectControlBlocks(OldControlBBs);
5580 for (CanonicalLoopInfo *L :
Loops) {
5581 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5593 for (
int i = 0; i < NumLoops - 1; ++i) {
5594 CanonicalLoopInfo *Surrounding =
Loops[i];
5597 BasicBlock *EnterBB = Surrounding->getBody();
5603 Builder.SetCurrentDebugLocation(
DL);
5604 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5606 for (
int i = 0; i < NumLoops; ++i) {
5608 Value *OrigTripCount = OrigTripCounts[i];
5611 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5612 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5621 Value *FloorTripOverflow =
5622 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5624 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5625 Value *FloorTripCount =
5626 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5627 "omp_floor" +
Twine(i) +
".tripcount",
true);
5630 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5636 std::vector<CanonicalLoopInfo *>
Result;
5637 Result.reserve(NumLoops * 2);
5641 BasicBlock *Enter = OutermostLoop->getPreheader();
5648 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5650 auto EmbeddNewLoop =
5651 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5653 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5654 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5659 Enter = EmbeddedLoop->getBody();
5660 Continue = EmbeddedLoop->getLatch();
5661 OutroInsertBefore = EmbeddedLoop->getLatch();
5662 return EmbeddedLoop;
5666 const Twine &NameBase) {
5668 CanonicalLoopInfo *EmbeddedLoop =
5669 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5670 Result.push_back(EmbeddedLoop);
5674 EmbeddNewLoops(FloorCount,
"floor");
5678 Builder.SetInsertPoint(Enter->getTerminator());
5680 for (
int i = 0; i < NumLoops; ++i) {
5681 CanonicalLoopInfo *FloorLoop =
Result[i];
5684 Value *FloorIsEpilogue =
5685 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5686 Value *TileTripCount =
5687 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5693 EmbeddNewLoops(TileCounts,
"tile");
5698 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5707 BodyEnter =
nullptr;
5708 BodyEntered = ExitBB;
5720 Builder.restoreIP(
Result.back()->getBodyIP());
5721 for (
int i = 0; i < NumLoops; ++i) {
5722 CanonicalLoopInfo *FloorLoop =
Result[i];
5723 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5724 Value *OrigIndVar = OrigIndVars[i];
5728 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5730 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5737 for (CanonicalLoopInfo *L :
Loops)
5741 for (CanonicalLoopInfo *GenL : Result)
5752 if (Properties.
empty())
5775 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5779 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5787 if (
I.mayReadOrWriteMemory()) {
5791 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5796void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
5803void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
5811void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5814 const Twine &NamePrefix) {
5815 Function *
F = CanonicalLoop->getFunction();
5837 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5843 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5845 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
5848 Builder.SetInsertPoint(SplitBeforeIt);
5850 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5853 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
5856 Builder.SetInsertPoint(ElseBlock);
5862 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
5864 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
5870 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5872 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
5879 if (
Block == ThenBlock)
5880 NewBB->
setName(NamePrefix +
".if.else");
5883 VMap[
Block] = NewBB;
5887 Builder.CreateBr(NewBlocks.
front());
5891 L->getLoopLatch()->splitBasicBlock(
5892 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5896 L->addBasicBlockToLoop(ThenBlock, LI);
5900OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
5902 if (TargetTriple.
isX86()) {
5903 if (Features.
lookup(
"avx512f"))
5905 else if (Features.
lookup(
"avx"))
5909 if (TargetTriple.
isPPC())
5911 if (TargetTriple.
isWasm())
5916void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5918 Value *IfCond, OrderKind Order,
5922 Function *
F = CanonicalLoop->getFunction();
5937 if (AlignedVars.
size()) {
5938 InsertPointTy IP = Builder.saveIP();
5939 for (
auto &AlignedItem : AlignedVars) {
5940 Value *AlignedPtr = AlignedItem.first;
5941 Value *Alignment = AlignedItem.second;
5944 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
5947 Builder.restoreIP(IP);
5952 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5962 if (
Block == CanonicalLoop->getCond() ||
5963 Block == CanonicalLoop->getHeader())
5965 Reachable.insert(
Block);
5975 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5983 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5999 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6001 if (Simdlen || Safelen) {
6005 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6031static std::unique_ptr<TargetMachine>
6035 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6036 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6047 std::nullopt, OptLevel));
6071 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6072 FAM.registerPass([&]() {
return TIRA; });
6086 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6091 nullptr, ORE,
static_cast<int>(OptLevel),
6112 <<
" Threshold=" << UP.
Threshold <<
"\n"
6115 <<
" PartialOptSizeThreshold="
6135 Ptr = Load->getPointerOperand();
6137 Ptr = Store->getPointerOperand();
6141 Ptr =
Ptr->stripPointerCasts();
6144 if (Alloca->getParent() == &
F->getEntryBlock())
6164 int MaxTripCount = 0;
6165 bool MaxOrZero =
false;
6166 unsigned TripMultiple = 0;
6168 bool UseUpperBound =
false;
6170 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6172 unsigned Factor = UP.
Count;
6173 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6181void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6183 CanonicalLoopInfo **UnrolledCLI) {
6184 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6200 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6213 *UnrolledCLI =
Loop;
6218 "unrolling only makes sense with a factor of 2 or larger");
6220 Type *IndVarTy =
Loop->getIndVarType();
6227 std::vector<CanonicalLoopInfo *>
LoopNest =
6228 tileLoops(
DL, {
Loop}, {FactorVal});
6231 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6242 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6245 (*UnrolledCLI)->assertOK();
6249OpenMPIRBuilder::InsertPointTy
6250OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6253 if (!updateToLocation(
Loc))
6257 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6258 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6259 Value *ThreadId = getOrCreateThreadID(Ident);
6261 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6263 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6265 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6266 Builder.CreateCall(Fn, Args);
6268 return Builder.saveIP();
6271OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6272 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6276 if (!updateToLocation(
Loc))
6282 if (!CPVars.
empty()) {
6284 Builder.CreateStore(Builder.getInt32(0), DidIt);
6287 Directive OMPD = Directive::OMPD_single;
6289 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6290 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6291 Value *ThreadId = getOrCreateThreadID(Ident);
6294 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6295 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6297 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6298 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6300 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6301 if (
Error Err = FiniCB(IP))
6308 Builder.CreateStore(Builder.getInt32(1), DidIt);
6321 InsertPointOrErrorTy AfterIP =
6322 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6326 return AfterIP.takeError();
6329 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6331 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6332 ConstantInt::get(
Int64, 0), CPVars[
I],
6335 }
else if (!IsNowait) {
6336 InsertPointOrErrorTy AfterIP =
6337 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6338 omp::Directive::OMPD_unknown,
false,
6341 return AfterIP.takeError();
6343 return Builder.saveIP();
6346OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6347 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6348 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6350 if (!updateToLocation(
Loc))
6353 Directive OMPD = Directive::OMPD_critical;
6355 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6356 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6357 Value *ThreadId = getOrCreateThreadID(Ident);
6358 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6359 Value *
Args[] = {Ident, ThreadId, LockVar};
6365 EnterArgs.push_back(HintInst);
6366 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6368 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6370 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6373 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6374 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6376 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6380OpenMPIRBuilder::InsertPointTy
6381OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6382 InsertPointTy AllocaIP,
unsigned NumLoops,
6384 const Twine &Name,
bool IsDependSource) {
6388 "OpenMP runtime requires depend vec with i64 type");
6390 if (!updateToLocation(
Loc))
6395 Builder.restoreIP(AllocaIP);
6396 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6398 updateToLocation(
Loc);
6401 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6402 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6403 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6404 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6408 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6409 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6412 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6413 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6414 Value *ThreadId = getOrCreateThreadID(Ident);
6415 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6419 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6421 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6422 Builder.CreateCall(RTLFn, Args);
6424 return Builder.saveIP();
6427OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6428 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6429 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6430 if (!updateToLocation(
Loc))
6433 Directive OMPD = Directive::OMPD_ordered;
6439 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6440 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6441 Value *ThreadId = getOrCreateThreadID(Ident);
6444 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6445 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6448 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6449 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6452 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6456OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6458 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6459 bool HasFinalize,
bool IsCancellable) {
6462 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6466 BasicBlock *EntryBB = Builder.GetInsertBlock();
6475 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6478 if (
Error Err = BodyGenCB( InsertPointTy(),
6486 "Unexpected control flow graph state!!");
6487 InsertPointOrErrorTy AfterIP =
6488 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6490 return AfterIP.takeError();
6492 "Unexpected Control Flow State!");
6498 "Unexpected Insertion point location!");
6501 auto InsertBB = merged ? ExitPredBB : ExitBB;
6504 Builder.SetInsertPoint(InsertBB);
6506 return Builder.saveIP();
6509OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6512 if (!Conditional || !EntryCall)
6513 return Builder.saveIP();
6515 BasicBlock *EntryBB = Builder.GetInsertBlock();
6516 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6528 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6530 Builder.SetInsertPoint(UI);
6531 Builder.Insert(EntryBBTI);
6532 UI->eraseFromParent();
6539OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6540 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6543 Builder.restoreIP(FinIP);
6547 assert(!FinalizationStack.empty() &&
6548 "Unexpected finalization stack state!");
6550 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6551 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6553 if (
Error Err = Fi.FiniCB(FinIP))
6560 Builder.SetInsertPoint(FiniBBTI);
6564 return Builder.saveIP();
6568 Builder.Insert(ExitCall);
6574OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6575 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6604 "copyin.not.master.end");
6611 Builder.SetInsertPoint(OMP_Entry);
6612 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6613 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6614 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6615 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6617 Builder.SetInsertPoint(CopyBegin);
6619 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6621 return Builder.saveIP();
6624CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6628 updateToLocation(
Loc);
6631 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6632 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6633 Value *ThreadId = getOrCreateThreadID(Ident);
6636 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6638 return Builder.CreateCall(Fn, Args, Name);
6641CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6645 updateToLocation(
Loc);
6648 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6649 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6650 Value *ThreadId = getOrCreateThreadID(Ident);
6652 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6653 return Builder.CreateCall(Fn, Args, Name);
6656CallInst *OpenMPIRBuilder::createOMPInteropInit(
6657 const LocationDescription &
Loc,
Value *InteropVar,
6659 Value *DependenceAddress,
bool HaveNowaitClause) {
6661 updateToLocation(
Loc);
6664 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6665 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6666 Value *ThreadId = getOrCreateThreadID(Ident);
6667 if (Device ==
nullptr)
6669 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6670 if (NumDependences ==
nullptr) {
6671 NumDependences = ConstantInt::get(
Int32, 0);
6675 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6677 Ident, ThreadId, InteropVar, InteropTypeVal,
6678 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6680 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6682 return Builder.CreateCall(Fn, Args);
6685CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6686 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6687 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6689 updateToLocation(
Loc);
6692 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6693 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6694 Value *ThreadId = getOrCreateThreadID(Ident);
6695 if (Device ==
nullptr)
6697 if (NumDependences ==
nullptr) {
6698 NumDependences = ConstantInt::get(
Int32, 0);
6702 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6704 Ident, ThreadId, InteropVar,
Device,
6705 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6707 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6709 return Builder.CreateCall(Fn, Args);
6712CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6714 Value *NumDependences,
6715 Value *DependenceAddress,
6716 bool HaveNowaitClause) {
6718 updateToLocation(
Loc);
6720 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6721 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6722 Value *ThreadId = getOrCreateThreadID(Ident);
6723 if (Device ==
nullptr)
6725 if (NumDependences ==
nullptr) {
6726 NumDependences = ConstantInt::get(
Int32, 0);
6730 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6732 Ident, ThreadId, InteropVar,
Device,
6733 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6735 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6737 return Builder.CreateCall(Fn, Args);
6740CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6744 updateToLocation(
Loc);
6747 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6748 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6749 Value *ThreadId = getOrCreateThreadID(Ident);
6751 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6755 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6757 return Builder.CreateCall(Fn, Args);
6760OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6761 const LocationDescription &
Loc,
6762 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6764 "expected num_threads and num_teams to be specified");
6766 if (!updateToLocation(
Loc))
6770 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6771 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6783 const std::string DebugPrefix =
"_debug__";
6784 if (KernelName.
ends_with(DebugPrefix)) {
6785 KernelName = KernelName.
drop_back(DebugPrefix.length());
6786 Kernel = M.getFunction(KernelName);
6792 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
6797 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
6798 if (MaxThreadsVal < 0)
6799 MaxThreadsVal = std::max(
6802 if (MaxThreadsVal > 0)
6803 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
6814 Function *Fn = getOrCreateRuntimeFunctionPtr(
6815 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6818 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6819 Constant *DynamicEnvironmentInitializer =
6823 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6825 DL.getDefaultGlobalsAddressSpace());
6829 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6830 ? DynamicEnvironmentGV
6832 DynamicEnvironmentPtr);
6835 ConfigurationEnvironment, {
6836 UseGenericStateMachineVal,
6837 MayUseNestedParallelismVal,
6844 ReductionBufferLength,
6847 KernelEnvironment, {
6848 ConfigurationEnvironmentInitializer,
6852 std::string KernelEnvironmentName =
6853 (KernelName +
"_kernel_environment").str();
6856 KernelEnvironmentInitializer, KernelEnvironmentName,
6858 DL.getDefaultGlobalsAddressSpace());
6862 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6863 ? KernelEnvironmentGV
6865 KernelEnvironmentPtr);
6866 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6868 KernelLaunchEnvironment =
6869 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6870 ? KernelLaunchEnvironment
6871 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6872 KernelLaunchEnvParamTy);
6874 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6876 Value *ExecUserCode = Builder.CreateICmpEQ(
6886 auto *UI = Builder.CreateUnreachable();
6892 Builder.SetInsertPoint(WorkerExitBB);
6893 Builder.CreateRetVoid();
6896 Builder.SetInsertPoint(CheckBBTI);
6897 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6900 UI->eraseFromParent();
6907void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
6908 int32_t TeamsReductionDataSize,
6909 int32_t TeamsReductionBufferLength) {
6910 if (!updateToLocation(
Loc))
6913 Function *Fn = getOrCreateRuntimeFunctionPtr(
6914 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6916 Builder.CreateCall(Fn, {});
6918 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6924 const std::string DebugPrefix =
"_debug__";
6926 KernelName = KernelName.
drop_back(DebugPrefix.length());
6927 auto *KernelEnvironmentGV =
6928 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
6929 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6930 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
6932 KernelEnvironmentInitializer,
6933 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6935 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6942 if (
Kernel.hasFnAttribute(Name)) {
6943 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
6949std::pair<int32_t, int32_t>
6951 int32_t ThreadLimit =
6952 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
6955 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6956 if (!Attr.isValid() || !Attr.isStringAttribute())
6957 return {0, ThreadLimit};
6958 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6961 return {0, ThreadLimit};
6962 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6968 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6969 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6970 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6972 return {0, ThreadLimit};
6975void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6978 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6981 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
6989std::pair<int32_t, int32_t>
6992 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
6996 int32_t LB, int32_t UB) {
7003 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7006void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7008 if (Config.isTargetDevice()) {
7015 else if (
T.isNVPTX())
7017 else if (
T.isSPIRV())
7024 if (Config.isTargetDevice()) {
7025 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7034Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7039 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7040 "Named kernel already exists?");
7046Error OpenMPIRBuilder::emitTargetRegionFunction(
7047 TargetRegionEntryInfo &EntryInfo,
7048 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7052 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7054 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7058 OutlinedFn = *CBResult;
7060 OutlinedFn =
nullptr;
7066 if (!IsOffloadEntry)
7069 std::string EntryFnIDName =
7070 Config.isTargetDevice()
7071 ? std::string(EntryFnName)
7072 : createPlatformSpecificName({EntryFnName,
"region_id"});
7074 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7075 EntryFnName, EntryFnIDName);
7079Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7080 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7083 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7084 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7085 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7086 OffloadInfoManager.registerTargetRegionEntryInfo(
7087 EntryInfo, EntryAddr, OutlinedFnID,
7088 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7089 return OutlinedFnID;
7092OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7093 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7094 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7095 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7097 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7098 BodyGenTy BodyGenType)>
7101 if (!updateToLocation(
Loc))
7102 return InsertPointTy();
7104 Builder.restoreIP(CodeGenIP);
7106 if (Config.IsTargetDevice.value_or(
false)) {
7108 InsertPointOrErrorTy AfterIP =
7109 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7111 return AfterIP.takeError();
7112 Builder.restoreIP(*AfterIP);
7114 return Builder.saveIP();
7117 bool IsStandAlone = !BodyGenCB;
7118 MapInfosTy *MapInfo;
7122 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7123 InsertPointTy CodeGenIP) ->
Error {
7124 MapInfo = &GenMapInfoCB(Builder.saveIP());
7125 if (
Error Err = emitOffloadingArrays(
7126 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7127 true, DeviceAddrCB))
7130 TargetDataRTArgs RTArgs;
7131 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7134 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7139 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7140 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7144 SrcLocInfo, DeviceID,
7145 PointerNum, RTArgs.BasePointersArray,
7146 RTArgs.PointersArray, RTArgs.SizesArray,
7147 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7148 RTArgs.MappersArray};
7151 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7155 if (
Info.HasNoWait) {
7162 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7165 if (
Info.HasNoWait) {
7169 emitBlock(OffloadContBlock, CurFn,
true);
7170 Builder.restoreIP(Builder.saveIP());
7175 bool RequiresOuterTargetTask =
Info.HasNoWait;
7176 if (!RequiresOuterTargetTask)
7177 cantFail(TaskBodyCB(
nullptr,
nullptr,
7180 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7181 {}, RTArgs,
Info.HasNoWait));
7183 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7184 omp::OMPRTL___tgt_target_data_begin_mapper);
7186 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7188 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7191 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7192 Builder.CreateStore(LI, DeviceMap.second.second);
7199 InsertPointOrErrorTy AfterIP =
7200 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7202 return AfterIP.takeError();
7203 Builder.restoreIP(*AfterIP);
7211 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7212 InsertPointTy CodeGenIP) ->
Error {
7213 InsertPointOrErrorTy AfterIP =
7214 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7216 return AfterIP.takeError();
7217 Builder.restoreIP(*AfterIP);
7222 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7223 TargetDataRTArgs RTArgs;
7224 Info.EmitDebug = !MapInfo->Names.empty();
7225 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7228 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7233 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7234 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7237 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7238 PointerNum, RTArgs.BasePointersArray,
7239 RTArgs.PointersArray, RTArgs.SizesArray,
7240 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7241 RTArgs.MappersArray};
7243 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7245 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7251 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7259 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7260 return BeginThenGen(AllocaIP, Builder.saveIP());
7268 InsertPointOrErrorTy AfterIP =
7269 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7271 return AfterIP.takeError();
7275 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7276 return EndThenGen(AllocaIP, Builder.saveIP());
7279 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7280 return BeginThenGen(AllocaIP, Builder.saveIP());
7286 return Builder.saveIP();
7290OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7291 bool IsGPUDistribute) {
7292 assert((IVSize == 32 || IVSize == 64) &&
7293 "IV size is not compatible with the omp runtime");
7295 if (IsGPUDistribute)
7297 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7298 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7299 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7300 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7302 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7303 : omp::OMPRTL___kmpc_for_static_init_4u)
7304 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7305 : omp::OMPRTL___kmpc_for_static_init_8u);
7307 return getOrCreateRuntimeFunction(M, Name);
7310FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7312 assert((IVSize == 32 || IVSize == 64) &&
7313 "IV size is not compatible with the omp runtime");
7315 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7316 : omp::OMPRTL___kmpc_dispatch_init_4u)
7317 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7318 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7320 return getOrCreateRuntimeFunction(M, Name);
7323FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7325 assert((IVSize == 32 || IVSize == 64) &&
7326 "IV size is not compatible with the omp runtime");
7328 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7329 : omp::OMPRTL___kmpc_dispatch_next_4u)
7330 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7331 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7333 return getOrCreateRuntimeFunction(M, Name);
7336FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7338 assert((IVSize == 32 || IVSize == 64) &&
7339 "IV size is not compatible with the omp runtime");
7341 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7342 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7343 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7344 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7346 return getOrCreateRuntimeFunction(M, Name);
7350 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7355 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7363 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7367 if (NewVar && (arg == NewVar->
getArg()))
7377 auto UpdateDebugRecord = [&](
auto *DR) {
7380 for (
auto Loc : DR->location_ops()) {
7381 auto Iter = ValueReplacementMap.find(
Loc);
7382 if (Iter != ValueReplacementMap.end()) {
7383 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7384 ArgNo = std::get<1>(Iter->second) + 1;
7388 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7395 "Unexpected debug intrinsic");
7397 UpdateDebugRecord(&DVR);
7400 if (OMPBuilder.Config.isTargetDevice()) {
7402 Module *M = Func->getParent();
7405 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7407 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7408 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7410 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7423 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7425 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7426 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7428 if (OMPBuilder.Config.isTargetDevice()) {
7436 for (
auto &Arg : Inputs)
7441 for (
auto &Arg : Inputs)
7445 auto BB = Builder.GetInsertBlock();
7457 if (TargetCpuAttr.isStringAttribute())
7458 Func->addFnAttr(TargetCpuAttr);
7460 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7461 if (TargetFeaturesAttr.isStringAttribute())
7462 Func->addFnAttr(TargetFeaturesAttr);
7464 if (OMPBuilder.Config.isTargetDevice()) {
7466 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7467 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7478 Builder.SetInsertPoint(EntryBB);
7481 if (OMPBuilder.Config.isTargetDevice())
7482 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7484 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7489 if (OMPBuilder.Config.isTargetDevice())
7490 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7494 splitBB(Builder,
true,
"outlined.body");
7495 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7497 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7499 return AfterIP.takeError();
7500 Builder.restoreIP(*AfterIP);
7501 if (OMPBuilder.Config.isTargetDevice())
7502 OMPBuilder.createTargetDeinit(Builder);
7505 Builder.CreateRetVoid();
7509 auto AllocaIP = Builder.saveIP();
7514 const auto &ArgRange =
7515 OMPBuilder.Config.isTargetDevice()
7516 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7549 if (Instr->getFunction() == Func)
7550 Instr->replaceUsesOfWith(
Input, InputCopy);
7556 for (
auto InArg :
zip(Inputs, ArgRange)) {
7558 Argument &Arg = std::get<1>(InArg);
7559 Value *InputCopy =
nullptr;
7561 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7562 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7564 return AfterIP.takeError();
7565 Builder.restoreIP(*AfterIP);
7566 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7586 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7593 ReplaceValue(
Input, InputCopy, Func);
7597 for (
auto Deferred : DeferredReplacement)
7598 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7601 ValueReplacementMap);
7609 Value *TaskWithPrivates,
7610 Type *TaskWithPrivatesTy) {
7612 Type *TaskTy = OMPIRBuilder.Task;
7615 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7616 Value *Shareds = TaskT;
7626 if (TaskWithPrivatesTy != TaskTy)
7627 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7644 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7649 assert((!NumOffloadingArrays || PrivatesTy) &&
7650 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7653 Module &M = OMPBuilder.M;
7677 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7683 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7684 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7690 ".omp_target_task_proxy_func",
7691 Builder.GetInsertBlock()->getModule());
7692 Value *ThreadId = ProxyFn->getArg(0);
7693 Value *TaskWithPrivates = ProxyFn->getArg(1);
7694 ThreadId->
setName(
"thread.id");
7695 TaskWithPrivates->
setName(
"task");
7697 bool HasShareds = SharedArgsOperandNo > 0;
7698 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7701 Builder.SetInsertPoint(EntryBB);
7707 if (HasOffloadingArrays) {
7708 assert(TaskTy != TaskWithPrivatesTy &&
7709 "If there are offloading arrays to pass to the target"
7710 "TaskTy cannot be the same as TaskWithPrivatesTy");
7713 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7714 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7716 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7720 auto *ArgStructAlloca =
7722 assert(ArgStructAlloca &&
7723 "Unable to find the alloca instruction corresponding to arguments "
7724 "for extracted function");
7728 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7730 Value *SharedsSize =
7731 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7734 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7736 Builder.CreateMemCpy(
7737 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7739 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7741 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7742 Builder.CreateRetVoid();
7748 return GEP->getSourceElementType();
7750 return Alloca->getAllocatedType();
7773 if (OffloadingArraysToPrivatize.
empty())
7774 return OMPIRBuilder.Task;
7777 for (
Value *V : OffloadingArraysToPrivatize) {
7778 assert(V->getType()->isPointerTy() &&
7779 "Expected pointer to array to privatize. Got a non-pointer value "
7782 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7788 "struct.task_with_privates");
7791 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7792 TargetRegionEntryInfo &EntryInfo,
7793 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7796 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7797 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7799 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7802 EntryFnName, Inputs, CBFunc,
7806 return OMPBuilder.emitTargetRegionFunction(
7807 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7811OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7812 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
7813 OpenMPIRBuilder::InsertPointTy AllocaIP,
7815 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
7939 splitBB(Builder,
true,
"target.task.body");
7941 splitBB(Builder,
true,
"target.task.alloca");
7943 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7944 TargetTaskAllocaBB->
begin());
7945 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
7948 OI.EntryBB = TargetTaskAllocaBB;
7949 OI.OuterAllocaBB = AllocaIP.getBlock();
7954 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7957 Builder.restoreIP(TargetTaskBodyIP);
7958 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7972 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7976 bool NeedsTargetTask = HasNoWait && DeviceID;
7977 if (NeedsTargetTask) {
7979 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7980 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7981 RTArgs.SizesArray}) {
7983 OffloadingArraysToPrivatize.
push_back(V);
7984 OI.ExcludeArgsFromAggregate.push_back(V);
7988 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
7989 DeviceID, OffloadingArraysToPrivatize](
7992 "there must be a single user for the outlined function");
8006 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8007 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8009 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8010 "Wrong number of arguments for StaleCI when shareds are present");
8011 int SharedArgOperandNo =
8012 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8018 if (!OffloadingArraysToPrivatize.
empty())
8023 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8024 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8026 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8029 Builder.SetInsertPoint(StaleCI);
8034 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8035 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8044 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8045 : getOrCreateRuntimeFunctionPtr(
8046 OMPRTL___kmpc_omp_target_task_alloc);
8050 Value *ThreadID = getOrCreateThreadID(Ident);
8057 Value *TaskSize = Builder.getInt64(
8058 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8063 Value *SharedsSize = Builder.getInt64(0);
8065 auto *ArgStructAlloca =
8067 assert(ArgStructAlloca &&
8068 "Unable to find the alloca instruction corresponding to arguments "
8069 "for extracted function");
8070 auto *ArgStructType =
8072 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8073 "arguments for extracted function");
8075 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8084 Value *Flags = Builder.getInt32(0);
8094 TaskSize, SharedsSize,
8097 if (NeedsTargetTask) {
8098 assert(DeviceID &&
"Expected non-empty device ID.");
8102 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8108 *
this, Builder, TaskData, TaskWithPrivatesTy);
8109 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8112 if (!OffloadingArraysToPrivatize.
empty()) {
8114 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8115 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8116 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8123 "ElementType should match ArrayType");
8126 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8127 Builder.CreateMemCpy(
8128 Dst, Alignment, PtrToPrivatize, Alignment,
8129 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8143 if (!NeedsTargetTask) {
8146 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8150 Builder.getInt32(Dependencies.size()),
8152 ConstantInt::get(Builder.getInt32Ty(), 0),
8158 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8160 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8161 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8162 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8164 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8165 }
else if (DepArray) {
8170 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8173 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8174 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8178 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8179 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8184 I->eraseFromParent();
8186 addOutlineInfo(std::move(OI));
8189 << *(Builder.GetInsertBlock()) <<
"\n");
8191 << *(Builder.GetInsertBlock()->getParent()->getParent())
8193 return Builder.saveIP();
8196Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8197 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8198 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8199 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8202 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8203 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8205 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8211 OpenMPIRBuilder::InsertPointTy AllocaIP,
8212 OpenMPIRBuilder::TargetDataInfo &
Info,
8213 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8214 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8217 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8218 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8224 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8225 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8226 Builder.restoreIP(IP);
8227 Builder.CreateCall(OutlinedFn, Args);
8228 return Builder.saveIP();
8231 bool HasDependencies = Dependencies.
size() > 0;
8232 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8234 OpenMPIRBuilder::TargetKernelArgs KArgs;
8241 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8249 if (OutlinedFnID && DeviceID)
8250 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8251 EmitTargetCallFallbackCB, KArgs,
8252 DeviceID, RTLoc, TargetTaskAllocaIP);
8260 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8263 OMPBuilder.Builder.restoreIP(AfterIP);
8267 auto &&EmitTargetCallElse =
8268 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8269 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8272 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8273 if (RequiresOuterTargetTask) {
8277 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8278 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8280 Dependencies, EmptyRTArgs, HasNoWait);
8282 return EmitTargetCallFallbackCB(Builder.saveIP());
8285 Builder.restoreIP(AfterIP);
8289 auto &&EmitTargetCallThen =
8290 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8291 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8292 Info.HasNoWait = HasNoWait;
8293 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8294 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8295 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8296 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8303 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8304 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8309 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8311 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8315 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8318 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8326 Value *MaxThreadsClause =
8327 RuntimeAttrs.TeamsThreadLimit.size() == 1
8328 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8331 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8332 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8333 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8334 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8336 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8337 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8339 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8342 unsigned NumTargetItems =
Info.NumberOfPtrs;
8346 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8347 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8350 Value *TripCount = RuntimeAttrs.LoopTripCount
8351 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8352 Builder.getInt64Ty(),
8354 : Builder.getInt64(0);
8357 Value *DynCGGroupMem = Builder.getInt32(0);
8359 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8360 NumTeamsC, NumThreadsC,
8361 DynCGGroupMem, HasNoWait);
8365 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8368 if (RequiresOuterTargetTask)
8369 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8370 Dependencies, KArgs.RTArgs,
8373 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8374 EmitTargetCallFallbackCB, KArgs,
8375 DeviceID, RTLoc, AllocaIP);
8378 Builder.restoreIP(AfterIP);
8385 if (!OutlinedFnID) {
8386 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8392 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8396 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8397 EmitTargetCallElse, AllocaIP));
8400OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8401 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8402 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8403 TargetRegionEntryInfo &EntryInfo,
8404 const TargetKernelDefaultAttrs &DefaultAttrs,
8405 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8407 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8408 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8409 CustomMapperCallbackTy CustomMapperCB,
8412 if (!updateToLocation(
Loc))
8413 return InsertPointTy();
8415 Builder.restoreIP(CodeGenIP);
8423 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8424 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8430 if (!Config.isTargetDevice())
8432 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8433 CustomMapperCB, Dependencies, HasNowait);
8434 return Builder.saveIP();
8447 return OS.
str().str();
8452 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8453 Config.separator());
8457OpenMPIRBuilder::getOrCreateInternalVariable(
Type *Ty,
const StringRef &Name,
8459 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8461 assert(Elem.second->getValueType() == Ty &&
8462 "OMP internal variable has different type than requested");
8478 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8485Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8486 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8487 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8488 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8491Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8496 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8498 return SizePtrToInt;
8503 std::string VarName) {
8507 M, MaptypesArrayInit->
getType(),
8511 return MaptypesArrayGlobal;
8514void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8515 InsertPointTy AllocaIP,
8516 unsigned NumOperands,
8517 struct MapperAllocas &MapperAllocas) {
8518 if (!updateToLocation(
Loc))
8523 Builder.restoreIP(AllocaIP);
8525 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8529 ArrI64Ty,
nullptr,
".offload_sizes");
8530 updateToLocation(
Loc);
8531 MapperAllocas.ArgsBase = ArgsBase;
8532 MapperAllocas.Args =
Args;
8533 MapperAllocas.ArgSizes = ArgSizes;
8536void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8539 struct MapperAllocas &MapperAllocas,
8540 int64_t DeviceID,
unsigned NumOperands) {
8541 if (!updateToLocation(
Loc))
8546 Value *ArgsBaseGEP =
8547 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8548 {Builder.getInt32(0), Builder.getInt32(0)});
8550 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8551 {Builder.getInt32(0), Builder.getInt32(0)});
8552 Value *ArgSizesGEP =
8553 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8554 {Builder.getInt32(0), Builder.getInt32(0)});
8557 Builder.CreateCall(MapperFunc,
8558 {SrcLocInfo, Builder.getInt64(DeviceID),
8559 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8560 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8563void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8564 TargetDataRTArgs &RTArgs,
8565 TargetDataInfo &
Info,
8567 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8568 "expected region end call to runtime only when end call is separate");
8570 auto VoidPtrTy = UnqualPtrTy;
8571 auto VoidPtrPtrTy = UnqualPtrTy;
8573 auto Int64PtrTy = UnqualPtrTy;
8575 if (!
Info.NumberOfPtrs) {
8585 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8587 Info.RTArgs.BasePointersArray,
8589 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8593 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8596 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8598 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8599 :
Info.RTArgs.MapTypesArray,
8605 if (!
Info.EmitDebug)
8608 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8614 if (!
Info.HasMapper)
8617 RTArgs.MappersArray =
8618 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8621void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8622 InsertPointTy CodeGenIP,
8623 MapInfosTy &CombinedInfo,
8624 TargetDataInfo &
Info) {
8625 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8626 CombinedInfo.NonContigInfo;
8639 "struct.descriptor_dim");
8641 enum { OffsetFD = 0, CountFD, StrideFD };
8645 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8648 if (NonContigInfo.Dims[
I] == 1)
8650 Builder.restoreIP(AllocaIP);
8653 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8654 Builder.restoreIP(CodeGenIP);
8655 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8656 unsigned RevIdx = EE -
II - 1;
8657 Value *DimsLVal = Builder.CreateInBoundsGEP(
8659 {Builder.getInt64(0), Builder.getInt64(II)});
8661 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8662 Builder.CreateAlignedStore(
8663 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8664 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8666 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8667 Builder.CreateAlignedStore(
8668 NonContigInfo.Counts[L][RevIdx], CountLVal,
8669 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8671 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8672 Builder.CreateAlignedStore(
8673 NonContigInfo.Strides[L][RevIdx], StrideLVal,
8674 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8677 Builder.restoreIP(CodeGenIP);
8678 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8679 DimsAddr, Builder.getPtrTy());
8680 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8682 Info.RTArgs.PointersArray, 0,
I);
8683 Builder.CreateAlignedStore(
8684 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8689void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8697 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8699 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8700 Value *DeleteBit = Builder.CreateAnd(
8703 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8704 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8709 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8711 Value *PtrAndObjBit = Builder.CreateAnd(
8714 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8715 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8716 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8717 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8718 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8719 DeleteCond = Builder.CreateIsNull(
8721 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8724 DeleteCond = Builder.CreateIsNotNull(
8726 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8728 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8729 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8731 emitBlock(BodyBB, MapperFn);
8734 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8737 Value *MapTypeArg = Builder.CreateAnd(
8740 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8741 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8742 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8743 MapTypeArg = Builder.CreateOr(
8746 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8747 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8751 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8752 ArraySize, MapTypeArg, MapName};
8754 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8762 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8778 MapperFn->
addFnAttr(Attribute::NoInline);
8779 MapperFn->
addFnAttr(Attribute::NoUnwind);
8789 auto SavedIP = Builder.saveIP();
8790 Builder.SetInsertPoint(EntryBB);
8802 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8803 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8804 Value *PtrBegin = BeginIn;
8805 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8810 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8811 MapType, MapName, ElementSize, HeadBB,
8817 emitBlock(HeadBB, MapperFn);
8822 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8823 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8826 emitBlock(BodyBB, MapperFn);
8829 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8833 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8835 return Info.takeError();
8839 Value *OffloadingArgs[] = {MapperHandle};
8840 Value *PreviousSize = Builder.CreateCall(
8841 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8843 Value *ShiftedPreviousSize =
8844 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8847 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8856 Value *OriMapType = Builder.getInt64(
8857 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8859 Value *MemberMapType =
8860 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8874 Value *LeftToFrom = Builder.CreateAnd(
8877 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8878 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8879 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8888 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8889 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8891 emitBlock(AllocBB, MapperFn);
8892 Value *AllocMapType = Builder.CreateAnd(
8895 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8896 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8897 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8898 Builder.CreateBr(EndBB);
8899 emitBlock(AllocElseBB, MapperFn);
8900 Value *IsTo = Builder.CreateICmpEQ(
8903 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8904 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8905 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8907 emitBlock(ToBB, MapperFn);
8908 Value *ToMapType = Builder.CreateAnd(
8911 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8912 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8913 Builder.CreateBr(EndBB);
8914 emitBlock(ToElseBB, MapperFn);
8915 Value *IsFrom = Builder.CreateICmpEQ(
8918 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8919 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8920 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8922 emitBlock(FromBB, MapperFn);
8923 Value *FromMapType = Builder.CreateAnd(
8926 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8927 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8929 emitBlock(EndBB, MapperFn);
8932 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8938 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8939 CurSizeArg, CurMapType, CurNameArg};
8941 auto ChildMapperFn = CustomMapperCB(
I);
8943 return ChildMapperFn.takeError();
8944 if (*ChildMapperFn) {
8946 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8951 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8958 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8959 "omp.arraymap.next");
8961 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8963 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8965 emitBlock(ExitBB, MapperFn);
8968 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8969 MapType, MapName, ElementSize, DoneBB,
8973 emitBlock(DoneBB, MapperFn,
true);
8975 Builder.CreateRetVoid();
8976 Builder.restoreIP(SavedIP);
8980Error OpenMPIRBuilder::emitOffloadingArrays(
8981 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8982 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
8983 bool IsNonContiguous,
8987 Info.clearArrayInfo();
8988 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8990 if (
Info.NumberOfPtrs == 0)
8993 Builder.restoreIP(AllocaIP);
8999 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9000 PointerArrayType,
nullptr,
".offload_baseptrs");
9002 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9003 PointerArrayType,
nullptr,
".offload_ptrs");
9004 AllocaInst *MappersArray = Builder.CreateAlloca(
9005 PointerArrayType,
nullptr,
".offload_mappers");
9006 Info.RTArgs.MappersArray = MappersArray;
9013 ConstantInt::get(Int64Ty, 0));
9015 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9018 if (IsNonContiguous &&
9019 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9020 CombinedInfo.Types[
I] &
9021 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9023 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9029 RuntimeSizes.set(
I);
9032 if (RuntimeSizes.all()) {
9034 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9035 SizeArrayType,
nullptr,
".offload_sizes");
9040 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9041 auto *SizesArrayGbl =
9046 if (!RuntimeSizes.any()) {
9047 Info.RTArgs.SizesArray = SizesArrayGbl;
9049 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9050 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9053 SizeArrayType,
nullptr,
".offload_sizes");
9056 Builder.CreateMemCpy(
9057 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9058 SizesArrayGbl, OffloadSizeAlign,
9063 Info.RTArgs.SizesArray = Buffer;
9071 for (
auto mapFlag : CombinedInfo.Types)
9073 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9075 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9076 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9077 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9080 if (!CombinedInfo.Names.empty()) {
9081 auto *MapNamesArrayGbl = createOffloadMapnames(
9082 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9083 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9084 Info.EmitDebug =
true;
9086 Info.RTArgs.MapNamesArray =
9088 Info.EmitDebug =
false;
9093 if (
Info.separateBeginEndCalls()) {
9094 bool EndMapTypesDiffer =
false;
9096 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9097 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9098 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9099 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9100 EndMapTypesDiffer =
true;
9103 if (EndMapTypesDiffer) {
9104 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9105 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9110 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9111 Value *BPVal = CombinedInfo.BasePointers[
I];
9112 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9115 Builder.CreateAlignedStore(BPVal, BP,
9116 M.getDataLayout().getPrefTypeAlign(PtrTy));
9118 if (
Info.requiresDevicePointerInfo()) {
9119 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9120 CodeGenIP = Builder.saveIP();
9121 Builder.restoreIP(AllocaIP);
9122 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9123 Builder.restoreIP(CodeGenIP);
9125 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9126 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9127 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9129 DeviceAddrCB(
I, BP);
9133 Value *PVal = CombinedInfo.Pointers[
I];
9134 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9138 Builder.CreateAlignedStore(PVal,
P,
9139 M.getDataLayout().getPrefTypeAlign(PtrTy));
9141 if (RuntimeSizes.test(
I)) {
9142 Value *S = Builder.CreateConstInBoundsGEP2_32(
9146 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9149 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9152 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9155 auto CustomMFunc = CustomMapperCB(
I);
9157 return CustomMFunc.takeError();
9159 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9161 Value *MAddr = Builder.CreateInBoundsGEP(
9163 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9164 Builder.CreateAlignedStore(
9165 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9168 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9169 Info.NumberOfPtrs == 0)
9171 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9176 BasicBlock *CurBB = Builder.GetInsertBlock();
9183 Builder.CreateBr(
Target);
9186 Builder.ClearInsertionPoint();
9191 BasicBlock *CurBB = Builder.GetInsertBlock();
9207 Builder.SetInsertPoint(BB);
9210Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9211 BodyGenCallbackTy ElseGen,
9212 InsertPointTy AllocaIP) {
9216 auto CondConstant = CI->getSExtValue();
9218 return ThenGen(AllocaIP, Builder.saveIP());
9220 return ElseGen(AllocaIP, Builder.saveIP());
9230 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9232 emitBlock(ThenBlock, CurFn);
9233 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9235 emitBranch(ContBlock);
9238 emitBlock(ElseBlock, CurFn);
9239 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9242 emitBranch(ContBlock);
9244 emitBlock(ContBlock, CurFn,
true);
9248bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9252 "Unexpected Atomic Ordering.");
9309OpenMPIRBuilder::InsertPointTy
9310OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9311 AtomicOpValue &
X, AtomicOpValue &V,
9313 if (!updateToLocation(
Loc))
9316 assert(
X.Var->getType()->isPointerTy() &&
9317 "OMP Atomic expects a pointer to target memory");
9318 Type *XElemTy =
X.ElemTy;
9321 "OMP atomic read expected a scalar type");
9323 Value *XRead =
nullptr;
9327 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9333 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9338 OpenMPIRBuilder::AtomicInfo atomicInfo(
9339 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9340 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9341 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9342 XRead = AtomicLoadRes.first;
9349 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9352 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9354 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9357 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9358 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9359 return Builder.saveIP();
9362OpenMPIRBuilder::InsertPointTy
9363OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9364 AtomicOpValue &
X,
Value *Expr,
9366 if (!updateToLocation(
Loc))
9369 assert(
X.Var->getType()->isPointerTy() &&
9370 "OMP Atomic expects a pointer to target memory");
9371 Type *XElemTy =
X.ElemTy;
9374 "OMP atomic write expected a scalar type");
9377 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9380 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9384 OpenMPIRBuilder::AtomicInfo atomicInfo(
9385 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9386 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9387 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9394 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9395 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9399 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9400 return Builder.saveIP();
9403OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9404 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9406 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9407 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9409 if (!updateToLocation(
Loc))
9413 Type *XTy =
X.Var->getType();
9415 "OMP Atomic expects a pointer to target memory");
9416 Type *XElemTy =
X.ElemTy;
9419 "OMP atomic update expected a scalar type");
9422 "OpenMP atomic does not support LT or GT operations");
9426 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9427 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9429 return AtomicResult.takeError();
9430 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9431 return Builder.saveIP();
9435Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9439 return Builder.CreateAdd(Src1, Src2);
9441 return Builder.CreateSub(Src1, Src2);
9443 return Builder.CreateAnd(Src1, Src2);
9445 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9447 return Builder.CreateOr(Src1, Src2);
9449 return Builder.CreateXor(Src1, Src2);
9474 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9475 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9478 bool emitRMWOp =
false;
9486 emitRMWOp = XElemTy;
9489 emitRMWOp = (IsXBinopExpr && XElemTy);
9496 std::pair<Value *, Value *> Res;
9501 if (IsIgnoreDenormalMode)
9502 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9504 if (!IsFineGrainedMemory)
9505 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9507 if (!IsRemoteMemory)
9511 Res.first = RMWInst;
9516 Res.second = Res.first;
9518 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9522 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9528 OpenMPIRBuilder::AtomicInfo atomicInfo(
9529 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9530 OldVal->
getAlign(),
true , AllocaIP,
X);
9531 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9532 BasicBlock *CurBB = Builder.GetInsertBlock();
9534 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9538 X->getName() +
".atomic.cont");
9540 Builder.restoreIP(AllocaIP);
9541 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9542 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9543 Builder.SetInsertPoint(ContBB);
9545 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9550 Value *Upd = *CBResult;
9551 Builder.CreateStore(Upd, NewAtomicAddr);
9554 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9555 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9557 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9558 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9560 Res.first = OldExprVal;
9566 Builder.SetInsertPoint(ExitBB);
9568 Builder.SetInsertPoint(ExitTI);
9574 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9581 BasicBlock *CurBB = Builder.GetInsertBlock();
9583 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9587 X->getName() +
".atomic.cont");
9589 Builder.restoreIP(AllocaIP);
9590 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9591 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9592 Builder.SetInsertPoint(ContBB);
9594 PHI->addIncoming(OldVal, CurBB);
9599 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9600 X->getName() +
".atomic.fltCast");
9602 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9603 X->getName() +
".atomic.ptrCast");
9610 Value *Upd = *CBResult;
9611 Builder.CreateStore(Upd, NewAtomicAddr);
9612 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9617 Result->setVolatile(VolatileX);
9618 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9619 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9620 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9621 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9623 Res.first = OldExprVal;
9630 Builder.SetInsertPoint(ExitBB);
9632 Builder.SetInsertPoint(ExitTI);
9639OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9640 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9643 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9644 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9645 if (!updateToLocation(
Loc))
9649 Type *XTy =
X.Var->getType();
9651 "OMP Atomic expects a pointer to target memory");
9652 Type *XElemTy =
X.ElemTy;
9655 "OMP atomic capture expected a scalar type");
9657 "OpenMP atomic does not support LT or GT operations");
9664 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9665 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9668 Value *CapturedVal =
9669 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9670 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9672 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9673 return Builder.saveIP();
9676OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9677 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9683 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9684 IsPostfixUpdate, IsFailOnly, Failure);
9687OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9688 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9693 if (!updateToLocation(
Loc))
9696 assert(
X.Var->getType()->isPointerTy() &&
9697 "OMP atomic expects a pointer to target memory");
9700 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9701 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9704 bool IsInteger =
E->getType()->isIntegerTy();
9706 if (
Op == OMPAtomicCompareOp::EQ) {
9711 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9712 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9717 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9721 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9723 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9725 "OldValue and V must be of same type");
9726 if (IsPostfixUpdate) {
9727 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9729 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9740 BasicBlock *CurBB = Builder.GetInsertBlock();
9742 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9744 CurBBTI,
X.Var->getName() +
".atomic.exit");
9750 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9752 Builder.SetInsertPoint(ContBB);
9753 Builder.CreateStore(OldValue, V.Var);
9754 Builder.CreateBr(ExitBB);
9759 Builder.SetInsertPoint(ExitBB);
9761 Builder.SetInsertPoint(ExitTI);
9764 Value *CapturedValue =
9765 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9766 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9772 assert(
R.Var->getType()->isPointerTy() &&
9773 "r.var must be of pointer type");
9774 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9776 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9777 Value *ResultCast =
R.IsSigned
9778 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9779 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9780 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9783 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9784 "Op should be either max or min at this point");
9785 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9823 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9825 Value *CapturedValue =
nullptr;
9826 if (IsPostfixUpdate) {
9827 CapturedValue = OldValue;
9852 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9853 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9855 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9859 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9861 return Builder.saveIP();
9864OpenMPIRBuilder::InsertPointOrErrorTy
9865OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9866 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9869 if (!updateToLocation(
Loc))
9870 return InsertPointTy();
9873 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9874 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9879 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9880 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9881 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9901 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9902 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9904 splitBB(Builder,
true,
"teams.alloca");
9906 bool SubClausesPresent =
9907 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9909 if (!Config.isTargetDevice() && SubClausesPresent) {
9910 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9911 "if lowerbound is non-null, then upperbound must also be non-null "
9912 "for bounds on num_teams");
9914 if (NumTeamsUpper ==
nullptr)
9915 NumTeamsUpper = Builder.getInt32(0);
9917 if (NumTeamsLower ==
nullptr)
9918 NumTeamsLower = NumTeamsUpper;
9922 "argument to if clause must be an integer value");
9926 IfExpr = Builder.CreateICmpNE(IfExpr,
9927 ConstantInt::get(IfExpr->
getType(), 0));
9928 NumTeamsUpper = Builder.CreateSelect(
9929 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9932 NumTeamsLower = Builder.CreateSelect(
9933 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9936 if (ThreadLimit ==
nullptr)
9937 ThreadLimit = Builder.getInt32(0);
9939 Value *ThreadNum = getOrCreateThreadID(Ident);
9941 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9942 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9945 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9946 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9947 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9951 OI.EntryBB = AllocaBB;
9953 OI.OuterAllocaBB = &OuterAllocaBB;
9957 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9959 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9961 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9963 auto HostPostOutlineCB = [
this, Ident,
9964 ToBeDeleted](
Function &OutlinedFn)
mutable {
9969 "there must be a single user for the outlined function");
9974 "Outlined function must have two or three arguments only");
9976 bool HasShared = OutlinedFn.
arg_size() == 3;
9984 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
9985 "outlined function.");
9986 Builder.SetInsertPoint(StaleCI);
9988 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
9991 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9992 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9996 I->eraseFromParent();
9999 if (!Config.isTargetDevice())
10000 OI.PostOutlineCB = HostPostOutlineCB;
10002 addOutlineInfo(std::move(OI));
10004 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10006 return Builder.saveIP();
10009OpenMPIRBuilder::InsertPointOrErrorTy
10010OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10011 InsertPointTy OuterAllocaIP,
10012 BodyGenCallbackTy BodyGenCB) {
10013 if (!updateToLocation(
Loc))
10014 return InsertPointTy();
10016 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10018 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10020 splitBB(Builder,
true,
"distribute.entry");
10021 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10024 splitBB(Builder,
true,
"distribute.exit");
10026 splitBB(Builder,
true,
"distribute.body");
10028 splitBB(Builder,
true,
"distribute.alloca");
10031 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10032 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10033 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10037 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10038 OI.EntryBB = AllocaBB;
10039 OI.ExitBB = ExitBB;
10041 addOutlineInfo(std::move(OI));
10042 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10044 return Builder.saveIP();
10049 std::string VarName) {
10055 M, MapNamesArrayInit->
getType(),
10058 return MapNamesArrayGlobal;
10063void OpenMPIRBuilder::initializeTypes(
Module &M) {
10066 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10067#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10068#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10069 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10070 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10071#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10072 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10073 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10074#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10075 T = StructType::getTypeByName(Ctx, StructName); \
10077 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10079 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10080#include "llvm/Frontend/OpenMP/OMPKinds.def"
10083void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10091 while (!Worklist.
empty()) {
10095 if (
BlockSet.insert(SuccBB).second)
10104 if (!Config.isGPU()) {
10119 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10120 Fn->
addFnAttr(Attribute::MustProgress);
10124void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10125 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10128 if (OffloadInfoManager.empty())
10132 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10133 TargetRegionEntryInfo>,
10135 OrderedEntries(OffloadInfoManager.size());
10138 auto &&GetMDInt = [
this](
unsigned V) {
10145 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10146 auto &&TargetRegionMetadataEmitter =
10147 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10148 const TargetRegionEntryInfo &EntryInfo,
10149 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10162 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10163 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10164 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10165 GetMDInt(
E.getOrder())};
10168 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10174 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10177 auto &&DeviceGlobalVarMetadataEmitter =
10178 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10180 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10188 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10189 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10192 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10193 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10199 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10200 DeviceGlobalVarMetadataEmitter);
10202 for (
const auto &
E : OrderedEntries) {
10203 assert(
E.first &&
"All ordered entries must exist!");
10204 if (
const auto *CE =
10207 if (!
CE->getID() || !
CE->getAddress()) {
10209 TargetRegionEntryInfo EntryInfo =
E.second;
10210 StringRef FnName = EntryInfo.ParentName;
10211 if (!M.getNamedValue(FnName))
10213 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10216 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10219 }
else if (
const auto *CE =
dyn_cast<
10220 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10222 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10223 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10226 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10227 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10228 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10230 if (!
CE->getAddress()) {
10231 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10235 if (
CE->getVarSize() == 0)
10238 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10239 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10240 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10241 "Declaret target link address is set.");
10242 if (Config.isTargetDevice())
10244 if (!
CE->getAddress()) {
10245 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10257 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10258 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10263 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10264 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10265 Flags,
CE->getLinkage(),
CE->getVarName());
10267 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10268 Flags,
CE->getLinkage());
10279 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10284 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10285 Config.getRequiresFlags());
10288void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10290 unsigned FileID,
unsigned Line,
unsigned Count) {
10292 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10293 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10295 OS <<
"_" <<
Count;
10298void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10300 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10301 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10302 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10303 EntryInfo.Line, NewCount);
10306TargetRegionEntryInfo
10307OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10310 auto FileIDInfo = CallBack();
10316 FileID =
hash_value(std::get<0>(FileIDInfo));
10318 FileID =
ID.getFile();
10320 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10321 std::get<1>(FileIDInfo));
10324unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10327 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10329 !(Remain & 1); Remain = Remain >> 1)
10335OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10338 << getFlagMemberOffset());
10341void OpenMPIRBuilder::setCorrectMemberOfFlag(
10347 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10349 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10356 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10357 Flags |= MemberOfFlag;
10360Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10361 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10362 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10363 bool IsDeclaration,
bool IsExternallyVisible,
10364 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10365 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10366 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10367 std::function<
Constant *()> GlobalInitializer,
10374 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10375 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10377 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10378 Config.hasRequiresUnifiedSharedMemory())) {
10383 if (!IsExternallyVisible)
10384 OS <<
format(
"_%x", EntryInfo.FileID);
10385 OS <<
"_decl_tgt_ref_ptr";
10388 Value *
Ptr = M.getNamedValue(PtrName);
10392 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10397 if (!Config.isTargetDevice()) {
10398 if (GlobalInitializer)
10399 GV->setInitializer(GlobalInitializer());
10404 registerTargetGlobalVariable(
10405 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10406 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10416void OpenMPIRBuilder::registerTargetGlobalVariable(
10417 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10418 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10419 bool IsDeclaration,
bool IsExternallyVisible,
10420 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10421 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10422 std::vector<Triple> TargetTriple,
10423 std::function<
Constant *()> GlobalInitializer,
10426 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10427 (TargetTriple.empty() && !Config.isTargetDevice()))
10430 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10435 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10437 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10438 !Config.hasRequiresUnifiedSharedMemory()) {
10439 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10443 if (!IsDeclaration)
10445 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10452 if (Config.isTargetDevice() &&
10456 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10459 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10461 if (!M.getNamedValue(RefName)) {
10463 getOrCreateInternalVariable(Addr->
getType(), RefName);
10465 GvAddrRef->setConstant(
true);
10467 GvAddrRef->setInitializer(Addr);
10468 GeneratedRefs.push_back(GvAddrRef);
10472 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10473 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10475 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10477 if (Config.isTargetDevice()) {
10481 Addr = getAddrOfDeclareTargetVar(
10482 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10483 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10484 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10487 VarSize = M.getDataLayout().getPointerSize();
10491 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10497void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10501 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10506 auto &&GetMDInt = [MN](
unsigned Idx) {
10511 auto &&GetMDString = [MN](
unsigned Idx) {
10513 return V->getString();
10516 switch (GetMDInt(0)) {
10520 case OffloadEntriesInfoManager::OffloadEntryInfo::
10521 OffloadingEntryInfoTargetRegion: {
10522 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10527 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10531 case OffloadEntriesInfoManager::OffloadEntryInfo::
10532 OffloadingEntryInfoDeviceGlobalVar:
10533 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10535 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10545 if (HostFilePath.
empty())
10549 if (std::error_code Err = Buf.getError()) {
10551 "OpenMPIRBuilder: " +
10559 if (std::error_code Err = M.getError()) {
10561 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10565 loadOffloadInfoMetadata(*M.get());
10572bool OffloadEntriesInfoManager::empty()
const {
10573 return OffloadEntriesTargetRegion.empty() &&
10574 OffloadEntriesDeviceGlobalVar.empty();
10577unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10578 const TargetRegionEntryInfo &EntryInfo)
const {
10579 auto It = OffloadEntriesTargetRegionCount.find(
10580 getTargetRegionEntryCountKey(EntryInfo));
10581 if (It == OffloadEntriesTargetRegionCount.end())
10586void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10587 const TargetRegionEntryInfo &EntryInfo) {
10588 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10589 EntryInfo.Count + 1;
10593void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10594 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10595 OffloadEntriesTargetRegion[EntryInfo] =
10596 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10597 OMPTargetRegionEntryTargetRegion);
10598 ++OffloadingEntriesNum;
10601void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10603 OMPTargetRegionEntryKind Flags) {
10604 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10607 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10611 if (OMPBuilder->Config.isTargetDevice()) {
10613 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10616 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10617 Entry.setAddress(Addr);
10619 Entry.setFlags(Flags);
10621 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10622 hasTargetRegionEntryInfo(EntryInfo,
true))
10624 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10625 "Target region entry already registered!");
10626 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10627 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10628 ++OffloadingEntriesNum;
10630 incrementTargetRegionEntryInfoCount(EntryInfo);
10633bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10634 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10637 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10639 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10640 if (It == OffloadEntriesTargetRegion.end()) {
10644 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10649void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10650 const OffloadTargetRegionEntryInfoActTy &Action) {
10652 for (
const auto &It : OffloadEntriesTargetRegion) {
10653 Action(It.first, It.second);
10657void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10658 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10659 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10660 ++OffloadingEntriesNum;
10663void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10666 if (OMPBuilder->Config.isTargetDevice()) {
10668 if (!hasDeviceGlobalVarEntryInfo(VarName))
10670 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10671 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10672 if (Entry.getVarSize() == 0) {
10673 Entry.setVarSize(VarSize);
10678 Entry.setVarSize(VarSize);
10680 Entry.setAddress(Addr);
10682 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10683 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10684 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10685 "Entry not initialized!");
10686 if (Entry.getVarSize() == 0) {
10687 Entry.setVarSize(VarSize);
10692 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10693 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10694 Addr, VarSize, Flags,
Linkage,
10697 OffloadEntriesDeviceGlobalVar.try_emplace(
10698 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10699 ++OffloadingEntriesNum;
10703void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10704 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10706 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10707 Action(
E.getKey(),
E.getValue());
10714void CanonicalLoopInfo::collectControlBlocks(
10721 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10724BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10733void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10745void CanonicalLoopInfo::mapIndVar(
10755 for (
Use &U : OldIV->
uses()) {
10759 if (
User->getParent() == getCond())
10761 if (
User->getParent() == getLatch())
10767 Value *NewIV = Updater(OldIV);
10770 for (
Use *U : ReplacableUses)
10778void CanonicalLoopInfo::assertOK()
const {
10791 "Preheader must terminate with unconditional branch");
10793 "Preheader must jump to header");
10797 "Header must terminate with unconditional branch");
10798 assert(Header->getSingleSuccessor() ==
Cond &&
10799 "Header must jump to exiting block");
10802 assert(
Cond->getSinglePredecessor() == Header &&
10803 "Exiting block only reachable from header");
10806 "Exiting block must terminate with conditional branch");
10808 "Exiting block must have two successors");
10810 "Exiting block's first successor jump to the body");
10812 "Exiting block's second successor must exit the loop");
10816 "Body only reachable from exiting block");
10821 "Latch must terminate with unconditional branch");
10830 "Exit block must terminate with unconditional branch");
10831 assert(
Exit->getSingleSuccessor() == After &&
10832 "Exit block must jump to after block");
10836 "After block only reachable from exit block");
10840 assert(IndVar &&
"Canonical induction variable not found?");
10842 "Induction variable must be an integer");
10844 "Induction variable must be a PHI in the loop header");
10850 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10857 Value *TripCount = getTripCount();
10858 assert(TripCount &&
"Loop trip count not found?");
10860 "Trip count and induction variable must have the same type");
10864 "Exit condition must be a signed less-than comparison");
10866 "Exit condition must compare the induction variable");
10868 "Exit condition must compare with the trip count");
10872void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Value * removeASCastIfPresent(Value *V)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return the unique user of this value that cannot be dropped, if there is exactly one such user (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI void emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
LLVM_ABI std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user specified parameters.
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0, it auto-detects the radix.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...