LLVM 22.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
20#include "llvm/IR/DebugLoc.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Module.h"
23#include "llvm/IR/ValueMap.h"
26#include "llvm/Support/Error.h"
28#include <forward_list>
29#include <map>
30#include <optional>
31
32namespace llvm {
33class CanonicalLoopInfo;
34class ScanInfo;
35struct TargetRegionEntryInfo;
36class OffloadEntriesInfoManager;
37class OpenMPIRBuilder;
38class Loop;
39class LoopAnalysis;
40class LoopInfo;
41
42/// Move the instruction after an InsertPoint to the beginning of another
43/// BasicBlock.
44///
45/// The instructions after \p IP are moved to the beginning of \p New which must
46/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
47/// \p New will be added such that there is no semantic change. Otherwise, the
48/// \p IP insert block remains degenerate and it is up to the caller to insert a
49/// terminator. \p DL is used as the debug location for the branch instruction
50/// if one is created.
51LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
52 bool CreateBranch, DebugLoc DL);
53
54/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
55/// insert location will stick to after the instruction before the insertion
56/// point (instead of moving with the instruction the InsertPoint stores
57/// internally).
58LLVM_ABI void spliceBB(IRBuilder<> &Builder, BasicBlock *New,
59 bool CreateBranch);
60
61/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
62/// (missing the terminator).
63///
64/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
65/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
66/// is true, a branch to the new successor will new created such that
67/// semantically there is no change; otherwise the block of the insertion point
68/// remains degenerate and it is the caller's responsibility to insert a
69/// terminator. \p DL is used as the debug location for the branch instruction
70/// if one is created. Returns the new successor block.
71LLVM_ABI BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
72 DebugLoc DL, llvm::Twine Name = {});
73
74/// Split a BasicBlock at \p Builder's insertion point, even if the block is
75/// degenerate (missing the terminator). Its new insert location will stick to
76/// after the instruction before the insertion point (instead of moving with the
77/// instruction the InsertPoint stores internally).
78LLVM_ABI BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
79 llvm::Twine Name = {});
80
81/// Split a BasicBlock at \p Builder's insertion point, even if the block is
82/// degenerate (missing the terminator). Its new insert location will stick to
83/// after the instruction before the insertion point (instead of moving with the
84/// instruction the InsertPoint stores internally).
85LLVM_ABI BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch,
87
88/// Like splitBB, but reuses the current block's name for the new name.
89LLVM_ABI BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder,
90 bool CreateBranch,
91 llvm::Twine Suffix = ".split");
92
93/// Captures attributes that affect generating LLVM-IR using the
94/// OpenMPIRBuilder and related classes. Note that not all attributes are
95/// required for all classes or functions. In some use cases the configuration
96/// is not necessary at all, because because the only functions that are called
97/// are ones that are not dependent on the configuration.
99public:
100 /// Flag to define whether to generate code for the role of the OpenMP host
101 /// (if set to false) or device (if set to true) in an offloading context. It
102 /// is set when the -fopenmp-is-target-device compiler frontend option is
103 /// specified.
104 std::optional<bool> IsTargetDevice;
105
106 /// Flag for specifying if the compilation is done for an accelerator. It is
107 /// set according to the architecture of the target triple and currently only
108 /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform
109 /// the role of an OpenMP target device, so `IsTargetDevice` must also be true
110 /// if `IsGPU` is true. This restriction might be lifted if an accelerator-
111 /// like target with the ability to work as the OpenMP host is added, or if
112 /// the capabilities of the currently supported GPU architectures are
113 /// expanded.
114 std::optional<bool> IsGPU;
115
116 /// Flag for specifying if LLVMUsed information should be emitted.
117 std::optional<bool> EmitLLVMUsedMetaInfo;
118
119 /// Flag for specifying if offloading is mandatory.
120 std::optional<bool> OpenMPOffloadMandatory;
121
122 /// First separator used between the initial two parts of a name.
123 std::optional<StringRef> FirstSeparator;
124 /// Separator used between all of the rest consecutive parts of s name
125 std::optional<StringRef> Separator;
126
127 // Grid Value for the GPU target
128 std::optional<omp::GV> GridValue;
129
130 /// When compilation is being done for the OpenMP host (i.e. `IsTargetDevice =
131 /// false`), this contains the list of offloading triples associated, if any.
133
137 bool HasRequiresReverseOffload,
138 bool HasRequiresUnifiedAddress,
139 bool HasRequiresUnifiedSharedMemory,
140 bool HasRequiresDynamicAllocators);
141
142 // Getters functions that assert if the required values are not present.
143 bool isTargetDevice() const {
144 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
145 return *IsTargetDevice;
146 }
147
148 bool isGPU() const {
149 assert(IsGPU.has_value() && "IsGPU is not set");
150 return *IsGPU;
151 }
152
154 assert(OpenMPOffloadMandatory.has_value() &&
155 "OpenMPOffloadMandatory is not set");
157 }
158
160 assert(GridValue.has_value() && "GridValue is not set");
161 return *GridValue;
162 }
163
164 bool hasRequiresFlags() const { return RequiresFlags; }
169
170 /// Returns requires directive clauses as flags compatible with those expected
171 /// by libomptarget.
172 LLVM_ABI int64_t getRequiresFlags() const;
173
174 // Returns the FirstSeparator if set, otherwise use the default separator
175 // depending on isGPU
177 if (FirstSeparator.has_value())
178 return *FirstSeparator;
179 if (isGPU())
180 return "_";
181 return ".";
182 }
183
184 // Returns the Separator if set, otherwise use the default separator depending
185 // on isGPU
187 if (Separator.has_value())
188 return *Separator;
189 if (isGPU())
190 return "$";
191 return ".";
192 }
193
195 void setIsGPU(bool Value) { IsGPU = Value; }
201
206
207private:
208 /// Flags for specifying which requires directive clauses are present.
209 int64_t RequiresFlags;
210};
211
212/// Data structure to contain the information needed to uniquely identify
213/// a target entry.
215 /// The prefix used for kernel names.
216 static constexpr const char *KernelNamePrefix = "__omp_offloading_";
217
218 std::string ParentName;
219 unsigned DeviceID;
220 unsigned FileID;
221 unsigned Line;
222 unsigned Count;
223
226 unsigned FileID, unsigned Line, unsigned Count = 0)
228 Count(Count) {}
229
230 LLVM_ABI static void
232 unsigned DeviceID, unsigned FileID, unsigned Line,
233 unsigned Count);
234
236 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
237 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
238 RHS.Count);
239 }
240};
241
242/// Class that manages information about offload code regions and data
244 /// Number of entries registered so far.
245 OpenMPIRBuilder *OMPBuilder;
246 unsigned OffloadingEntriesNum = 0;
247
248public:
249 /// Base class of the entries info.
251 public:
252 /// Kind of a given entry.
253 enum OffloadingEntryInfoKinds : unsigned {
254 /// Entry is a target region.
256 /// Entry is a declare target variable.
258 /// Invalid entry info.
260 };
261
262 protected:
264 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
265 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
266 uint32_t Flags)
267 : Flags(Flags), Order(Order), Kind(Kind) {}
268 ~OffloadEntryInfo() = default;
269
270 public:
271 bool isValid() const { return Order != ~0u; }
272 unsigned getOrder() const { return Order; }
273 OffloadingEntryInfoKinds getKind() const { return Kind; }
274 uint32_t getFlags() const { return Flags; }
275 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
276 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
278 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
279 Addr = V;
280 }
281 static bool classof(const OffloadEntryInfo *Info) { return true; }
282
283 private:
284 /// Address of the entity that has to be mapped for offloading.
285 WeakTrackingVH Addr;
286
287 /// Flags associated with the device global.
288 uint32_t Flags = 0u;
289
290 /// Order this entry was emitted.
291 unsigned Order = ~0u;
292
294 };
295
296 /// Return true if a there are no entries defined.
297 LLVM_ABI bool empty() const;
298 /// Return number of entries defined so far.
299 unsigned size() const { return OffloadingEntriesNum; }
300
301 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
302
303 //
304 // Target region entries related.
305 //
306
307 /// Kind of the target registry entry.
309 /// Mark the entry as target region.
311 };
312
313 /// Target region entries info.
315 /// Address that can be used as the ID of the entry.
316 Constant *ID = nullptr;
317
318 public:
321 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
322 Constant *ID,
325 ID(ID) {
327 }
328
329 Constant *getID() const { return ID; }
330 void setID(Constant *V) {
331 assert(!ID && "ID has been set before!");
332 ID = V;
333 }
334 static bool classof(const OffloadEntryInfo *Info) {
335 return Info->getKind() == OffloadingEntryInfoTargetRegion;
336 }
337 };
338
339 /// Initialize target region entry.
340 /// This is ONLY needed for DEVICE compilation.
341 LLVM_ABI void
343 unsigned Order);
344 /// Register target region entry.
348 /// Return true if a target region entry with the provided information
349 /// exists.
351 bool IgnoreAddressId = false) const;
352
353 // Return the Name based on \a EntryInfo using the next available Count.
354 LLVM_ABI void
356 const TargetRegionEntryInfo &EntryInfo);
357
358 /// brief Applies action \a Action on all registered entries.
359 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
360 const OffloadEntryInfoTargetRegion &)>
362 LLVM_ABI void
364
365 //
366 // Device global variable entries related.
367 //
368
369 /// Kind of the global variable entry..
371 /// Mark the entry as a to declare target.
373 /// Mark the entry as a to declare target link.
375 /// Mark the entry as a declare target enter.
377 /// Mark the entry as having no declare target entry kind.
379 /// Mark the entry as a declare target indirect global.
381 /// Mark the entry as a register requires global.
383 };
384
385 /// Kind of device clause for declare target variables
386 /// and functions
387 /// NOTE: Currently not used as a part of a variable entry
388 /// used for Flang and Clang to interface with the variable
389 /// related registration functions
391 /// The target is marked for all devices
393 /// The target is marked for non-host devices
395 /// The target is marked for host devices
397 /// The target is marked as having no clause
399 };
400
401 /// Device global variable entries info.
403 /// Type of the global variable.
404 int64_t VarSize;
406 const std::string VarName;
407
408 public:
411 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
414 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
415 int64_t VarSize,
418 const std::string &VarName)
420 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
422 }
423
424 int64_t getVarSize() const { return VarSize; }
425 StringRef getVarName() const { return VarName; }
426 void setVarSize(int64_t Size) { VarSize = Size; }
427 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
428 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
429 static bool classof(const OffloadEntryInfo *Info) {
430 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
431 }
432 };
433
434 /// Initialize device global variable entry.
435 /// This is ONLY used for DEVICE compilation.
437 StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order);
438
439 /// Register device global variable entry.
441 StringRef VarName, Constant *Addr, int64_t VarSize,
443 /// Checks if the variable with the given name has been registered already.
445 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
446 }
447 /// Applies action \a Action on all registered entries.
448 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
452
453private:
454 /// Return the count of entries at a particular source location.
455 unsigned
456 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
457
458 /// Update the count of entries at a particular source location.
459 void
460 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
461
463 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
464 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
465 EntryInfo.FileID, EntryInfo.Line, 0);
466 }
467
468 // Count of entries at a location.
469 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
470
471 // Storage for target region entries kind.
472 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
473 OffloadEntriesTargetRegionTy;
474 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
475 /// Storage for device global variable entries kind. The storage is to be
476 /// indexed by mangled name.
478 OffloadEntriesDeviceGlobalVarTy;
479 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
480};
481
482/// An interface to create LLVM-IR for OpenMP directives.
483///
484/// Each OpenMP directive has a corresponding public generator method.
486public:
487 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
488 /// not have an effect on \p M (see initialize)
490 : M(M), Builder(M.getContext()), OffloadInfoManager(this),
491 T(M.getTargetTriple()), IsFinalized(false) {}
493
495 llvm::Value *AtomicVar;
496
497 public:
504 AtomicVar(AtomicVar) {}
505
506 llvm::Value *getAtomicPointer() const override { return AtomicVar; }
509 const llvm::Twine &Name) const override {
510 llvm::AllocaInst *allocaInst = Builder->CreateAlloca(Ty);
511 allocaInst->setName(Name);
512 return allocaInst;
513 }
514 };
515 /// Initialize the internal state, this will put structures types and
516 /// potentially other helpers into the underlying module. Must be called
517 /// before any other method and only once! This internal state includes types
518 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
519 LLVM_ABI void initialize();
520
522
523 /// Finalize the underlying module, e.g., by outlining regions.
524 /// \param Fn The function to be finalized. If not used,
525 /// all functions are finalized.
526 LLVM_ABI void finalize(Function *Fn = nullptr);
527
528 /// Check whether the finalize function has already run
529 /// \return true if the finalize function has already run
530 LLVM_ABI bool isFinalized();
531
532 /// Add attributes known for \p FnID to \p Fn.
534
535 /// Type used throughout for insertion points.
537
538 /// Type used to represent an insertion point or an error value.
540
541 /// Get the create a name using the platform specific separators.
542 /// \param Parts parts of the final name that needs separation
543 /// The created name has a first separator between the first and second part
544 /// and a second separator between all other parts.
545 /// E.g. with FirstSeparator "$" and Separator "." and
546 /// parts: "p1", "p2", "p3", "p4"
547 /// The resulting name is "p1$p2.p3.p4"
548 /// The separators are retrieved from the OpenMPIRBuilderConfig.
549 LLVM_ABI std::string
551
552 /// Callback type for variable finalization (think destructors).
553 ///
554 /// \param CodeGenIP is the insertion point at which the finalization code
555 /// should be placed.
556 ///
557 /// A finalize callback knows about all objects that need finalization, e.g.
558 /// destruction, when the scope of the currently generated construct is left
559 /// at the time, and location, the callback is invoked.
560 using FinalizeCallbackTy = std::function<Error(InsertPointTy CodeGenIP)>;
561
563 /// The finalization callback provided by the last in-flight invocation of
564 /// createXXXX for the directive of kind DK.
566
567 /// The directive kind of the innermost directive that has an associated
568 /// region which might require finalization when it is left.
569 omp::Directive DK;
570
571 /// Flag to indicate if the directive is cancellable.
573 };
574
575 /// Push a finalization callback on the finalization stack.
576 ///
577 /// NOTE: Temporary solution until Clang CG is gone.
579 FinalizationStack.push_back(FI);
580 }
581
582 /// Pop the last finalization callback from the finalization stack.
583 ///
584 /// NOTE: Temporary solution until Clang CG is gone.
586
587 /// Callback type for body (=inner region) code generation
588 ///
589 /// The callback takes code locations as arguments, each describing a
590 /// location where additional instructions can be inserted.
591 ///
592 /// The CodeGenIP may be in the middle of a basic block or point to the end of
593 /// it. The basic block may have a terminator or be degenerate. The callback
594 /// function may just insert instructions at that position, but also split the
595 /// block (without the Before argument of BasicBlock::splitBasicBlock such
596 /// that the identify of the split predecessor block is preserved) and insert
597 /// additional control flow, including branches that do not lead back to what
598 /// follows the CodeGenIP. Note that since the callback is allowed to split
599 /// the block, callers must assume that InsertPoints to positions in the
600 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
601 /// such InsertPoints need to be preserved, it can split the block itself
602 /// before calling the callback.
603 ///
604 /// AllocaIP and CodeGenIP must not point to the same position.
605 ///
606 /// \param AllocaIP is the insertion point at which new alloca instructions
607 /// should be placed. The BasicBlock it is pointing to must
608 /// not be split.
609 /// \param CodeGenIP is the insertion point at which the body code should be
610 /// placed.
611 ///
612 /// \return an error, if any were triggered during execution.
614 function_ref<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
615
616 // This is created primarily for sections construct as llvm::function_ref
617 // (BodyGenCallbackTy) is not storable (as described in the comments of
618 // function_ref class - function_ref contains non-ownable reference
619 // to the callable.
620 ///
621 /// \return an error, if any were triggered during execution.
623 std::function<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
624
625 /// Callback type for loop body code generation.
626 ///
627 /// \param CodeGenIP is the insertion point where the loop's body code must be
628 /// placed. This will be a dedicated BasicBlock with a
629 /// conditional branch from the loop condition check and
630 /// terminated with an unconditional branch to the loop
631 /// latch.
632 /// \param IndVar is the induction variable usable at the insertion point.
633 ///
634 /// \return an error, if any were triggered during execution.
636 function_ref<Error(InsertPointTy CodeGenIP, Value *IndVar)>;
637
638 /// Callback type for variable privatization (think copy & default
639 /// constructor).
640 ///
641 /// \param AllocaIP is the insertion point at which new alloca instructions
642 /// should be placed.
643 /// \param CodeGenIP is the insertion point at which the privatization code
644 /// should be placed.
645 /// \param Original The value being copied/created, should not be used in the
646 /// generated IR.
647 /// \param Inner The equivalent of \p Original that should be used in the
648 /// generated IR; this is equal to \p Original if the value is
649 /// a pointer and can thus be passed directly, otherwise it is
650 /// an equivalent but different value.
651 /// \param ReplVal The replacement value, thus a copy or new created version
652 /// of \p Inner.
653 ///
654 /// \returns The new insertion point where code generation continues and
655 /// \p ReplVal the replacement value.
657 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
658 Value &Inner, Value *&ReplVal)>;
659
660 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
661 /// (filename, line, column, ...).
664 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
667 : IP(IP), DL(DL) {}
670 };
671
672 /// Emitter methods for OpenMP directives.
673 ///
674 ///{
675
676 /// Generator for '#omp barrier'
677 ///
678 /// \param Loc The location where the barrier directive was encountered.
679 /// \param Kind The kind of directive that caused the barrier.
680 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
681 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
682 /// should be checked and acted upon.
683 /// \param ThreadID Optional parameter to pass in any existing ThreadID value.
684 ///
685 /// \returns The insertion point after the barrier.
687 omp::Directive Kind,
688 bool ForceSimpleCall = false,
689 bool CheckCancelFlag = true);
690
691 /// Generator for '#omp cancel'
692 ///
693 /// \param Loc The location where the directive was encountered.
694 /// \param IfCondition The evaluated 'if' clause expression, if any.
695 /// \param CanceledDirective The kind of directive that is cancled.
696 ///
697 /// \returns The insertion point after the barrier.
699 Value *IfCondition,
700 omp::Directive CanceledDirective);
701
702 /// Generator for '#omp cancellation point'
703 ///
704 /// \param Loc The location where the directive was encountered.
705 /// \param CanceledDirective The kind of directive that is cancled.
706 ///
707 /// \returns The insertion point after the barrier.
709 const LocationDescription &Loc, omp::Directive CanceledDirective);
710
711 /// Creates a ScanInfo object, allocates and returns the pointer.
713
714 /// Generator for '#omp parallel'
715 ///
716 /// \param Loc The insert and source location description.
717 /// \param AllocaIP The insertion points to be used for alloca instructions.
718 /// \param BodyGenCB Callback that will generate the region code.
719 /// \param PrivCB Callback to copy a given variable (think copy constructor).
720 /// \param FiniCB Callback to finalize variable copies.
721 /// \param IfCondition The evaluated 'if' clause expression, if any.
722 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
723 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
724 /// \param IsCancellable Flag to indicate a cancellable parallel region.
725 ///
726 /// \returns The insertion position *after* the parallel.
728 const LocationDescription &Loc, InsertPointTy AllocaIP,
729 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
730 FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
731 omp::ProcBindKind ProcBind, bool IsCancellable);
732
733 /// Generator for the control flow structure of an OpenMP canonical loop.
734 ///
735 /// This generator operates on the logical iteration space of the loop, i.e.
736 /// the caller only has to provide a loop trip count of the loop as defined by
737 /// base language semantics. The trip count is interpreted as an unsigned
738 /// integer. The induction variable passed to \p BodyGenCB will be of the same
739 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
740 /// convert the logical iteration variable to the loop counter variable in the
741 /// loop body.
742 ///
743 /// \param Loc The insert and source location description. The insert
744 /// location can be between two instructions or the end of a
745 /// degenerate block (e.g. a BB under construction).
746 /// \param BodyGenCB Callback that will generate the loop body code.
747 /// \param TripCount Number of iterations the loop body is executed.
748 /// \param Name Base name used to derive BB and instruction names.
749 ///
750 /// \returns An object representing the created control flow structure which
751 /// can be used for loop-associated directives.
754 LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
755 const Twine &Name = "loop");
756
757 /// Generator for the control flow structure of an OpenMP canonical loops if
758 /// the parent directive has an `inscan` modifier specified.
759 /// If the `inscan` modifier is specified, the region of the parent is
760 /// expected to have a `scan` directive. Based on the clauses in
761 /// scan directive, the body of the loop is split into two loops: Input loop
762 /// and Scan Loop. Input loop contains the code generated for input phase of
763 /// scan and Scan loop contains the code generated for scan phase of scan.
764 /// From the bodyGen callback of these loops, `createScan` would be called
765 /// when a scan directive is encountered from the loop body. `createScan`
766 /// based on whether 1. inclusive or exclusive scan is specified and, 2. input
767 /// loop or scan loop is generated, lowers the body of the for loop
768 /// accordingly.
769 ///
770 /// \param Loc The insert and source location description.
771 /// \param BodyGenCB Callback that will generate the loop body code.
772 /// \param Start Value of the loop counter for the first iterations.
773 /// \param Stop Loop counter values past this will stop the loop.
774 /// \param Step Loop counter increment after each iteration; negative
775 /// means counting down.
776 /// \param IsSigned Whether Start, Stop and Step are signed integers.
777 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
778 /// counter.
779 /// \param ComputeIP Insertion point for instructions computing the trip
780 /// count. Can be used to ensure the trip count is available
781 /// at the outermost loop of a loop nest. If not set,
782 /// defaults to the preheader of the generated loop.
783 /// \param Name Base name used to derive BB and instruction names.
784 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
785 /// `ScanInfoInitialize`.
786 ///
787 /// \returns A vector containing Loop Info of Input Loop and Scan Loop.
790 LoopBodyGenCallbackTy BodyGenCB, Value *Start,
791 Value *Stop, Value *Step, bool IsSigned,
792 bool InclusiveStop, InsertPointTy ComputeIP,
793 const Twine &Name, ScanInfo *ScanRedInfo);
794
795 /// Calculate the trip count of a canonical loop.
796 ///
797 /// This allows specifying user-defined loop counter values using increment,
798 /// upper- and lower bounds. To disambiguate the terminology when counting
799 /// downwards, instead of lower bounds we use \p Start for the loop counter
800 /// value in the first body iteration.
801 ///
802 /// Consider the following limitations:
803 ///
804 /// * A loop counter space over all integer values of its bit-width cannot be
805 /// represented. E.g using uint8_t, its loop trip count of 256 cannot be
806 /// stored into an 8 bit integer):
807 ///
808 /// DO I = 0, 255, 1
809 ///
810 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
811 /// effectively counting downwards:
812 ///
813 /// for (uint8_t i = 100u; i > 0; i += 127u)
814 ///
815 ///
816 /// TODO: May need to add additional parameters to represent:
817 ///
818 /// * Allow representing downcounting with unsigned integers.
819 ///
820 /// * Sign of the step and the comparison operator might disagree:
821 ///
822 /// for (int i = 0; i < 42; i -= 1u)
823 ///
824 /// \param Loc The insert and source location description.
825 /// \param Start Value of the loop counter for the first iterations.
826 /// \param Stop Loop counter values past this will stop the loop.
827 /// \param Step Loop counter increment after each iteration; negative
828 /// means counting down.
829 /// \param IsSigned Whether Start, Stop and Step are signed integers.
830 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
831 /// counter.
832 /// \param Name Base name used to derive instruction names.
833 ///
834 /// \returns The value holding the calculated trip count.
836 const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
837 bool IsSigned, bool InclusiveStop, const Twine &Name = "loop");
838
839 /// Generator for the control flow structure of an OpenMP canonical loop.
840 ///
841 /// Instead of a logical iteration space, this allows specifying user-defined
842 /// loop counter values using increment, upper- and lower bounds. To
843 /// disambiguate the terminology when counting downwards, instead of lower
844 /// bounds we use \p Start for the loop counter value in the first body
845 ///
846 /// It calls \see calculateCanonicalLoopTripCount for trip count calculations,
847 /// so limitations of that method apply here as well.
848 ///
849 /// \param Loc The insert and source location description.
850 /// \param BodyGenCB Callback that will generate the loop body code.
851 /// \param Start Value of the loop counter for the first iterations.
852 /// \param Stop Loop counter values past this will stop the loop.
853 /// \param Step Loop counter increment after each iteration; negative
854 /// means counting down.
855 /// \param IsSigned Whether Start, Stop and Step are signed integers.
856 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
857 /// counter.
858 /// \param ComputeIP Insertion point for instructions computing the trip
859 /// count. Can be used to ensure the trip count is available
860 /// at the outermost loop of a loop nest. If not set,
861 /// defaults to the preheader of the generated loop.
862 /// \param Name Base name used to derive BB and instruction names.
863 /// \param InScan Whether loop has a scan reduction specified.
864 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
865 /// `ScanInfoInitialize`.
866 ///
867 /// \returns An object representing the created control flow structure which
868 /// can be used for loop-associated directives.
870 const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
871 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
872 InsertPointTy ComputeIP = {}, const Twine &Name = "loop",
873 bool InScan = false, ScanInfo *ScanRedInfo = nullptr);
874
875 /// Collapse a loop nest into a single loop.
876 ///
877 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
878 /// that has the same number of innermost loop iterations as the origin loop
879 /// nest. The induction variables of the input loops are derived from the
880 /// collapsed loop's induction variable. This is intended to be used to
881 /// implement OpenMP's collapse clause. Before applying a directive,
882 /// collapseLoops normalizes a loop nest to contain only a single loop and the
883 /// directive's implementation does not need to handle multiple loops itself.
884 /// This does not remove the need to handle all loop nest handling by
885 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
886 /// modifier of the worksharing-loop directive.
887 ///
888 /// Example:
889 /// \code
890 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
891 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
892 /// body(i, j);
893 /// \endcode
894 ///
895 /// After collapsing with Loops={i,j}, the loop is changed to
896 /// \code
897 /// for (int ij = 0; ij < 63; ++ij) {
898 /// int i = ij / 9;
899 /// int j = ij % 9;
900 /// body(i, j);
901 /// }
902 /// \endcode
903 ///
904 /// In the current implementation, the following limitations apply:
905 ///
906 /// * All input loops have an induction variable of the same type.
907 ///
908 /// * The collapsed loop will have the same trip count integer type as the
909 /// input loops. Therefore it is possible that the collapsed loop cannot
910 /// represent all iterations of the input loops. For instance, assuming a
911 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
912 /// theoretical trip count of the collapsed loop would be 2^32 iteration,
913 /// which cannot be represented in an 32-bit integer. Behavior is undefined
914 /// in this case.
915 ///
916 /// * The trip counts of every input loop must be available at \p ComputeIP.
917 /// Non-rectangular loops are not yet supported.
918 ///
919 /// * At each nest level, code between a surrounding loop and its nested loop
920 /// is hoisted into the loop body, and such code will be executed more
921 /// often than before collapsing (or not at all if any inner loop iteration
922 /// has a trip count of 0). This is permitted by the OpenMP specification.
923 ///
924 /// \param DL Debug location for instructions added for collapsing,
925 /// such as instructions to compute/derive the input loop's
926 /// induction variables.
927 /// \param Loops Loops in the loop nest to collapse. Loops are specified
928 /// from outermost-to-innermost and every control flow of a
929 /// loop's body must pass through its directly nested loop.
930 /// \param ComputeIP Where additional instruction that compute the collapsed
931 /// trip count. If not set, defaults to before the generated
932 /// loop.
933 ///
934 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
937 InsertPointTy ComputeIP);
938
939 /// Get the default alignment value for given target
940 ///
941 /// \param TargetTriple Target triple
942 /// \param Features StringMap which describes extra CPU features
943 LLVM_ABI static unsigned
944 getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
945 const StringMap<bool> &Features);
946
947 /// Retrieve (or create if non-existent) the address of a declare
948 /// target variable, used in conjunction with registerTargetGlobalVariable
949 /// to create declare target global variables.
950 ///
951 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
952 /// clause used in conjunction with the variable being registered (link,
953 /// to, enter).
954 /// \param DeviceClause - enumerator corresponding to the OpenMP capture
955 /// clause used in conjunction with the variable being registered (nohost,
956 /// host, any)
957 /// \param IsDeclaration - boolean stating if the variable being registered
958 /// is a declaration-only and not a definition
959 /// \param IsExternallyVisible - boolean stating if the variable is externally
960 /// visible
961 /// \param EntryInfo - Unique entry information for the value generated
962 /// using getTargetEntryUniqueInfo, used to name generated pointer references
963 /// to the declare target variable
964 /// \param MangledName - the mangled name of the variable being registered
965 /// \param GeneratedRefs - references generated by invocations of
966 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
967 /// these are required by Clang for book keeping.
968 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
969 /// \param TargetTriple - The OpenMP device target triple we are compiling
970 /// for
971 /// \param LlvmPtrTy - The type of the variable we are generating or
972 /// retrieving an address for
973 /// \param GlobalInitializer - a lambda function which creates a constant
974 /// used for initializing a pointer reference to the variable in certain
975 /// cases. If a nullptr is passed, it will default to utilising the original
976 /// variable to initialize the pointer reference.
977 /// \param VariableLinkage - a lambda function which returns the variables
978 /// linkage type, if unspecified and a nullptr is given, it will instead
979 /// utilise the linkage stored on the existing global variable in the
980 /// LLVMModule.
984 bool IsDeclaration, bool IsExternallyVisible,
985 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
986 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
987 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
988 std::function<Constant *()> GlobalInitializer,
989 std::function<GlobalValue::LinkageTypes()> VariableLinkage);
990
991 /// Registers a target variable for device or host.
992 ///
993 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
994 /// clause used in conjunction with the variable being registered (link,
995 /// to, enter).
996 /// \param DeviceClause - enumerator corresponding to the OpenMP capture
997 /// clause used in conjunction with the variable being registered (nohost,
998 /// host, any)
999 /// \param IsDeclaration - boolean stating if the variable being registered
1000 /// is a declaration-only and not a definition
1001 /// \param IsExternallyVisible - boolean stating if the variable is externally
1002 /// visible
1003 /// \param EntryInfo - Unique entry information for the value generated
1004 /// using getTargetEntryUniqueInfo, used to name generated pointer references
1005 /// to the declare target variable
1006 /// \param MangledName - the mangled name of the variable being registered
1007 /// \param GeneratedRefs - references generated by invocations of
1008 /// registerTargetGlobalVariable these are required by Clang for book
1009 /// keeping.
1010 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
1011 /// \param TargetTriple - The OpenMP device target triple we are compiling
1012 /// for
1013 /// \param GlobalInitializer - a lambda function which creates a constant
1014 /// used for initializing a pointer reference to the variable in certain
1015 /// cases. If a nullptr is passed, it will default to utilising the original
1016 /// variable to initialize the pointer reference.
1017 /// \param VariableLinkage - a lambda function which returns the variables
1018 /// linkage type, if unspecified and a nullptr is given, it will instead
1019 /// utilise the linkage stored on the existing global variable in the
1020 /// LLVMModule.
1021 /// \param LlvmPtrTy - The type of the variable we are generating or
1022 /// retrieving an address for
1023 /// \param Addr - the original llvm value (addr) of the variable to be
1024 /// registered
1028 bool IsDeclaration, bool IsExternallyVisible,
1029 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
1030 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
1031 std::vector<Triple> TargetTriple,
1032 std::function<Constant *()> GlobalInitializer,
1033 std::function<GlobalValue::LinkageTypes()> VariableLinkage,
1034 Type *LlvmPtrTy, Constant *Addr);
1035
1036 /// Get the offset of the OMP_MAP_MEMBER_OF field.
1037 LLVM_ABI unsigned getFlagMemberOffset();
1038
1039 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on
1040 /// the position given.
1041 /// \param Position - A value indicating the position of the parent
1042 /// of the member in the kernel argument structure, often retrieved
1043 /// by the parents position in the combined information vectors used
1044 /// to generate the structure itself. Multiple children (member's of)
1045 /// with the same parent will use the same returned member flag.
1047
1048 /// Given an initial flag set, this function modifies it to contain
1049 /// the passed in MemberOfFlag generated from the getMemberOfFlag
1050 /// function. The results are dependent on the existing flag bits
1051 /// set in the original flag set.
1052 /// \param Flags - The original set of flags to be modified with the
1053 /// passed in MemberOfFlag.
1054 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
1055 /// slightly based on the getMemberOfFlag which adjusts the flag bits
1056 /// based on the members position in its parent.
1057 LLVM_ABI void
1059 omp::OpenMPOffloadMappingFlags MemberOfFlag);
1060
1061private:
1062 /// Modifies the canonical loop to be a statically-scheduled workshare loop
1063 /// which is executed on the device
1064 ///
1065 /// This takes a \p CLI representing a canonical loop, such as the one
1066 /// created by \see createCanonicalLoop and emits additional instructions to
1067 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1068 /// runtime function in the preheader to call OpenMP device rtl function
1069 /// which handles worksharing of loop body interations.
1070 ///
1071 /// \param DL Debug location for instructions added for the
1072 /// workshare-loop construct itself.
1073 /// \param CLI A descriptor of the canonical loop to workshare.
1074 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1075 /// preheader of the loop.
1076 /// \param LoopType Information about type of loop worksharing.
1077 /// It corresponds to type of loop workshare OpenMP pragma.
1078 ///
1079 /// \returns Point where to insert code after the workshare construct.
1080 InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
1081 InsertPointTy AllocaIP,
1082 omp::WorksharingLoopType LoopType);
1083
1084 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
1085 ///
1086 /// This takes a \p LoopInfo representing a canonical loop, such as the one
1087 /// created by \p createCanonicalLoop and emits additional instructions to
1088 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1089 /// runtime function in the preheader to obtain the loop bounds to be used in
1090 /// the current thread, updates the relevant instructions in the canonical
1091 /// loop and calls to an OpenMP runtime finalization function after the loop.
1092 ///
1093 /// \param DL Debug location for instructions added for the
1094 /// workshare-loop construct itself.
1095 /// \param CLI A descriptor of the canonical loop to workshare.
1096 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1097 /// preheader of the loop.
1098 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1099 /// the loop.
1100 /// \param LoopType Type of workshare loop.
1101 ///
1102 /// \returns Point where to insert code after the workshare construct.
1103 InsertPointOrErrorTy applyStaticWorkshareLoop(
1105 omp::WorksharingLoopType LoopType, bool NeedsBarrier);
1106
1107 /// Modifies the canonical loop a statically-scheduled workshare loop with a
1108 /// user-specified chunk size.
1109 ///
1110 /// \param DL Debug location for instructions added for the
1111 /// workshare-loop construct itself.
1112 /// \param CLI A descriptor of the canonical loop to workshare.
1113 /// \param AllocaIP An insertion point for Alloca instructions usable in
1114 /// the preheader of the loop.
1115 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
1116 /// loop.
1117 /// \param ChunkSize The user-specified chunk size.
1118 ///
1119 /// \returns Point where to insert code after the workshare construct.
1120 InsertPointOrErrorTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
1121 CanonicalLoopInfo *CLI,
1122 InsertPointTy AllocaIP,
1123 bool NeedsBarrier,
1124 Value *ChunkSize);
1125
1126 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
1127 ///
1128 /// This takes a \p LoopInfo representing a canonical loop, such as the one
1129 /// created by \p createCanonicalLoop and emits additional instructions to
1130 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1131 /// runtime function in the preheader to obtain, and then in each iteration
1132 /// to update the loop counter.
1133 ///
1134 /// \param DL Debug location for instructions added for the
1135 /// workshare-loop construct itself.
1136 /// \param CLI A descriptor of the canonical loop to workshare.
1137 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1138 /// preheader of the loop.
1139 /// \param SchedType Type of scheduling to be passed to the init function.
1140 /// \param NeedsBarrier Indicates whether a barrier must be insterted after
1141 /// the loop.
1142 /// \param Chunk The size of loop chunk considered as a unit when
1143 /// scheduling. If \p nullptr, defaults to 1.
1144 ///
1145 /// \returns Point where to insert code after the workshare construct.
1146 InsertPointOrErrorTy applyDynamicWorkshareLoop(DebugLoc DL,
1147 CanonicalLoopInfo *CLI,
1148 InsertPointTy AllocaIP,
1149 omp::OMPScheduleType SchedType,
1150 bool NeedsBarrier,
1151 Value *Chunk = nullptr);
1152
1153 /// Create alternative version of the loop to support if clause
1154 ///
1155 /// OpenMP if clause can require to generate second loop. This loop
1156 /// will be executed when if clause condition is not met. createIfVersion
1157 /// adds branch instruction to the copied loop if \p ifCond is not met.
1158 ///
1159 /// \param Loop Original loop which should be versioned.
1160 /// \param IfCond Value which corresponds to if clause condition
1161 /// \param VMap Value to value map to define relation between
1162 /// original and copied loop values and loop blocks.
1163 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
1164 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
1166 LoopAnalysis &LIA, LoopInfo &LI, llvm::Loop *L,
1167 const Twine &NamePrefix = "");
1168
1169public:
1170 /// Modifies the canonical loop to be a workshare loop.
1171 ///
1172 /// This takes a \p LoopInfo representing a canonical loop, such as the one
1173 /// created by \p createCanonicalLoop and emits additional instructions to
1174 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1175 /// runtime function in the preheader to obtain the loop bounds to be used in
1176 /// the current thread, updates the relevant instructions in the canonical
1177 /// loop and calls to an OpenMP runtime finalization function after the loop.
1178 ///
1179 /// The concrete transformation is done by applyStaticWorkshareLoop,
1180 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
1181 /// on the value of \p SchedKind and \p ChunkSize.
1182 ///
1183 /// \param DL Debug location for instructions added for the
1184 /// workshare-loop construct itself.
1185 /// \param CLI A descriptor of the canonical loop to workshare.
1186 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1187 /// preheader of the loop.
1188 /// \param NeedsBarrier Indicates whether a barrier must be insterted after
1189 /// the loop.
1190 /// \param SchedKind Scheduling algorithm to use.
1191 /// \param ChunkSize The chunk size for the inner loop.
1192 /// \param HasSimdModifier Whether the simd modifier is present in the
1193 /// schedule clause.
1194 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1195 /// the schedule clause.
1196 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1197 /// present in the schedule clause.
1198 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1199 /// present.
1200 /// \param LoopType Information about type of loop worksharing.
1201 /// It corresponds to type of loop workshare OpenMP pragma.
1202 ///
1203 /// \returns Point where to insert code after the workshare construct.
1206 bool NeedsBarrier,
1207 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1208 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1209 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1210 bool HasOrderedClause = false,
1211 omp::WorksharingLoopType LoopType =
1213
1214 /// Tile a loop nest.
1215 ///
1216 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1217 /// \p/ Loops must be perfectly nested, from outermost to innermost loop
1218 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1219 /// of every loop and every tile sizes must be usable in the outermost
1220 /// loop's preheader. This implies that the loop nest is rectangular.
1221 ///
1222 /// Example:
1223 /// \code
1224 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
1225 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
1226 /// body(i, j);
1227 /// \endcode
1228 ///
1229 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1230 /// \code
1231 /// for (int i1 = 0; i1 < 3; ++i1)
1232 /// for (int j1 = 0; j1 < 2; ++j1)
1233 /// for (int i2 = 0; i2 < 5; ++i2)
1234 /// for (int j2 = 0; j2 < 7; ++j2)
1235 /// body(i1*3+i2, j1*3+j2);
1236 /// \endcode
1237 ///
1238 /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are
1239 /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also
1240 /// handles non-constant trip counts, non-constant tile sizes and trip counts
1241 /// that are not multiples of the tile size. In the latter case the tile loop
1242 /// of the last floor-loop iteration will have fewer iterations than specified
1243 /// as its tile size.
1244 ///
1245 ///
1246 /// @param DL Debug location for instructions added by tiling, for
1247 /// instance the floor- and tile trip count computation.
1248 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
1249 /// invalidated by this method, i.e. should not used after
1250 /// tiling.
1251 /// @param TileSizes For each loop in \p Loops, the tile size for that
1252 /// dimensions.
1253 ///
1254 /// \returns A list of generated loops. Contains twice as many loops as the
1255 /// input loop nest; the first half are the floor loops and the
1256 /// second half are the tile loops.
1257 LLVM_ABI std::vector<CanonicalLoopInfo *>
1259 ArrayRef<Value *> TileSizes);
1260
1261 /// Fully unroll a loop.
1262 ///
1263 /// Instead of unrolling the loop immediately (and duplicating its body
1264 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1265 /// metadata.
1266 ///
1267 /// \param DL Debug location for instructions added by unrolling.
1268 /// \param Loop The loop to unroll. The loop will be invalidated.
1270
1271 /// Fully or partially unroll a loop. How the loop is unrolled is determined
1272 /// using LLVM's LoopUnrollPass.
1273 ///
1274 /// \param DL Debug location for instructions added by unrolling.
1275 /// \param Loop The loop to unroll. The loop will be invalidated.
1277
1278 /// Partially unroll a loop.
1279 ///
1280 /// The CanonicalLoopInfo of the unrolled loop for use with chained
1281 /// loop-associated directive can be requested using \p UnrolledCLI. Not
1282 /// needing the CanonicalLoopInfo allows more efficient code generation by
1283 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1284 /// A loop-associated directive applied to the unrolled loop needs to know the
1285 /// new trip count which means that if using a heuristically determined unroll
1286 /// factor (\p Factor == 0), that factor must be computed immediately. We are
1287 /// using the same logic as the LoopUnrollPass to derived the unroll factor,
1288 /// but which assumes that some canonicalization has taken place (e.g.
1289 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1290 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1291 ///
1292 /// \param DL Debug location for instructions added by unrolling.
1293 /// \param Loop The loop to unroll. The loop will be invalidated.
1294 /// \param Factor The factor to unroll the loop by. A factor of 0
1295 /// indicates that a heuristic should be used to determine
1296 /// the unroll-factor.
1297 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1298 /// partially unrolled loop. Otherwise, uses loop metadata
1299 /// to defer unrolling to the LoopUnrollPass.
1301 int32_t Factor,
1302 CanonicalLoopInfo **UnrolledCLI);
1303
1304 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
1305 /// is cloned. The metadata which prevents vectorization is added to
1306 /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
1307 /// to false.
1308 ///
1309 /// \param Loop The loop to simd-ize.
1310 /// \param AlignedVars The map which containts pairs of the pointer
1311 /// and its corresponding alignment.
1312 /// \param IfCond The value which corresponds to the if clause
1313 /// condition.
1314 /// \param Order The enum to map order clause.
1315 /// \param Simdlen The Simdlen length to apply to the simd loop.
1316 /// \param Safelen The Safelen length to apply to the simd loop.
1318 MapVector<Value *, Value *> AlignedVars,
1319 Value *IfCond, omp::OrderKind Order,
1320 ConstantInt *Simdlen, ConstantInt *Safelen);
1321
1322 /// Generator for '#omp flush'
1323 ///
1324 /// \param Loc The location where the flush directive was encountered
1325 LLVM_ABI void createFlush(const LocationDescription &Loc);
1326
1327 /// Generator for '#omp taskwait'
1328 ///
1329 /// \param Loc The location where the taskwait directive was encountered.
1330 LLVM_ABI void createTaskwait(const LocationDescription &Loc);
1331
1332 /// Generator for '#omp taskyield'
1333 ///
1334 /// \param Loc The location where the taskyield directive was encountered.
1335 LLVM_ABI void createTaskyield(const LocationDescription &Loc);
1336
1337 /// A struct to pack the relevant information for an OpenMP depend clause.
1338 struct DependData {
1342 explicit DependData() = default;
1344 Value *DepVal)
1346 };
1347
1348 /// Generator for `#omp task`
1349 ///
1350 /// \param Loc The location where the task construct was encountered.
1351 /// \param AllocaIP The insertion point to be used for alloca instructions.
1352 /// \param BodyGenCB Callback that will generate the region code.
1353 /// \param Tied True if the task is tied, false if the task is untied.
1354 /// \param Final i1 value which is `true` if the task is final, `false` if the
1355 /// task is not final.
1356 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1357 /// task is generated, and the encountering thread must
1358 /// suspend the current task region, for which execution
1359 /// cannot be resumed until execution of the structured
1360 /// block that is associated with the generated task is
1361 /// completed.
1362 /// \param EventHandle If present, signifies the event handle as part of
1363 /// the detach clause
1364 /// \param Mergeable If the given task is `mergeable`
1365 /// \param priority `priority-value' specifies the execution order of the
1366 /// tasks that is generated by the construct
1368 createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
1369 BodyGenCallbackTy BodyGenCB, bool Tied = true,
1370 Value *Final = nullptr, Value *IfCondition = nullptr,
1371 SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
1372 Value *EventHandle = nullptr, Value *Priority = nullptr);
1373
1374 /// Generator for the taskgroup construct
1375 ///
1376 /// \param Loc The location where the taskgroup construct was encountered.
1377 /// \param AllocaIP The insertion point to be used for alloca instructions.
1378 /// \param BodyGenCB Callback that will generate the region code.
1379 LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc,
1380 InsertPointTy AllocaIP,
1381 BodyGenCallbackTy BodyGenCB);
1382
1384 std::function<std::tuple<std::string, uint64_t>()>;
1385
1386 /// Creates a unique info for a target entry when provided a filename and
1387 /// line number from.
1388 ///
1389 /// \param CallBack A callback function which should return filename the entry
1390 /// resides in as well as the line number for the target entry
1391 /// \param ParentName The name of the parent the target entry resides in, if
1392 /// any.
1395 StringRef ParentName = "");
1396
1397 /// Enum class for the RedctionGen CallBack type to be used.
1399
1400 /// ReductionGen CallBack for Clang
1401 ///
1402 /// \param CodeGenIP InsertPoint for CodeGen.
1403 /// \param Index Index of the ReductionInfo to generate code for.
1404 /// \param LHSPtr Optionally used by Clang to return the LHSPtr it used for
1405 /// codegen, used for fixup later.
1406 /// \param RHSPtr Optionally used by Clang to
1407 /// return the RHSPtr it used for codegen, used for fixup later.
1408 /// \param CurFn Optionally used by Clang to pass in the Current Function as
1409 /// Clang context may be old.
1411 std::function<InsertPointTy(InsertPointTy CodeGenIP, unsigned Index,
1412 Value **LHS, Value **RHS, Function *CurFn)>;
1413
1414 /// ReductionGen CallBack for MLIR
1415 ///
1416 /// \param CodeGenIP InsertPoint for CodeGen.
1417 /// \param LHS Pass in the LHS Value to be used for CodeGen.
1418 /// \param RHS Pass in the RHS Value to be used for CodeGen.
1420 InsertPointTy CodeGenIP, Value *LHS, Value *RHS, Value *&Res)>;
1421
1422 /// Functions used to generate atomic reductions. Such functions take two
1423 /// Values representing pointers to LHS and RHS of the reduction, as well as
1424 /// the element type of these pointers. They are expected to atomically
1425 /// update the LHS to the reduced value.
1427 InsertPointTy, Type *, Value *, Value *)>;
1428
1429 /// Enum class for reduction evaluation types scalar, complex and aggregate.
1431
1432 /// Information about an OpenMP reduction.
1443 : ElementType(nullptr), Variable(nullptr),
1446
1447 /// Reduction element type, must match pointee type of variable.
1449
1450 /// Reduction variable of pointer type.
1452
1453 /// Thread-private partial reduction variable.
1455
1456 /// Reduction evaluation kind - scalar, complex or aggregate.
1458
1459 /// Callback for generating the reduction body. The IR produced by this will
1460 /// be used to combine two values in a thread-safe context, e.g., under
1461 /// lock or within the same thread, and therefore need not be atomic.
1463
1464 /// Clang callback for generating the reduction body. The IR produced by
1465 /// this will be used to combine two values in a thread-safe context, e.g.,
1466 /// under lock or within the same thread, and therefore need not be atomic.
1468
1469 /// Callback for generating the atomic reduction body, may be null. The IR
1470 /// produced by this will be used to atomically combine two values during
1471 /// reduction. If null, the implementation will use the non-atomic version
1472 /// along with the appropriate synchronization mechanisms.
1474 };
1475
1476 enum class CopyAction : unsigned {
1477 // RemoteLaneToThread: Copy over a Reduce list from a remote lane in
1478 // the warp using shuffle instructions.
1480 // ThreadCopy: Make a copy of a Reduce list on the thread's stack.
1481 ThreadCopy,
1482 };
1483
1488 };
1489
1490 /// Supporting functions for Reductions CodeGen.
1491private:
1492 /// Get the id of the current thread on the GPU.
1493 Value *getGPUThreadID();
1494
1495 /// Get the GPU warp size.
1496 Value *getGPUWarpSize();
1497
1498 /// Get the id of the warp in the block.
1499 /// We assume that the warp size is 32, which is always the case
1500 /// on the NVPTX device, to generate more efficient code.
1501 Value *getNVPTXWarpID();
1502
1503 /// Get the id of the current lane in the Warp.
1504 /// We assume that the warp size is 32, which is always the case
1505 /// on the NVPTX device, to generate more efficient code.
1506 Value *getNVPTXLaneID();
1507
1508 /// Cast value to the specified type.
1509 Value *castValueToType(InsertPointTy AllocaIP, Value *From, Type *ToType);
1510
1511 /// This function creates calls to one of two shuffle functions to copy
1512 /// variables between lanes in a warp.
1513 Value *createRuntimeShuffleFunction(InsertPointTy AllocaIP, Value *Element,
1514 Type *ElementType, Value *Offset);
1515
1516 /// Function to shuffle over the value from the remote lane.
1517 void shuffleAndStore(InsertPointTy AllocaIP, Value *SrcAddr, Value *DstAddr,
1518 Type *ElementType, Value *Offset,
1519 Type *ReductionArrayTy);
1520
1521 /// Emit instructions to copy a Reduce list, which contains partially
1522 /// aggregated values, in the specified direction.
1523 void emitReductionListCopy(
1524 InsertPointTy AllocaIP, CopyAction Action, Type *ReductionArrayTy,
1525 ArrayRef<ReductionInfo> ReductionInfos, Value *SrcBase, Value *DestBase,
1526 CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr});
1527
1528 /// Emit a helper that reduces data across two OpenMP threads (lanes)
1529 /// in the same warp. It uses shuffle instructions to copy over data from
1530 /// a remote lane's stack. The reduction algorithm performed is specified
1531 /// by the fourth parameter.
1532 ///
1533 /// Algorithm Versions.
1534 /// Full Warp Reduce (argument value 0):
1535 /// This algorithm assumes that all 32 lanes are active and gathers
1536 /// data from these 32 lanes, producing a single resultant value.
1537 /// Contiguous Partial Warp Reduce (argument value 1):
1538 /// This algorithm assumes that only a *contiguous* subset of lanes
1539 /// are active. This happens for the last warp in a parallel region
1540 /// when the user specified num_threads is not an integer multiple of
1541 /// 32. This contiguous subset always starts with the zeroth lane.
1542 /// Partial Warp Reduce (argument value 2):
1543 /// This algorithm gathers data from any number of lanes at any position.
1544 /// All reduced values are stored in the lowest possible lane. The set
1545 /// of problems every algorithm addresses is a super set of those
1546 /// addressable by algorithms with a lower version number. Overhead
1547 /// increases as algorithm version increases.
1548 ///
1549 /// Terminology
1550 /// Reduce element:
1551 /// Reduce element refers to the individual data field with primitive
1552 /// data types to be combined and reduced across threads.
1553 /// Reduce list:
1554 /// Reduce list refers to a collection of local, thread-private
1555 /// reduce elements.
1556 /// Remote Reduce list:
1557 /// Remote Reduce list refers to a collection of remote (relative to
1558 /// the current thread) reduce elements.
1559 ///
1560 /// We distinguish between three states of threads that are important to
1561 /// the implementation of this function.
1562 /// Alive threads:
1563 /// Threads in a warp executing the SIMT instruction, as distinguished from
1564 /// threads that are inactive due to divergent control flow.
1565 /// Active threads:
1566 /// The minimal set of threads that has to be alive upon entry to this
1567 /// function. The computation is correct iff active threads are alive.
1568 /// Some threads are alive but they are not active because they do not
1569 /// contribute to the computation in any useful manner. Turning them off
1570 /// may introduce control flow overheads without any tangible benefits.
1571 /// Effective threads:
1572 /// In order to comply with the argument requirements of the shuffle
1573 /// function, we must keep all lanes holding data alive. But at most
1574 /// half of them perform value aggregation; we refer to this half of
1575 /// threads as effective. The other half is simply handing off their
1576 /// data.
1577 ///
1578 /// Procedure
1579 /// Value shuffle:
1580 /// In this step active threads transfer data from higher lane positions
1581 /// in the warp to lower lane positions, creating Remote Reduce list.
1582 /// Value aggregation:
1583 /// In this step, effective threads combine their thread local Reduce list
1584 /// with Remote Reduce list and store the result in the thread local
1585 /// Reduce list.
1586 /// Value copy:
1587 /// In this step, we deal with the assumption made by algorithm 2
1588 /// (i.e. contiguity assumption). When we have an odd number of lanes
1589 /// active, say 2k+1, only k threads will be effective and therefore k
1590 /// new values will be produced. However, the Reduce list owned by the
1591 /// (2k+1)th thread is ignored in the value aggregation. Therefore
1592 /// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so
1593 /// that the contiguity assumption still holds.
1594 ///
1595 /// \param ReductionInfos Array type containing the ReductionOps.
1596 /// \param ReduceFn The reduction function.
1597 /// \param FuncAttrs Optional param to specify any function attributes that
1598 /// need to be copied to the new function.
1599 ///
1600 /// \return The ShuffleAndReduce function.
1601 Function *emitShuffleAndReduceFunction(
1603 Function *ReduceFn, AttributeList FuncAttrs);
1604
1605 /// Helper function for CreateCanonicalScanLoops to create InputLoop
1606 /// in the firstGen and Scan Loop in the SecondGen
1607 /// \param InputLoopGen Callback for generating the loop for input phase
1608 /// \param ScanLoopGen Callback for generating the loop for scan phase
1609 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1610 /// `ScanInfoInitialize`.
1611 ///
1612 /// \return error if any produced, else return success.
1613 Error emitScanBasedDirectiveIR(
1614 llvm::function_ref<Error()> InputLoopGen,
1615 llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen,
1616 ScanInfo *ScanRedInfo);
1617
1618 /// Creates the basic blocks required for scan reduction.
1619 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1620 /// `ScanInfoInitialize`.
1621 void createScanBBs(ScanInfo *ScanRedInfo);
1622
1623 /// Dynamically allocates the buffer needed for scan reduction.
1624 /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to
1625 /// be declared.
1626 /// \param ScanVars Scan Variables.
1627 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1628 /// `ScanInfoInitialize`.
1629 ///
1630 /// \return error if any produced, else return success.
1631 Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
1632 ArrayRef<llvm::Value *> ScanVars,
1633 ArrayRef<llvm::Type *> ScanVarsType,
1634 ScanInfo *ScanRedInfo);
1635
1636 /// Copies the result back to the reduction variable.
1637 /// \param ReductionInfos Array type containing the ReductionOps.
1638 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
1639 /// `ScanInfoInitialize`.
1640 ///
1641 /// \return error if any produced, else return success.
1642 Error emitScanBasedDirectiveFinalsIR(
1645
1646 /// This function emits a helper that gathers Reduce lists from the first
1647 /// lane of every active warp to lanes in the first warp.
1648 ///
1649 /// void inter_warp_copy_func(void* reduce_data, num_warps)
1650 /// shared smem[warp_size];
1651 /// For all data entries D in reduce_data:
1652 /// sync
1653 /// If (I am the first lane in each warp)
1654 /// Copy my local D to smem[warp_id]
1655 /// sync
1656 /// if (I am the first warp)
1657 /// Copy smem[thread_id] to my local D
1658 ///
1659 /// \param Loc The insert and source location description.
1660 /// \param ReductionInfos Array type containing the ReductionOps.
1661 /// \param FuncAttrs Optional param to specify any function attributes that
1662 /// need to be copied to the new function.
1663 ///
1664 /// \return The InterWarpCopy function.
1666 emitInterWarpCopyFunction(const LocationDescription &Loc,
1667 ArrayRef<ReductionInfo> ReductionInfos,
1668 AttributeList FuncAttrs);
1669
1670 /// This function emits a helper that copies all the reduction variables from
1671 /// the team into the provided global buffer for the reduction variables.
1672 ///
1673 /// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
1674 /// For all data entries D in reduce_data:
1675 /// Copy local D to buffer.D[Idx]
1676 ///
1677 /// \param ReductionInfos Array type containing the ReductionOps.
1678 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
1679 /// \param FuncAttrs Optional param to specify any function attributes that
1680 /// need to be copied to the new function.
1681 ///
1682 /// \return The ListToGlobalCopy function.
1683 Function *emitListToGlobalCopyFunction(ArrayRef<ReductionInfo> ReductionInfos,
1684 Type *ReductionsBufferTy,
1685 AttributeList FuncAttrs);
1686
1687 /// This function emits a helper that copies all the reduction variables from
1688 /// the team into the provided global buffer for the reduction variables.
1689 ///
1690 /// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data)
1691 /// For all data entries D in reduce_data:
1692 /// Copy buffer.D[Idx] to local D;
1693 ///
1694 /// \param ReductionInfos Array type containing the ReductionOps.
1695 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
1696 /// \param FuncAttrs Optional param to specify any function attributes that
1697 /// need to be copied to the new function.
1698 ///
1699 /// \return The GlobalToList function.
1700 Function *emitGlobalToListCopyFunction(ArrayRef<ReductionInfo> ReductionInfos,
1701 Type *ReductionsBufferTy,
1702 AttributeList FuncAttrs);
1703
1704 /// This function emits a helper that reduces all the reduction variables from
1705 /// the team into the provided global buffer for the reduction variables.
1706 ///
1707 /// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data)
1708 /// void *GlobPtrs[];
1709 /// GlobPtrs[0] = (void*)&buffer.D0[Idx];
1710 /// ...
1711 /// GlobPtrs[N] = (void*)&buffer.DN[Idx];
1712 /// reduce_function(GlobPtrs, reduce_data);
1713 ///
1714 /// \param ReductionInfos Array type containing the ReductionOps.
1715 /// \param ReduceFn The reduction function.
1716 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
1717 /// \param FuncAttrs Optional param to specify any function attributes that
1718 /// need to be copied to the new function.
1719 ///
1720 /// \return The ListToGlobalReduce function.
1721 Function *
1722 emitListToGlobalReduceFunction(ArrayRef<ReductionInfo> ReductionInfos,
1723 Function *ReduceFn, Type *ReductionsBufferTy,
1724 AttributeList FuncAttrs);
1725
1726 /// This function emits a helper that reduces all the reduction variables from
1727 /// the team into the provided global buffer for the reduction variables.
1728 ///
1729 /// void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data)
1730 /// void *GlobPtrs[];
1731 /// GlobPtrs[0] = (void*)&buffer.D0[Idx];
1732 /// ...
1733 /// GlobPtrs[N] = (void*)&buffer.DN[Idx];
1734 /// reduce_function(reduce_data, GlobPtrs);
1735 ///
1736 /// \param ReductionInfos Array type containing the ReductionOps.
1737 /// \param ReduceFn The reduction function.
1738 /// \param ReductionsBufferTy The StructTy for the reductions buffer.
1739 /// \param FuncAttrs Optional param to specify any function attributes that
1740 /// need to be copied to the new function.
1741 ///
1742 /// \return The GlobalToListReduce function.
1743 Function *
1744 emitGlobalToListReduceFunction(ArrayRef<ReductionInfo> ReductionInfos,
1745 Function *ReduceFn, Type *ReductionsBufferTy,
1746 AttributeList FuncAttrs);
1747
1748 /// Get the function name of a reduction function.
1749 std::string getReductionFuncName(StringRef Name) const;
1750
1751 /// Emits reduction function.
1752 /// \param ReducerName Name of the function calling the reduction.
1753 /// \param ReductionInfos Array type containing the ReductionOps.
1754 /// \param ReductionGenCBKind Optional param to specify Clang or MLIR
1755 /// CodeGenCB kind.
1756 /// \param FuncAttrs Optional param to specify any function attributes that
1757 /// need to be copied to the new function.
1758 ///
1759 /// \return The reduction function.
1760 Expected<Function *> createReductionFunction(
1761 StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
1763 AttributeList FuncAttrs = {});
1764
1765public:
1766 ///
1767 /// Design of OpenMP reductions on the GPU
1768 ///
1769 /// Consider a typical OpenMP program with one or more reduction
1770 /// clauses:
1771 ///
1772 /// float foo;
1773 /// double bar;
1774 /// #pragma omp target teams distribute parallel for \
1775 /// reduction(+:foo) reduction(*:bar)
1776 /// for (int i = 0; i < N; i++) {
1777 /// foo += A[i]; bar *= B[i];
1778 /// }
1779 ///
1780 /// where 'foo' and 'bar' are reduced across all OpenMP threads in
1781 /// all teams. In our OpenMP implementation on the NVPTX device an
1782 /// OpenMP team is mapped to a CUDA threadblock and OpenMP threads
1783 /// within a team are mapped to CUDA threads within a threadblock.
1784 /// Our goal is to efficiently aggregate values across all OpenMP
1785 /// threads such that:
1786 ///
1787 /// - the compiler and runtime are logically concise, and
1788 /// - the reduction is performed efficiently in a hierarchical
1789 /// manner as follows: within OpenMP threads in the same warp,
1790 /// across warps in a threadblock, and finally across teams on
1791 /// the NVPTX device.
1792 ///
1793 /// Introduction to Decoupling
1794 ///
1795 /// We would like to decouple the compiler and the runtime so that the
1796 /// latter is ignorant of the reduction variables (number, data types)
1797 /// and the reduction operators. This allows a simpler interface
1798 /// and implementation while still attaining good performance.
1799 ///
1800 /// Pseudocode for the aforementioned OpenMP program generated by the
1801 /// compiler is as follows:
1802 ///
1803 /// 1. Create private copies of reduction variables on each OpenMP
1804 /// thread: 'foo_private', 'bar_private'
1805 /// 2. Each OpenMP thread reduces the chunk of 'A' and 'B' assigned
1806 /// to it and writes the result in 'foo_private' and 'bar_private'
1807 /// respectively.
1808 /// 3. Call the OpenMP runtime on the GPU to reduce within a team
1809 /// and store the result on the team master:
1810 ///
1811 /// __kmpc_nvptx_parallel_reduce_nowait_v2(...,
1812 /// reduceData, shuffleReduceFn, interWarpCpyFn)
1813 ///
1814 /// where:
1815 /// struct ReduceData {
1816 /// double *foo;
1817 /// double *bar;
1818 /// } reduceData
1819 /// reduceData.foo = &foo_private
1820 /// reduceData.bar = &bar_private
1821 ///
1822 /// 'shuffleReduceFn' and 'interWarpCpyFn' are pointers to two
1823 /// auxiliary functions generated by the compiler that operate on
1824 /// variables of type 'ReduceData'. They aid the runtime perform
1825 /// algorithmic steps in a data agnostic manner.
1826 ///
1827 /// 'shuffleReduceFn' is a pointer to a function that reduces data
1828 /// of type 'ReduceData' across two OpenMP threads (lanes) in the
1829 /// same warp. It takes the following arguments as input:
1830 ///
1831 /// a. variable of type 'ReduceData' on the calling lane,
1832 /// b. its lane_id,
1833 /// c. an offset relative to the current lane_id to generate a
1834 /// remote_lane_id. The remote lane contains the second
1835 /// variable of type 'ReduceData' that is to be reduced.
1836 /// d. an algorithm version parameter determining which reduction
1837 /// algorithm to use.
1838 ///
1839 /// 'shuffleReduceFn' retrieves data from the remote lane using
1840 /// efficient GPU shuffle intrinsics and reduces, using the
1841 /// algorithm specified by the 4th parameter, the two operands
1842 /// element-wise. The result is written to the first operand.
1843 ///
1844 /// Different reduction algorithms are implemented in different
1845 /// runtime functions, all calling 'shuffleReduceFn' to perform
1846 /// the essential reduction step. Therefore, based on the 4th
1847 /// parameter, this function behaves slightly differently to
1848 /// cooperate with the runtime to ensure correctness under
1849 /// different circumstances.
1850 ///
1851 /// 'InterWarpCpyFn' is a pointer to a function that transfers
1852 /// reduced variables across warps. It tunnels, through CUDA
1853 /// shared memory, the thread-private data of type 'ReduceData'
1854 /// from lane 0 of each warp to a lane in the first warp.
1855 /// 4. Call the OpenMP runtime on the GPU to reduce across teams.
1856 /// The last team writes the global reduced value to memory.
1857 ///
1858 /// ret = __kmpc_nvptx_teams_reduce_nowait(...,
1859 /// reduceData, shuffleReduceFn, interWarpCpyFn,
1860 /// scratchpadCopyFn, loadAndReduceFn)
1861 ///
1862 /// 'scratchpadCopyFn' is a helper that stores reduced
1863 /// data from the team master to a scratchpad array in
1864 /// global memory.
1865 ///
1866 /// 'loadAndReduceFn' is a helper that loads data from
1867 /// the scratchpad array and reduces it with the input
1868 /// operand.
1869 ///
1870 /// These compiler generated functions hide address
1871 /// calculation and alignment information from the runtime.
1872 /// 5. if ret == 1:
1873 /// The team master of the last team stores the reduced
1874 /// result to the globals in memory.
1875 /// foo += reduceData.foo; bar *= reduceData.bar
1876 ///
1877 ///
1878 /// Warp Reduction Algorithms
1879 ///
1880 /// On the warp level, we have three algorithms implemented in the
1881 /// OpenMP runtime depending on the number of active lanes:
1882 ///
1883 /// Full Warp Reduction
1884 ///
1885 /// The reduce algorithm within a warp where all lanes are active
1886 /// is implemented in the runtime as follows:
1887 ///
1888 /// full_warp_reduce(void *reduce_data,
1889 /// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
1890 /// for (int offset = WARPSIZE/2; offset > 0; offset /= 2)
1891 /// ShuffleReduceFn(reduce_data, 0, offset, 0);
1892 /// }
1893 ///
1894 /// The algorithm completes in log(2, WARPSIZE) steps.
1895 ///
1896 /// 'ShuffleReduceFn' is used here with lane_id set to 0 because it is
1897 /// not used therefore we save instructions by not retrieving lane_id
1898 /// from the corresponding special registers. The 4th parameter, which
1899 /// represents the version of the algorithm being used, is set to 0 to
1900 /// signify full warp reduction.
1901 ///
1902 /// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
1903 ///
1904 /// #reduce_elem refers to an element in the local lane's data structure
1905 /// #remote_elem is retrieved from a remote lane
1906 /// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
1907 /// reduce_elem = reduce_elem REDUCE_OP remote_elem;
1908 ///
1909 /// Contiguous Partial Warp Reduction
1910 ///
1911 /// This reduce algorithm is used within a warp where only the first
1912 /// 'n' (n <= WARPSIZE) lanes are active. It is typically used when the
1913 /// number of OpenMP threads in a parallel region is not a multiple of
1914 /// WARPSIZE. The algorithm is implemented in the runtime as follows:
1915 ///
1916 /// void
1917 /// contiguous_partial_reduce(void *reduce_data,
1918 /// kmp_ShuffleReductFctPtr ShuffleReduceFn,
1919 /// int size, int lane_id) {
1920 /// int curr_size;
1921 /// int offset;
1922 /// curr_size = size;
1923 /// mask = curr_size/2;
1924 /// while (offset>0) {
1925 /// ShuffleReduceFn(reduce_data, lane_id, offset, 1);
1926 /// curr_size = (curr_size+1)/2;
1927 /// offset = curr_size/2;
1928 /// }
1929 /// }
1930 ///
1931 /// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
1932 ///
1933 /// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
1934 /// if (lane_id < offset)
1935 /// reduce_elem = reduce_elem REDUCE_OP remote_elem
1936 /// else
1937 /// reduce_elem = remote_elem
1938 ///
1939 /// This algorithm assumes that the data to be reduced are located in a
1940 /// contiguous subset of lanes starting from the first. When there is
1941 /// an odd number of active lanes, the data in the last lane is not
1942 /// aggregated with any other lane's dat but is instead copied over.
1943 ///
1944 /// Dispersed Partial Warp Reduction
1945 ///
1946 /// This algorithm is used within a warp when any discontiguous subset of
1947 /// lanes are active. It is used to implement the reduction operation
1948 /// across lanes in an OpenMP simd region or in a nested parallel region.
1949 ///
1950 /// void
1951 /// dispersed_partial_reduce(void *reduce_data,
1952 /// kmp_ShuffleReductFctPtr ShuffleReduceFn) {
1953 /// int size, remote_id;
1954 /// int logical_lane_id = number_of_active_lanes_before_me() * 2;
1955 /// do {
1956 /// remote_id = next_active_lane_id_right_after_me();
1957 /// # the above function returns 0 of no active lane
1958 /// # is present right after the current lane.
1959 /// size = number_of_active_lanes_in_this_warp();
1960 /// logical_lane_id /= 2;
1961 /// ShuffleReduceFn(reduce_data, logical_lane_id,
1962 /// remote_id-1-threadIdx.x, 2);
1963 /// } while (logical_lane_id % 2 == 0 && size > 1);
1964 /// }
1965 ///
1966 /// There is no assumption made about the initial state of the reduction.
1967 /// Any number of lanes (>=1) could be active at any position. The reduction
1968 /// result is returned in the first active lane.
1969 ///
1970 /// In this version, 'ShuffleReduceFn' behaves, per element, as follows:
1971 ///
1972 /// remote_elem = shuffle_down(reduce_elem, offset, WARPSIZE);
1973 /// if (lane_id % 2 == 0 && offset > 0)
1974 /// reduce_elem = reduce_elem REDUCE_OP remote_elem
1975 /// else
1976 /// reduce_elem = remote_elem
1977 ///
1978 ///
1979 /// Intra-Team Reduction
1980 ///
1981 /// This function, as implemented in the runtime call
1982 /// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP
1983 /// threads in a team. It first reduces within a warp using the
1984 /// aforementioned algorithms. We then proceed to gather all such
1985 /// reduced values at the first warp.
1986 ///
1987 /// The runtime makes use of the function 'InterWarpCpyFn', which copies
1988 /// data from each of the "warp master" (zeroth lane of each warp, where
1989 /// warp-reduced data is held) to the zeroth warp. This step reduces (in
1990 /// a mathematical sense) the problem of reduction across warp masters in
1991 /// a block to the problem of warp reduction.
1992 ///
1993 ///
1994 /// Inter-Team Reduction
1995 ///
1996 /// Once a team has reduced its data to a single value, it is stored in
1997 /// a global scratchpad array. Since each team has a distinct slot, this
1998 /// can be done without locking.
1999 ///
2000 /// The last team to write to the scratchpad array proceeds to reduce the
2001 /// scratchpad array. One or more workers in the last team use the helper
2002 /// 'loadAndReduceDataFn' to load and reduce values from the array, i.e.,
2003 /// the k'th worker reduces every k'th element.
2004 ///
2005 /// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to
2006 /// reduce across workers and compute a globally reduced value.
2007 ///
2008 /// \param Loc The location where the reduction was
2009 /// encountered. Must be within the associate
2010 /// directive and after the last local access to the
2011 /// reduction variables.
2012 /// \param AllocaIP An insertion point suitable for allocas usable
2013 /// in reductions.
2014 /// \param CodeGenIP An insertion point suitable for code
2015 /// generation. \param ReductionInfos A list of info on each reduction
2016 /// variable. \param IsNoWait Optional flag set if the reduction is
2017 /// marked as
2018 /// nowait.
2019 /// \param IsTeamsReduction Optional flag set if it is a teams
2020 /// reduction.
2021 /// \param GridValue Optional GPU grid value.
2022 /// \param ReductionBufNum Optional OpenMPCUDAReductionBufNumValue to be
2023 /// used for teams reduction.
2024 /// \param SrcLocInfo Source location information global.
2026 const LocationDescription &Loc, InsertPointTy AllocaIP,
2027 InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos,
2028 bool IsNoWait = false, bool IsTeamsReduction = false,
2030 std::optional<omp::GV> GridValue = {}, unsigned ReductionBufNum = 1024,
2031 Value *SrcLocInfo = nullptr);
2032
2033 // TODO: provide atomic and non-atomic reduction generators for reduction
2034 // operators defined by the OpenMP specification.
2035
2036 /// Generator for '#omp reduction'.
2037 ///
2038 /// Emits the IR instructing the runtime to perform the specific kind of
2039 /// reductions. Expects reduction variables to have been privatized and
2040 /// initialized to reduction-neutral values separately. Emits the calls to
2041 /// runtime functions as well as the reduction function and the basic blocks
2042 /// performing the reduction atomically and non-atomically.
2043 ///
2044 /// The code emitted for the following:
2045 ///
2046 /// \code
2047 /// type var_1;
2048 /// type var_2;
2049 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
2050 /// /* body */;
2051 /// \endcode
2052 ///
2053 /// corresponds to the following sketch.
2054 ///
2055 /// \code
2056 /// void _outlined_par() {
2057 /// // N is the number of different reductions.
2058 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
2059 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
2060 /// _omp_reduction_func,
2061 /// _gomp_critical_user.reduction.var)) {
2062 /// case 1: {
2063 /// var_1 = var_1 <reduction-op> privatized_var_1;
2064 /// var_2 = var_2 <reduction-op> privatized_var_2;
2065 /// // ...
2066 /// __kmpc_end_reduce(...);
2067 /// break;
2068 /// }
2069 /// case 2: {
2070 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
2071 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
2072 /// // ...
2073 /// break;
2074 /// }
2075 /// default: break;
2076 /// }
2077 /// }
2078 ///
2079 /// void _omp_reduction_func(void **lhs, void **rhs) {
2080 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
2081 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
2082 /// // ...
2083 /// }
2084 /// \endcode
2085 ///
2086 /// \param Loc The location where the reduction was
2087 /// encountered. Must be within the associate
2088 /// directive and after the last local access to the
2089 /// reduction variables.
2090 /// \param AllocaIP An insertion point suitable for allocas usable
2091 /// in reductions.
2092 /// \param ReductionInfos A list of info on each reduction variable.
2093 /// \param IsNoWait A flag set if the reduction is marked as nowait.
2094 /// \param IsByRef A flag set if the reduction is using reference
2095 /// or direct value.
2096 /// \param IsTeamsReduction Optional flag set if it is a teams
2097 /// reduction.
2099 const LocationDescription &Loc, InsertPointTy AllocaIP,
2100 ArrayRef<ReductionInfo> ReductionInfos, ArrayRef<bool> IsByRef,
2101 bool IsNoWait = false, bool IsTeamsReduction = false);
2102
2103 ///}
2104
2105 /// Return the insertion point used by the underlying IRBuilder.
2107
2108 /// Update the internal location to \p Loc.
2110 Builder.restoreIP(Loc.IP);
2112 return Loc.IP.getBlock() != nullptr;
2113 }
2114
2115 /// Return the function declaration for the runtime function with \p FnID.
2118
2120
2121 /// Return the (LLVM-IR) string describing the source location \p LocStr.
2123 uint32_t &SrcLocStrSize);
2124
2125 /// Return the (LLVM-IR) string describing the default source location.
2127
2128 /// Return the (LLVM-IR) string describing the source location identified by
2129 /// the arguments.
2131 StringRef FileName, unsigned Line,
2132 unsigned Column,
2133 uint32_t &SrcLocStrSize);
2134
2135 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
2136 /// fallback if \p DL does not specify the function name.
2138 Function *F = nullptr);
2139
2140 /// Return the (LLVM-IR) string describing the source location \p Loc.
2141 LLVM_ABI Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
2142 uint32_t &SrcLocStrSize);
2143
2144 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
2145 /// TODO: Create a enum class for the Reserve2Flags
2147 uint32_t SrcLocStrSize,
2148 omp::IdentFlag Flags = omp::IdentFlag(0),
2149 unsigned Reserve2Flags = 0);
2150
2151 /// Create a hidden global flag \p Name in the module with initial value \p
2152 /// Value.
2154
2155 /// Emit the llvm.used metadata.
2157
2158 /// Emit the kernel execution mode.
2161
2162 /// Generate control flow and cleanup for cancellation.
2163 ///
2164 /// \param CancelFlag Flag indicating if the cancellation is performed.
2165 /// \param CanceledDirective The kind of directive that is cancled.
2166 /// \param ExitCB Extra code to be generated in the exit block.
2167 ///
2168 /// \return an error, if any were triggered during execution.
2170 omp::Directive CanceledDirective,
2171 FinalizeCallbackTy ExitCB = {});
2172
2173 /// Generate a target region entry call.
2174 ///
2175 /// \param Loc The location at which the request originated and is fulfilled.
2176 /// \param AllocaIP The insertion point to be used for alloca instructions.
2177 /// \param Return Return value of the created function returned by reference.
2178 /// \param DeviceID Identifier for the device via the 'device' clause.
2179 /// \param NumTeams Numer of teams for the region via the 'num_teams' clause
2180 /// or 0 if unspecified and -1 if there is no 'teams' clause.
2181 /// \param NumThreads Number of threads via the 'thread_limit' clause.
2182 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
2183 /// \param KernelArgs Array of arguments to the kernel.
2184 LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc,
2185 InsertPointTy AllocaIP,
2186 Value *&Return, Value *Ident,
2187 Value *DeviceID, Value *NumTeams,
2188 Value *NumThreads, Value *HostPtr,
2189 ArrayRef<Value *> KernelArgs);
2190
2191 /// Generate a flush runtime call.
2192 ///
2193 /// \param Loc The location at which the request originated and is fulfilled.
2194 LLVM_ABI void emitFlush(const LocationDescription &Loc);
2195
2196 /// The finalization stack made up of finalize callbacks currently in-flight,
2197 /// wrapped into FinalizationInfo objects that reference also the finalization
2198 /// target block and the kind of cancellable directive.
2200
2201 /// Return true if the last entry in the finalization stack is of kind \p DK
2202 /// and cancellable.
2203 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
2204 return !FinalizationStack.empty() &&
2205 FinalizationStack.back().IsCancellable &&
2206 FinalizationStack.back().DK == DK;
2207 }
2208
2209 /// Generate a taskwait runtime call.
2210 ///
2211 /// \param Loc The location at which the request originated and is fulfilled.
2212 LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc);
2213
2214 /// Generate a taskyield runtime call.
2215 ///
2216 /// \param Loc The location at which the request originated and is fulfilled.
2217 LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc);
2218
2219 /// Return the current thread ID.
2220 ///
2221 /// \param Ident The ident (ident_t*) describing the query origin.
2223
2224 /// The OpenMPIRBuilder Configuration
2226
2227 /// The underlying LLVM-IR module
2229
2230 /// The LLVM-IR Builder used to create IR.
2232
2233 /// Map to remember source location strings
2235
2236 /// Map to remember existing ident_t*.
2238
2239 /// Info manager to keep track of target regions.
2241
2242 /// The target triple of the underlying module.
2243 const Triple T;
2244
2245 /// Helper that contains information about regions we need to outline
2246 /// during finalization.
2248 using PostOutlineCBTy = std::function<void(Function &)>;
2252
2253 /// Collect all blocks in between EntryBB and ExitBB in both the given
2254 /// vector and set.
2256 SmallVectorImpl<BasicBlock *> &BlockVector);
2257
2258 /// Return the function that contains the region to be outlined.
2259 Function *getFunction() const { return EntryBB->getParent(); }
2260 };
2261
2262 /// Collection of regions that need to be outlined during finalization.
2264
2265 /// A collection of candidate target functions that's constant allocas will
2266 /// attempt to be raised on a call of finalize after all currently enqueued
2267 /// outline info's have been processed.
2269
2270 /// Collection of owned canonical loop objects that eventually need to be
2271 /// free'd.
2272 std::forward_list<CanonicalLoopInfo> LoopInfos;
2273
2274 /// Collection of owned ScanInfo objects that eventually need to be free'd.
2275 std::forward_list<ScanInfo> ScanInfos;
2276
2277 /// Add a new region that will be outlined later.
2278 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
2279
2280 /// An ordered map of auto-generated variables to their unique names.
2281 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
2282 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
2283 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
2284 /// variables.
2286
2287 /// Computes the size of type in bytes.
2289
2290 // Emit a branch from the current block to the Target block only if
2291 // the current block has a terminator.
2293
2294 // If BB has no use then delete it and return. Else place BB after the current
2295 // block, if possible, or else at the end of the function. Also add a branch
2296 // from current block to BB if current block does not have a terminator.
2297 LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn,
2298 bool IsFinished = false);
2299
2300 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
2301 /// Here is the logic:
2302 /// if (Cond) {
2303 /// ThenGen();
2304 /// } else {
2305 /// ElseGen();
2306 /// }
2307 ///
2308 /// \return an error, if any were triggered during execution.
2310 BodyGenCallbackTy ElseGen,
2311 InsertPointTy AllocaIP = {});
2312
2313 /// Create the global variable holding the offload mappings information.
2316 std::string VarName);
2317
2318 /// Create the global variable holding the offload names information.
2321 std::string VarName);
2322
2325 AllocaInst *Args = nullptr;
2327 };
2328
2329 /// Create the allocas instruction used in call to mapper functions.
2331 InsertPointTy AllocaIP,
2332 unsigned NumOperands,
2334
2335 /// Create the call for the target mapper function.
2336 /// \param Loc The source location description.
2337 /// \param MapperFunc Function to be called.
2338 /// \param SrcLocInfo Source location information global.
2339 /// \param MaptypesArg The argument types.
2340 /// \param MapnamesArg The argument names.
2341 /// \param MapperAllocas The AllocaInst used for the call.
2342 /// \param DeviceID Device ID for the call.
2343 /// \param NumOperands Number of operands in the call.
2345 Function *MapperFunc, Value *SrcLocInfo,
2346 Value *MaptypesArg, Value *MapnamesArg,
2348 int64_t DeviceID, unsigned NumOperands);
2349
2350 /// Container for the arguments used to pass data to the runtime library.
2352 /// The array of base pointer passed to the runtime library.
2354 /// The array of section pointers passed to the runtime library.
2356 /// The array of sizes passed to the runtime library.
2357 Value *SizesArray = nullptr;
2358 /// The array of map types passed to the runtime library for the beginning
2359 /// of the region or for the entire region if there are no separate map
2360 /// types for the region end.
2362 /// The array of map types passed to the runtime library for the end of the
2363 /// region, or nullptr if there are no separate map types for the region
2364 /// end.
2366 /// The array of user-defined mappers passed to the runtime library.
2368 /// The array of original declaration names of mapped pointers sent to the
2369 /// runtime library for debugging
2371
2372 explicit TargetDataRTArgs() {}
2381 };
2382
2383 /// Container to pass the default attributes with which a kernel must be
2384 /// launched, used to set kernel attributes and populate associated static
2385 /// structures.
2386 ///
2387 /// For max values, < 0 means unset, == 0 means set but unknown at compile
2388 /// time. The number of max values will be 1 except for the case where
2389 /// ompx_bare is set.
2394 int32_t MinTeams = 1;
2396 int32_t MinThreads = 1;
2399 };
2400
2401 /// Container to pass LLVM IR runtime values or constants related to the
2402 /// number of teams and threads with which the kernel must be launched, as
2403 /// well as the trip count of the loop, if it is an SPMD or Generic-SPMD
2404 /// kernel. These must be defined in the host prior to the call to the kernel
2405 /// launch OpenMP RTL function.
2408 Value *MinTeams = nullptr;
2411
2412 /// 'parallel' construct 'num_threads' clause value, if present and it is an
2413 /// SPMD kernel.
2414 Value *MaxThreads = nullptr;
2415
2416 /// Total number of iterations of the SPMD or Generic-SPMD kernel or null if
2417 /// it is a generic kernel.
2419 };
2420
2421 /// Data structure that contains the needed information to construct the
2422 /// kernel args vector.
2424 /// Number of arguments passed to the runtime library.
2425 unsigned NumTargetItems = 0;
2426 /// Arguments passed to the runtime library
2428 /// The number of iterations
2430 /// The number of teams.
2432 /// The number of threads.
2434 /// The size of the dynamic shared memory.
2436 /// True if the kernel has 'no wait' clause.
2437 bool HasNoWait = false;
2438
2439 // Constructors for TargetKernelArgs.
2444 bool HasNoWait)
2449 };
2450
2451 /// Create the kernel args vector used by emitTargetKernel. This function
2452 /// creates various constant values that are used in the resulting args
2453 /// vector.
2454 LLVM_ABI static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
2456 SmallVector<Value *> &ArgsVector);
2457
2458 /// Struct that keeps the information that should be kept throughout
2459 /// a 'target data' region.
2461 /// Set to true if device pointer information have to be obtained.
2462 bool RequiresDevicePointerInfo = false;
2463 /// Set to true if Clang emits separate runtime calls for the beginning and
2464 /// end of the region. These calls might have separate map type arrays.
2465 bool SeparateBeginEndCalls = false;
2466
2467 public:
2469
2472
2473 /// Indicate whether any user-defined mapper exists.
2474 bool HasMapper = false;
2475 /// The total number of pointers passed to the runtime library.
2476 unsigned NumberOfPtrs = 0u;
2477
2478 bool EmitDebug = false;
2479
2480 /// Whether the `target ... data` directive has a `nowait` clause.
2481 bool HasNoWait = false;
2482
2483 explicit TargetDataInfo() {}
2484 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
2485 bool SeparateBeginEndCalls)
2486 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
2487 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
2488 /// Clear information about the data arrays.
2491 HasMapper = false;
2492 NumberOfPtrs = 0u;
2493 }
2494 /// Return true if the current target data information has valid arrays.
2495 bool isValid() {
2499 }
2500 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
2501 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
2502 };
2503
2511
2512 /// This structure contains combined information generated for mappable
2513 /// clauses, including base pointers, pointers, sizes, map types, user-defined
2514 /// mappers, and non-contiguous information.
2515 struct MapInfosTy {
2517 bool IsNonContiguous = false;
2522 };
2530
2531 /// Append arrays in \a CurInfo.
2532 void append(MapInfosTy &CurInfo) {
2534 CurInfo.BasePointers.end());
2535 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
2537 CurInfo.DevicePointers.end());
2538 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
2539 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
2540 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
2542 CurInfo.NonContigInfo.Dims.end());
2544 CurInfo.NonContigInfo.Offsets.end());
2546 CurInfo.NonContigInfo.Counts.end());
2548 CurInfo.NonContigInfo.Strides.end());
2549 }
2550 };
2552
2553 /// Callback function type for functions emitting the host fallback code that
2554 /// is executed when the kernel launch fails. It takes an insertion point as
2555 /// parameter where the code should be emitted. It returns an insertion point
2556 /// that points right after after the emitted code.
2559
2560 // Callback function type for emitting and fetching user defined custom
2561 // mappers.
2563 function_ref<Expected<Function *>(unsigned int)>;
2564
2565 /// Generate a target region entry call and host fallback call.
2566 ///
2567 /// \param Loc The location at which the request originated and is fulfilled.
2568 /// \param OutlinedFnID The ooulined function ID.
2569 /// \param EmitTargetCallFallbackCB Call back function to generate host
2570 /// fallback code.
2571 /// \param Args Data structure holding information about the kernel arguments.
2572 /// \param DeviceID Identifier for the device via the 'device' clause.
2573 /// \param RTLoc Source location identifier
2574 /// \param AllocaIP The insertion point to be used for alloca instructions.
2576 const LocationDescription &Loc, Value *OutlinedFnID,
2577 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
2578 Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
2579
2580 /// Callback type for generating the bodies of device directives that require
2581 /// outer target tasks (e.g. in case of having `nowait` or `depend` clauses).
2582 ///
2583 /// \param DeviceID The ID of the device on which the target region will
2584 /// execute.
2585 /// \param RTLoc Source location identifier
2586 /// \Param TargetTaskAllocaIP Insertion point for the alloca block of the
2587 /// generated task.
2588 ///
2589 /// \return an error, if any were triggered during execution.
2591 function_ref<Error(Value *DeviceID, Value *RTLoc,
2592 IRBuilderBase::InsertPoint TargetTaskAllocaIP)>;
2593
2594 /// Generate a target-task for the target construct
2595 ///
2596 /// \param TaskBodyCB Callback to generate the actual body of the target task.
2597 /// \param DeviceID Identifier for the device via the 'device' clause.
2598 /// \param RTLoc Source location identifier
2599 /// \param AllocaIP The insertion point to be used for alloca instructions.
2600 /// \param Dependencies Vector of DependData objects holding information of
2601 /// dependencies as specified by the 'depend' clause.
2602 /// \param HasNoWait True if the target construct had 'nowait' on it, false
2603 /// otherwise
2605 TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
2608 const TargetDataRTArgs &RTArgs, bool HasNoWait);
2609
2610 /// Emit the arguments to be passed to the runtime library based on the
2611 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
2612 /// ForEndCall, emit map types to be passed for the end of the region instead
2613 /// of the beginning.
2616 OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall = false);
2617
2618 /// Emit an array of struct descriptors to be assigned to the offload args.
2620 InsertPointTy CodeGenIP,
2621 MapInfosTy &CombinedInfo,
2623
2624 /// Emit the arrays used to pass the captures and map information to the
2625 /// offloading runtime library. If there is no map or capture information,
2626 /// return nullptr by reference. Accepts a reference to a MapInfosTy object
2627 /// that contains information generated for mappable clauses,
2628 /// including base pointers, pointers, sizes, map types, user-defined mappers.
2630 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
2632 bool IsNonContiguous = false,
2633 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr);
2634
2635 /// Allocates memory for and populates the arrays required for offloading
2636 /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
2637 /// emits their base addresses as arguments to be passed to the runtime
2638 /// library. In essence, this function is a combination of
2639 /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
2640 /// be preferred by clients of OpenMPIRBuilder.
2642 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
2643 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
2644 CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous = false,
2645 bool ForEndCall = false,
2646 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr);
2647
2648 /// Creates offloading entry for the provided entry ID \a ID, address \a
2649 /// Addr, size \a Size, and flags \a Flags.
2651 int32_t Flags, GlobalValue::LinkageTypes,
2652 StringRef Name = "");
2653
2654 /// The kind of errors that can occur when emitting the offload entries and
2655 /// metadata.
2661
2662 /// Callback function type
2664 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
2665
2666 // Emit the offloading entries and metadata so that the device codegen side
2667 // can easily figure out what to emit. The produced metadata looks like
2668 // this:
2669 //
2670 // !omp_offload.info = !{!1, ...}
2671 //
2672 // We only generate metadata for function that contain target regions.
2674 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
2675
2676public:
2677 /// Generator for __kmpc_copyprivate
2678 ///
2679 /// \param Loc The source location description.
2680 /// \param BufSize Number of elements in the buffer.
2681 /// \param CpyBuf List of pointers to data to be copied.
2682 /// \param CpyFn function to call for copying data.
2683 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
2684 ///
2685 /// \return The insertion position *after* the CopyPrivate call.
2686
2688 llvm::Value *BufSize,
2689 llvm::Value *CpyBuf,
2690 llvm::Value *CpyFn,
2691 llvm::Value *DidIt);
2692
2693 /// Generator for '#omp single'
2694 ///
2695 /// \param Loc The source location description.
2696 /// \param BodyGenCB Callback that will generate the region code.
2697 /// \param FiniCB Callback to finalize variable copies.
2698 /// \param IsNowait If false, a barrier is emitted.
2699 /// \param CPVars copyprivate variables.
2700 /// \param CPFuncs copy functions to use for each copyprivate variable.
2701 ///
2702 /// \returns The insertion position *after* the single call.
2705 FinalizeCallbackTy FiniCB, bool IsNowait,
2706 ArrayRef<llvm::Value *> CPVars = {},
2707 ArrayRef<llvm::Function *> CPFuncs = {});
2708
2709 /// Generator for '#omp master'
2710 ///
2711 /// \param Loc The insert and source location description.
2712 /// \param BodyGenCB Callback that will generate the region code.
2713 /// \param FiniCB Callback to finalize variable copies.
2714 ///
2715 /// \returns The insertion position *after* the master.
2716 LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc,
2717 BodyGenCallbackTy BodyGenCB,
2718 FinalizeCallbackTy FiniCB);
2719
2720 /// Generator for '#omp masked'
2721 ///
2722 /// \param Loc The insert and source location description.
2723 /// \param BodyGenCB Callback that will generate the region code.
2724 /// \param FiniCB Callback to finialize variable copies.
2725 ///
2726 /// \returns The insertion position *after* the masked.
2727 LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc,
2728 BodyGenCallbackTy BodyGenCB,
2729 FinalizeCallbackTy FiniCB,
2730 Value *Filter);
2731
2732 /// This function performs the scan reduction of the values updated in
2733 /// the input phase. The reduction logic needs to be emitted between input
2734 /// and scan loop returned by `CreateCanonicalScanLoops`. The following
2735 /// is the code that is generated, `buffer` and `span` are expected to be
2736 /// populated before executing the generated code.
2737 /// \code{c}
2738 /// for (int k = 0; k != ceil(log2(span)); ++k) {
2739 /// i=pow(2,k)
2740 /// for (size cnt = last_iter; cnt >= i; --cnt)
2741 /// buffer[cnt] op= buffer[cnt-i];
2742 /// }
2743 /// \endcode
2744 /// \param Loc The insert and source location description.
2745 /// \param ReductionInfos Array type containing the ReductionOps.
2746 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
2747 /// `ScanInfoInitialize`.
2748 ///
2749 /// \returns The insertion position *after* the masked.
2751 const LocationDescription &Loc,
2752 ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
2753 ScanInfo *ScanRedInfo);
2754
2755 /// This directive split and directs the control flow to input phase
2756 /// blocks or scan phase blocks based on 1. whether input loop or scan loop
2757 /// is executed, 2. whether exclusive or inclusive scan is used.
2758 ///
2759 /// \param Loc The insert and source location description.
2760 /// \param AllocaIP The IP where the temporary buffer for scan reduction
2761 // needs to be allocated.
2762 /// \param ScanVars Scan Variables.
2763 /// \param IsInclusive Whether it is an inclusive or exclusive scan.
2764 /// \param ScanRedInfo Pointer to the ScanInfo objected created using
2765 /// `ScanInfoInitialize`.
2766 ///
2767 /// \returns The insertion position *after* the scan.
2768 LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc,
2769 InsertPointTy AllocaIP,
2770 ArrayRef<llvm::Value *> ScanVars,
2771 ArrayRef<llvm::Type *> ScanVarsType,
2772 bool IsInclusive,
2773 ScanInfo *ScanRedInfo);
2774
2775 /// Generator for '#omp critical'
2776 ///
2777 /// \param Loc The insert and source location description.
2778 /// \param BodyGenCB Callback that will generate the region body code.
2779 /// \param FiniCB Callback to finalize variable copies.
2780 /// \param CriticalName name of the lock used by the critical directive
2781 /// \param HintInst Hint Instruction for hint clause associated with critical
2782 ///
2783 /// \returns The insertion position *after* the critical.
2784 LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc,
2785 BodyGenCallbackTy BodyGenCB,
2786 FinalizeCallbackTy FiniCB,
2787 StringRef CriticalName,
2788 Value *HintInst);
2789
2790 /// Generator for '#omp ordered depend (source | sink)'
2791 ///
2792 /// \param Loc The insert and source location description.
2793 /// \param AllocaIP The insertion point to be used for alloca instructions.
2794 /// \param NumLoops The number of loops in depend clause.
2795 /// \param StoreValues The value will be stored in vector address.
2796 /// \param Name The name of alloca instruction.
2797 /// \param IsDependSource If true, depend source; otherwise, depend sink.
2798 ///
2799 /// \return The insertion position *after* the ordered.
2801 createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP,
2802 unsigned NumLoops, ArrayRef<llvm::Value *> StoreValues,
2803 const Twine &Name, bool IsDependSource);
2804
2805 /// Generator for '#omp ordered [threads | simd]'
2806 ///
2807 /// \param Loc The insert and source location description.
2808 /// \param BodyGenCB Callback that will generate the region code.
2809 /// \param FiniCB Callback to finalize variable copies.
2810 /// \param IsThreads If true, with threads clause or without clause;
2811 /// otherwise, with simd clause;
2812 ///
2813 /// \returns The insertion position *after* the ordered.
2815 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2816 FinalizeCallbackTy FiniCB, bool IsThreads);
2817
2818 /// Generator for '#omp sections'
2819 ///
2820 /// \param Loc The insert and source location description.
2821 /// \param AllocaIP The insertion points to be used for alloca instructions.
2822 /// \param SectionCBs Callbacks that will generate body of each section.
2823 /// \param PrivCB Callback to copy a given variable (think copy constructor).
2824 /// \param FiniCB Callback to finalize variable copies.
2825 /// \param IsCancellable Flag to indicate a cancellable parallel region.
2826 /// \param IsNowait If true, barrier - to ensure all sections are executed
2827 /// before moving forward will not be generated.
2828 /// \returns The insertion position *after* the sections.
2830 createSections(const LocationDescription &Loc, InsertPointTy AllocaIP,
2831 ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
2833 bool IsCancellable, bool IsNowait);
2834
2835 /// Generator for '#omp section'
2836 ///
2837 /// \param Loc The insert and source location description.
2838 /// \param BodyGenCB Callback that will generate the region body code.
2839 /// \param FiniCB Callback to finalize variable copies.
2840 /// \returns The insertion position *after* the section.
2841 LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc,
2842 BodyGenCallbackTy BodyGenCB,
2843 FinalizeCallbackTy FiniCB);
2844
2845 /// Generator for `#omp teams`
2846 ///
2847 /// \param Loc The location where the teams construct was encountered.
2848 /// \param BodyGenCB Callback that will generate the region code.
2849 /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
2850 /// it is as if lower bound is specified as equal to upperbound. If
2851 /// this is non-null, then upperbound must also be non-null.
2852 /// \param NumTeamsUpper Upper bound on the number of teams.
2853 /// \param ThreadLimit on the number of threads that may participate in a
2854 /// contention group created by each team.
2855 /// \param IfExpr is the integer argument value of the if condition on the
2856 /// teams clause.
2857 LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc,
2858 BodyGenCallbackTy BodyGenCB,
2859 Value *NumTeamsLower = nullptr,
2860 Value *NumTeamsUpper = nullptr,
2861 Value *ThreadLimit = nullptr,
2862 Value *IfExpr = nullptr);
2863
2864 /// Generator for `#omp distribute`
2865 ///
2866 /// \param Loc The location where the distribute construct was encountered.
2867 /// \param AllocaIP The insertion points to be used for alloca instructions.
2868 /// \param BodyGenCB Callback that will generate the region code.
2869 LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc,
2870 InsertPointTy AllocaIP,
2871 BodyGenCallbackTy BodyGenCB);
2872
2873 /// Generate conditional branch and relevant BasicBlocks through which private
2874 /// threads copy the 'copyin' variables from Master copy to threadprivate
2875 /// copies.
2876 ///
2877 /// \param IP insertion block for copyin conditional
2878 /// \param MasterVarPtr a pointer to the master variable
2879 /// \param PrivateVarPtr a pointer to the threadprivate variable
2880 /// \param IntPtrTy Pointer size type
2881 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
2882 // and copy.in.end block
2883 ///
2884 /// \returns The insertion point where copying operation to be emitted.
2886 Value *MasterAddr,
2887 Value *PrivateAddr,
2888 llvm::IntegerType *IntPtrTy,
2889 bool BranchtoEnd = true);
2890
2891 /// Create a runtime call for kmpc_Alloc
2892 ///
2893 /// \param Loc The insert and source location description.
2894 /// \param Size Size of allocated memory space
2895 /// \param Allocator Allocator information instruction
2896 /// \param Name Name of call Instruction for OMP_alloc
2897 ///
2898 /// \returns CallInst to the OMP_Alloc call
2899 LLVM_ABI CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
2900 Value *Allocator, std::string Name = "");
2901
2902 /// Create a runtime call for kmpc_free
2903 ///
2904 /// \param Loc The insert and source location description.
2905 /// \param Addr Address of memory space to be freed
2906 /// \param Allocator Allocator information instruction
2907 /// \param Name Name of call Instruction for OMP_Free
2908 ///
2909 /// \returns CallInst to the OMP_Free call
2910 LLVM_ABI CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
2911 Value *Allocator, std::string Name = "");
2912
2913 /// Create a runtime call for kmpc_threadprivate_cached
2914 ///
2915 /// \param Loc The insert and source location description.
2916 /// \param Pointer pointer to data to be cached
2917 /// \param Size size of data to be cached
2918 /// \param Name Name of call Instruction for callinst
2919 ///
2920 /// \returns CallInst to the thread private cache call.
2921 LLVM_ABI CallInst *
2922 createCachedThreadPrivate(const LocationDescription &Loc,
2924 const llvm::Twine &Name = Twine(""));
2925
2926 /// Create a runtime call for __tgt_interop_init
2927 ///
2928 /// \param Loc The insert and source location description.
2929 /// \param InteropVar variable to be allocated
2930 /// \param InteropType type of interop operation
2931 /// \param Device devide to which offloading will occur
2932 /// \param NumDependences number of dependence variables
2933 /// \param DependenceAddress pointer to dependence variables
2934 /// \param HaveNowaitClause does nowait clause exist
2935 ///
2936 /// \returns CallInst to the __tgt_interop_init call
2937 LLVM_ABI CallInst *createOMPInteropInit(const LocationDescription &Loc,
2938 Value *InteropVar,
2939 omp::OMPInteropType InteropType,
2940 Value *Device, Value *NumDependences,
2941 Value *DependenceAddress,
2942 bool HaveNowaitClause);
2943
2944 /// Create a runtime call for __tgt_interop_destroy
2945 ///
2946 /// \param Loc The insert and source location description.
2947 /// \param InteropVar variable to be allocated
2948 /// \param Device devide to which offloading will occur
2949 /// \param NumDependences number of dependence variables
2950 /// \param DependenceAddress pointer to dependence variables
2951 /// \param HaveNowaitClause does nowait clause exist
2952 ///
2953 /// \returns CallInst to the __tgt_interop_destroy call
2954 LLVM_ABI CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
2955 Value *InteropVar, Value *Device,
2956 Value *NumDependences,
2957 Value *DependenceAddress,
2958 bool HaveNowaitClause);
2959
2960 /// Create a runtime call for __tgt_interop_use
2961 ///
2962 /// \param Loc The insert and source location description.
2963 /// \param InteropVar variable to be allocated
2964 /// \param Device devide to which offloading will occur
2965 /// \param NumDependences number of dependence variables
2966 /// \param DependenceAddress pointer to dependence variables
2967 /// \param HaveNowaitClause does nowait clause exist
2968 ///
2969 /// \returns CallInst to the __tgt_interop_use call
2970 LLVM_ABI CallInst *createOMPInteropUse(const LocationDescription &Loc,
2971 Value *InteropVar, Value *Device,
2972 Value *NumDependences,
2973 Value *DependenceAddress,
2974 bool HaveNowaitClause);
2975
2976 /// The `omp target` interface
2977 ///
2978 /// For more information about the usage of this interface,
2979 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2980 ///
2981 ///{
2982
2983 /// Create a runtime call for kmpc_target_init
2984 ///
2985 /// \param Loc The insert and source location description.
2986 /// \param Attrs Structure containing the default attributes, including
2987 /// numbers of threads and teams to launch the kernel with.
2989 const LocationDescription &Loc,
2991
2992 /// Create a runtime call for kmpc_target_deinit
2993 ///
2994 /// \param Loc The insert and source location description.
2995 /// \param TeamsReductionDataSize The maximal size of all the reduction data
2996 /// for teams reduction.
2997 /// \param TeamsReductionBufferLength The number of elements (each of up to
2998 /// \p TeamsReductionDataSize size), in the teams reduction buffer.
2999 LLVM_ABI void createTargetDeinit(const LocationDescription &Loc,
3000 int32_t TeamsReductionDataSize = 0,
3001 int32_t TeamsReductionBufferLength = 1024);
3002
3003 ///}
3004
3005 /// Helpers to read/write kernel annotations from the IR.
3006 ///
3007 ///{
3008
3009 /// Read/write a bounds on threads for \p Kernel. Read will return 0 if none
3010 /// is set.
3011 LLVM_ABI static std::pair<int32_t, int32_t>
3012 readThreadBoundsForKernel(const Triple &T, Function &Kernel);
3013 LLVM_ABI static void writeThreadBoundsForKernel(const Triple &T,
3014 Function &Kernel, int32_t LB,
3015 int32_t UB);
3016
3017 /// Read/write a bounds on teams for \p Kernel. Read will return 0 if none
3018 /// is set.
3019 LLVM_ABI static std::pair<int32_t, int32_t>
3020 readTeamBoundsForKernel(const Triple &T, Function &Kernel);
3021 LLVM_ABI static void writeTeamsForKernel(const Triple &T, Function &Kernel,
3022 int32_t LB, int32_t UB);
3023 ///}
3024
3025private:
3026 // Sets the function attributes expected for the outlined function
3027 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
3028
3029 // Creates the function ID/Address for the given outlined function.
3030 // In the case of an embedded device function the address of the function is
3031 // used, in the case of a non-offload function a constant is created.
3032 Constant *createOutlinedFunctionID(Function *OutlinedFn,
3033 StringRef EntryFnIDName);
3034
3035 // Creates the region entry address for the outlined function
3036 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
3037 StringRef EntryFnName);
3038
3039public:
3040 /// Functions used to generate a function with the given name.
3042 std::function<Expected<Function *>(StringRef FunctionName)>;
3043
3044 /// Create a unique name for the entry function using the source location
3045 /// information of the current target region. The name will be something like:
3046 ///
3047 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
3048 ///
3049 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
3050 /// mangled name of the function that encloses the target region and BB is the
3051 /// line number of the target region. CC is a count added when more than one
3052 /// region is located at the same location.
3053 ///
3054 /// If this target outline function is not an offload entry, we don't need to
3055 /// register it. This may happen if it is guarded by an if clause that is
3056 /// false at compile time, or no target archs have been specified.
3057 ///
3058 /// The created target region ID is used by the runtime library to identify
3059 /// the current target region, so it only has to be unique and not
3060 /// necessarily point to anything. It could be the pointer to the outlined
3061 /// function that implements the target region, but we aren't using that so
3062 /// that the compiler doesn't need to keep that, and could therefore inline
3063 /// the host function if proven worthwhile during optimization. In the other
3064 /// hand, if emitting code for the device, the ID has to be the function
3065 /// address so that it can retrieved from the offloading entry and launched
3066 /// by the runtime library. We also mark the outlined function to have
3067 /// external linkage in case we are emitting code for the device, because
3068 /// these functions will be entry points to the device.
3069 ///
3070 /// \param InfoManager The info manager keeping track of the offload entries
3071 /// \param EntryInfo The entry information about the function
3072 /// \param GenerateFunctionCallback The callback function to generate the code
3073 /// \param OutlinedFunction Pointer to the outlined function
3074 /// \param EntryFnIDName Name of the ID o be created
3076 TargetRegionEntryInfo &EntryInfo,
3077 FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
3078 Function *&OutlinedFn, Constant *&OutlinedFnID);
3079
3080 /// Registers the given function and sets up the attribtues of the function
3081 /// Returns the FunctionID.
3082 ///
3083 /// \param InfoManager The info manager keeping track of the offload entries
3084 /// \param EntryInfo The entry information about the function
3085 /// \param OutlinedFunction Pointer to the outlined function
3086 /// \param EntryFnName Name of the outlined function
3087 /// \param EntryFnIDName Name of the ID o be created
3090 Function *OutlinedFunction,
3091 StringRef EntryFnName, StringRef EntryFnIDName);
3092
3093 /// Type of BodyGen to use for region codegen
3094 ///
3095 /// Priv: If device pointer privatization is required, emit the body of the
3096 /// region here. It will have to be duplicated: with and without
3097 /// privatization.
3098 /// DupNoPriv: If we need device pointer privatization, we need
3099 /// to emit the body of the region with no privatization in the 'else' branch
3100 /// of the conditional.
3101 /// NoPriv: If we don't require privatization of device
3102 /// pointers, we emit the body in between the runtime calls. This avoids
3103 /// duplicating the body code.
3105
3106 /// Callback type for creating the map infos for the kernel parameters.
3107 /// \param CodeGenIP is the insertion point where code should be generated,
3108 /// if any.
3111
3112private:
3113 /// Emit the array initialization or deletion portion for user-defined mapper
3114 /// code generation. First, it evaluates whether an array section is mapped
3115 /// and whether the \a MapType instructs to delete this section. If \a IsInit
3116 /// is true, and \a MapType indicates to not delete this array, array
3117 /// initialization code is generated. If \a IsInit is false, and \a MapType
3118 /// indicates to delete this array, array deletion code is generated.
3119 void emitUDMapperArrayInitOrDel(Function *MapperFn, llvm::Value *MapperHandle,
3120 llvm::Value *Base, llvm::Value *Begin,
3121 llvm::Value *Size, llvm::Value *MapType,
3122 llvm::Value *MapName, TypeSize ElementSize,
3123 llvm::BasicBlock *ExitBB, bool IsInit);
3124
3125public:
3126 /// Emit the user-defined mapper function. The code generation follows the
3127 /// pattern in the example below.
3128 /// \code
3129 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
3130 /// void *base, void *begin,
3131 /// int64_t size, int64_t type,
3132 /// void *name = nullptr) {
3133 /// // Allocate space for an array section first or add a base/begin for
3134 /// // pointer dereference.
3135 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
3136 /// !maptype.IsDelete)
3137 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
3138 /// size*sizeof(Ty), clearToFromMember(type));
3139 /// // Map members.
3140 /// for (unsigned i = 0; i < size; i++) {
3141 /// // For each component specified by this mapper:
3142 /// for (auto c : begin[i]->all_components) {
3143 /// if (c.hasMapper())
3144 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin,
3145 /// c.arg_size,
3146 /// c.arg_type, c.arg_name);
3147 /// else
3148 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
3149 /// c.arg_begin, c.arg_size, c.arg_type,
3150 /// c.arg_name);
3151 /// }
3152 /// }
3153 /// // Delete the array section.
3154 /// if (size > 1 && maptype.IsDelete)
3155 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
3156 /// size*sizeof(Ty), clearToFromMember(type));
3157 /// }
3158 /// \endcode
3159 ///
3160 /// \param PrivAndGenMapInfoCB Callback that privatizes code and populates the
3161 /// MapInfos and returns.
3162 /// \param ElemTy DeclareMapper element type.
3163 /// \param FuncName Optional param to specify mapper function name.
3164 /// \param CustomMapperCB Optional callback to generate code related to
3165 /// custom mappers.
3168 InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)>
3169 PrivAndGenMapInfoCB,
3170 llvm::Type *ElemTy, StringRef FuncName,
3171 CustomMapperCallbackTy CustomMapperCB);
3172
3173 /// Generator for '#omp target data'
3174 ///
3175 /// \param Loc The location where the target data construct was encountered.
3176 /// \param AllocaIP The insertion points to be used for alloca instructions.
3177 /// \param CodeGenIP The insertion point at which the target directive code
3178 /// should be placed.
3179 /// \param IsBegin If true then emits begin mapper call otherwise emits
3180 /// end mapper call.
3181 /// \param DeviceID Stores the DeviceID from the device clause.
3182 /// \param IfCond Value which corresponds to the if clause condition.
3183 /// \param Info Stores all information realted to the Target Data directive.
3184 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
3185 /// \param CustomMapperCB Callback to generate code related to
3186 /// custom mappers.
3187 /// \param BodyGenCB Optional Callback to generate the region code.
3188 /// \param DeviceAddrCB Optional callback to generate code related to
3189 /// use_device_ptr and use_device_addr.
3191 const LocationDescription &Loc, InsertPointTy AllocaIP,
3192 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
3194 CustomMapperCallbackTy CustomMapperCB,
3195 omp::RuntimeFunction *MapperFunc = nullptr,
3197 BodyGenTy BodyGenType)>
3198 BodyGenCB = nullptr,
3199 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
3200 Value *SrcLocInfo = nullptr);
3201
3203 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
3204
3206 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
3207 InsertPointTy CodeGenIP)>;
3208
3209 /// Generator for '#omp target'
3210 ///
3211 /// \param Loc where the target data construct was encountered.
3212 /// \param IsOffloadEntry whether it is an offload entry.
3213 /// \param CodeGenIP The insertion point where the call to the outlined
3214 /// function should be emitted.
3215 /// \param Info Stores all information realted to the Target directive.
3216 /// \param EntryInfo The entry information about the function.
3217 /// \param DefaultAttrs Structure containing the default attributes, including
3218 /// numbers of threads and teams to launch the kernel with.
3219 /// \param RuntimeAttrs Structure containing the runtime numbers of threads
3220 /// and teams to launch the kernel with.
3221 /// \param IfCond value of the `if` clause.
3222 /// \param Inputs The input values to the region that will be passed.
3223 /// as arguments to the outlined function.
3224 /// \param BodyGenCB Callback that will generate the region code.
3225 /// \param ArgAccessorFuncCB Callback that will generate accessors
3226 /// instructions for passed in target arguments where neccessary
3227 /// \param CustomMapperCB Callback to generate code related to
3228 /// custom mappers.
3229 /// \param Dependencies A vector of DependData objects that carry
3230 /// dependency information as passed in the depend clause
3231 /// \param HasNowait Whether the target construct has a `nowait` clause or
3232 /// not.
3234 const LocationDescription &Loc, bool IsOffloadEntry,
3237 TargetRegionEntryInfo &EntryInfo,
3238 const TargetKernelDefaultAttrs &DefaultAttrs,
3239 const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond,
3240 SmallVectorImpl<Value *> &Inputs, GenMapInfoCallbackTy GenMapInfoCB,
3241 TargetBodyGenCallbackTy BodyGenCB,
3242 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
3243 CustomMapperCallbackTy CustomMapperCB,
3244 const SmallVector<DependData> &Dependencies, bool HasNowait = false);
3245
3246 /// Returns __kmpc_for_static_init_* runtime function for the specified
3247 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
3248 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
3250 bool IVSigned,
3251 bool IsGPUDistribute);
3252
3253 /// Returns __kmpc_dispatch_init_* runtime function for the specified
3254 /// size \a IVSize and sign \a IVSigned.
3256 bool IVSigned);
3257
3258 /// Returns __kmpc_dispatch_next_* runtime function for the specified
3259 /// size \a IVSize and sign \a IVSigned.
3261 bool IVSigned);
3262
3263 /// Returns __kmpc_dispatch_fini_* runtime function for the specified
3264 /// size \a IVSize and sign \a IVSigned.
3266 bool IVSigned);
3267
3268 /// Returns __kmpc_dispatch_deinit runtime function.
3270
3271 /// Declarations for LLVM-IR types (simple, array, function and structure) are
3272 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
3273 /// we provide the declarations, the initializeTypes function will provide the
3274 /// values.
3275 ///
3276 ///{
3277#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
3278#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
3279 ArrayType *VarName##Ty = nullptr; \
3280 PointerType *VarName##PtrTy = nullptr;
3281#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
3282 FunctionType *VarName = nullptr; \
3283 PointerType *VarName##Ptr = nullptr;
3284#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
3285 StructType *VarName = nullptr; \
3286 PointerType *VarName##Ptr = nullptr;
3287#include "llvm/Frontend/OpenMP/OMPKinds.def"
3288
3289 ///}
3290
3291private:
3292 /// Create all simple and struct types exposed by the runtime and remember
3293 /// the llvm::PointerTypes of them for easy access later.
3294 void initializeTypes(Module &M);
3295
3296 /// Common interface for generating entry calls for OMP Directives.
3297 /// if the directive has a region/body, It will set the insertion
3298 /// point to the body
3299 ///
3300 /// \param OMPD Directive to generate entry blocks for
3301 /// \param EntryCall Call to the entry OMP Runtime Function
3302 /// \param ExitBB block where the region ends.
3303 /// \param Conditional indicate if the entry call result will be used
3304 /// to evaluate a conditional of whether a thread will execute
3305 /// body code or not.
3306 ///
3307 /// \return The insertion position in exit block
3308 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
3309 BasicBlock *ExitBB,
3310 bool Conditional = false);
3311
3312 /// Common interface to finalize the region
3313 ///
3314 /// \param OMPD Directive to generate exiting code for
3315 /// \param FinIP Insertion point for emitting Finalization code and exit call
3316 /// \param ExitCall Call to the ending OMP Runtime Function
3317 /// \param HasFinalize indicate if the directive will require finalization
3318 /// and has a finalization callback in the stack that
3319 /// should be called.
3320 ///
3321 /// \return The insertion position in exit block
3322 InsertPointOrErrorTy emitCommonDirectiveExit(omp::Directive OMPD,
3323 InsertPointTy FinIP,
3324 Instruction *ExitCall,
3325 bool HasFinalize = true);
3326
3327 /// Common Interface to generate OMP inlined regions
3328 ///
3329 /// \param OMPD Directive to generate inlined region for
3330 /// \param EntryCall Call to the entry OMP Runtime Function
3331 /// \param ExitCall Call to the ending OMP Runtime Function
3332 /// \param BodyGenCB Body code generation callback.
3333 /// \param FiniCB Finalization Callback. Will be called when finalizing region
3334 /// \param Conditional indicate if the entry call result will be used
3335 /// to evaluate a conditional of whether a thread will execute
3336 /// body code or not.
3337 /// \param HasFinalize indicate if the directive will require finalization
3338 /// and has a finalization callback in the stack that
3339 /// should be called.
3340 /// \param IsCancellable if HasFinalize is set to true, indicate if the
3341 /// the directive should be cancellable.
3342 /// \return The insertion point after the region
3344 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
3345 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
3346 FinalizeCallbackTy FiniCB, bool Conditional = false,
3347 bool HasFinalize = true, bool IsCancellable = false);
3348
3349 /// Get the platform-specific name separator.
3350 /// \param Parts different parts of the final name that needs separation
3351 /// \param FirstSeparator First separator used between the initial two
3352 /// parts of the name.
3353 /// \param Separator separator used between all of the rest consecutive
3354 /// parts of the name
3355 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
3356 StringRef FirstSeparator,
3357 StringRef Separator);
3358
3359 /// Returns corresponding lock object for the specified critical region
3360 /// name. If the lock object does not exist it is created, otherwise the
3361 /// reference to the existing copy is returned.
3362 /// \param CriticalName Name of the critical region.
3363 ///
3364 Value *getOMPCriticalRegionLock(StringRef CriticalName);
3365
3366 /// Callback type for Atomic Expression update
3367 /// ex:
3368 /// \code{.cpp}
3369 /// unsigned x = 0;
3370 /// #pragma omp atomic update
3371 /// x = Expr(x_old); //Expr() is any legal operation
3372 /// \endcode
3373 ///
3374 /// \param XOld the value of the atomic memory address to use for update
3375 /// \param IRB reference to the IRBuilder to use
3376 ///
3377 /// \returns Value to update X to.
3378 using AtomicUpdateCallbackTy =
3379 const function_ref<Expected<Value *>(Value *XOld, IRBuilder<> &IRB)>;
3380
3381private:
3382 enum AtomicKind { Read, Write, Update, Capture, Compare };
3383
3384 /// Determine whether to emit flush or not
3385 ///
3386 /// \param Loc The insert and source location description.
3387 /// \param AO The required atomic ordering
3388 /// \param AK The OpenMP atomic operation kind used.
3389 ///
3390 /// \returns wether a flush was emitted or not
3391 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
3392 AtomicOrdering AO, AtomicKind AK);
3393
3394 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
3395 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
3396 /// Only Scalar data types.
3397 ///
3398 /// \param AllocaIP The insertion point to be used for alloca
3399 /// instructions.
3400 /// \param X The target atomic pointer to be updated
3401 /// \param XElemTy The element type of the atomic pointer.
3402 /// \param Expr The value to update X with.
3403 /// \param AO Atomic ordering of the generated atomic
3404 /// instructions.
3405 /// \param RMWOp The binary operation used for update. If
3406 /// operation is not supported by atomicRMW,
3407 /// or belong to {FADD, FSUB, BAD_BINOP}.
3408 /// Then a `cmpExch` based atomic will be generated.
3409 /// \param UpdateOp Code generator for complex expressions that cannot be
3410 /// expressed through atomicrmw instruction.
3411 /// \param VolatileX true if \a X volatile?
3412 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
3413 /// update expression, false otherwise.
3414 /// (e.g. true for X = X BinOp Expr)
3415 ///
3416 /// \returns A pair of the old value of X before the update, and the value
3417 /// used for the update.
3418 Expected<std::pair<Value *, Value *>>
3419 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
3421 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
3422 bool IsXBinopExpr, bool IsIgnoreDenormalMode,
3423 bool IsFineGrainedMemory, bool IsRemoteMemory);
3424
3425 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
3426 ///
3427 /// \Return The instruction
3428 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3429 AtomicRMWInst::BinOp RMWOp);
3430
3431 bool IsFinalized;
3432
3433public:
3434 /// a struct to pack relevant information while generating atomic Ops
3436 Value *Var = nullptr;
3437 Type *ElemTy = nullptr;
3438 bool IsSigned = false;
3439 bool IsVolatile = false;
3440 };
3441
3442 /// Emit atomic Read for : V = X --- Only Scalar data types.
3443 ///
3444 /// \param Loc The insert and source location description.
3445 /// \param X The target pointer to be atomically read
3446 /// \param V Memory address where to store atomically read
3447 /// value
3448 /// \param AO Atomic ordering of the generated atomic
3449 /// instructions.
3450 /// \param AllocaIP Insert point for allocas
3451 //
3452 /// \return Insertion point after generated atomic read IR.
3455 AtomicOrdering AO,
3456 InsertPointTy AllocaIP);
3457
3458 /// Emit atomic write for : X = Expr --- Only Scalar data types.
3459 ///
3460 /// \param Loc The insert and source location description.
3461 /// \param X The target pointer to be atomically written to
3462 /// \param Expr The value to store.
3463 /// \param AO Atomic ordering of the generated atomic
3464 /// instructions.
3465 /// \param AllocaIP Insert point for allocas
3466 ///
3467 /// \return Insertion point after generated atomic Write IR.
3469 AtomicOpValue &X, Value *Expr,
3470 AtomicOrdering AO,
3471 InsertPointTy AllocaIP);
3472
3473 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
3474 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
3475 /// Only Scalar data types.
3476 ///
3477 /// \param Loc The insert and source location description.
3478 /// \param AllocaIP The insertion point to be used for alloca instructions.
3479 /// \param X The target atomic pointer to be updated
3480 /// \param Expr The value to update X with.
3481 /// \param AO Atomic ordering of the generated atomic instructions.
3482 /// \param RMWOp The binary operation used for update. If operation
3483 /// is not supported by atomicRMW, or belong to
3484 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
3485 /// atomic will be generated.
3486 /// \param UpdateOp Code generator for complex expressions that cannot be
3487 /// expressed through atomicrmw instruction.
3488 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
3489 /// update expression, false otherwise.
3490 /// (e.g. true for X = X BinOp Expr)
3491 ///
3492 /// \return Insertion point after generated atomic update IR.
3494 const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
3495 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3496 AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
3497 bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
3498 bool IsRemoteMemory = false);
3499
3500 /// Emit atomic update for constructs: --- Only Scalar data types
3501 /// V = X; X = X BinOp Expr ,
3502 /// X = X BinOp Expr; V = X,
3503 /// V = X; X = Expr BinOp X,
3504 /// X = Expr BinOp X; V = X,
3505 /// V = X; X = UpdateOp(X),
3506 /// X = UpdateOp(X); V = X,
3507 ///
3508 /// \param Loc The insert and source location description.
3509 /// \param AllocaIP The insertion point to be used for alloca instructions.
3510 /// \param X The target atomic pointer to be updated
3511 /// \param V Memory address where to store captured value
3512 /// \param Expr The value to update X with.
3513 /// \param AO Atomic ordering of the generated atomic instructions
3514 /// \param RMWOp The binary operation used for update. If
3515 /// operation is not supported by atomicRMW, or belong to
3516 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
3517 /// atomic will be generated.
3518 /// \param UpdateOp Code generator for complex expressions that cannot be
3519 /// expressed through atomicrmw instruction.
3520 /// \param UpdateExpr true if X is an in place update of the form
3521 /// X = X BinOp Expr or X = Expr BinOp X
3522 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
3523 /// update expression, false otherwise.
3524 /// (e.g. true for X = X BinOp Expr)
3525 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
3526 /// 'v', not an updated one.
3527 ///
3528 /// \return Insertion point after generated atomic capture IR.
3530 const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
3531 AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3533 bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
3534 bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
3535 bool IsRemoteMemory = false);
3536
3537 /// Emit atomic compare for constructs: --- Only scalar data types
3538 /// cond-expr-stmt:
3539 /// x = x ordop expr ? expr : x;
3540 /// x = expr ordop x ? expr : x;
3541 /// x = x == e ? d : x;
3542 /// x = e == x ? d : x; (this one is not in the spec)
3543 /// cond-update-stmt:
3544 /// if (x ordop expr) { x = expr; }
3545 /// if (expr ordop x) { x = expr; }
3546 /// if (x == e) { x = d; }
3547 /// if (e == x) { x = d; } (this one is not in the spec)
3548 /// conditional-update-capture-atomic:
3549 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
3550 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
3551 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
3552 /// IsFailOnly=true)
3553 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
3554 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
3555 /// IsFailOnly=true)
3556 ///
3557 /// \param Loc The insert and source location description.
3558 /// \param X The target atomic pointer to be updated.
3559 /// \param V Memory address where to store captured value (for
3560 /// compare capture only).
3561 /// \param R Memory address where to store comparison result
3562 /// (for compare capture with '==' only).
3563 /// \param E The expected value ('e') for forms that use an
3564 /// equality comparison or an expression ('expr') for
3565 /// forms that use 'ordop' (logically an atomic maximum or
3566 /// minimum).
3567 /// \param D The desired value for forms that use an equality
3568 /// comparison. If forms that use 'ordop', it should be
3569 /// \p nullptr.
3570 /// \param AO Atomic ordering of the generated atomic instructions.
3571 /// \param Op Atomic compare operation. It can only be ==, <, or >.
3572 /// \param IsXBinopExpr True if the conditional statement is in the form where
3573 /// x is on LHS. It only matters for < or >.
3574 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
3575 /// 'v', not an updated one (for compare capture
3576 /// only).
3577 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
3578 /// only when the comparison fails. This is only valid for
3579 /// the case the comparison is '=='.
3580 ///
3581 /// \return Insertion point after generated atomic capture IR.
3586 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
3590 omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
3591 bool IsFailOnly, AtomicOrdering Failure);
3592
3593 /// Create the control flow structure of a canonical OpenMP loop.
3594 ///
3595 /// The emitted loop will be disconnected, i.e. no edge to the loop's
3596 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
3597 /// IRBuilder location is not preserved.
3598 ///
3599 /// \param DL DebugLoc used for the instructions in the skeleton.
3600 /// \param TripCount Value to be used for the trip count.
3601 /// \param F Function in which to insert the BasicBlocks.
3602 /// \param PreInsertBefore Where to insert BBs that execute before the body,
3603 /// typically the body itself.
3604 /// \param PostInsertBefore Where to insert BBs that execute after the body.
3605 /// \param Name Base name used to derive BB
3606 /// and instruction names.
3607 ///
3608 /// \returns The CanonicalLoopInfo that represents the emitted loop.
3610 Function *F,
3611 BasicBlock *PreInsertBefore,
3612 BasicBlock *PostInsertBefore,
3613 const Twine &Name = {});
3614 /// OMP Offload Info Metadata name string
3615 const std::string ompOffloadInfoName = "omp_offload.info";
3616
3617 /// Loads all the offload entries information from the host IR
3618 /// metadata. This function is only meant to be used with device code
3619 /// generation.
3620 ///
3621 /// \param M Module to load Metadata info from. Module passed maybe
3622 /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module.
3624
3625 /// Loads all the offload entries information from the host IR
3626 /// metadata read from the file passed in as the HostFilePath argument. This
3627 /// function is only meant to be used with device code generation.
3628 ///
3629 /// \param HostFilePath The path to the host IR file,
3630 /// used to load in offload metadata for the device, allowing host and device
3631 /// to maintain the same metadata mapping.
3632 LLVM_ABI void loadOffloadInfoMetadata(StringRef HostFilePath);
3633
3634 /// Gets (if variable with the given name already exist) or creates
3635 /// internal global variable with the specified Name. The created variable has
3636 /// linkage CommonLinkage by default and is initialized by null value.
3637 /// \param Ty Type of the global variable. If it is exist already the type
3638 /// must be the same.
3639 /// \param Name Name of the variable.
3642 unsigned AddressSpace = 0);
3643};
3644
3645/// Class to represented the control flow structure of an OpenMP canonical loop.
3646///
3647/// The control-flow structure is standardized for easy consumption by
3648/// directives associated with loops. For instance, the worksharing-loop
3649/// construct may change this control flow such that each loop iteration is
3650/// executed on only one thread. The constraints of a canonical loop in brief
3651/// are:
3652///
3653/// * The number of loop iterations must have been computed before entering the
3654/// loop.
3655///
3656/// * Has an (unsigned) logical induction variable that starts at zero and
3657/// increments by one.
3658///
3659/// * The loop's CFG itself has no side-effects. The OpenMP specification
3660/// itself allows side-effects, but the order in which they happen, including
3661/// how often or whether at all, is unspecified. We expect that the frontend
3662/// will emit those side-effect instructions somewhere (e.g. before the loop)
3663/// such that the CanonicalLoopInfo itself can be side-effect free.
3664///
3665/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
3666/// execution of a loop body that satifies these constraints. It does NOT
3667/// represent arbitrary SESE regions that happen to contain a loop. Do not use
3668/// CanonicalLoopInfo for such purposes.
3669///
3670/// The control flow can be described as follows:
3671///
3672/// Preheader
3673/// |
3674/// /-> Header
3675/// | |
3676/// | Cond---\
3677/// | | |
3678/// | Body |
3679/// | | | |
3680/// | <...> |
3681/// | | | |
3682/// \--Latch |
3683/// |
3684/// Exit
3685/// |
3686/// After
3687///
3688/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
3689/// including) and end at AfterIP (at the After's first instruction, excluding).
3690/// That is, instructions in the Preheader and After blocks (except the
3691/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
3692/// side-effects. Typically, the Preheader is used to compute the loop's trip
3693/// count. The instructions from BodyIP (at the Body block's first instruction,
3694/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
3695/// control and thus can have side-effects. The body block is the single entry
3696/// point into the loop body, which may contain arbitrary control flow as long
3697/// as all control paths eventually branch to the Latch block.
3698///
3699/// TODO: Consider adding another standardized BasicBlock between Body CFG and
3700/// Latch to guarantee that there is only a single edge to the latch. It would
3701/// make loop transformations easier to not needing to consider multiple
3702/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
3703/// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
3704/// executes after each body iteration.
3705///
3706/// There must be no loop-carried dependencies through llvm::Values. This is
3707/// equivalant to that the Latch has no PHINode and the Header's only PHINode is
3708/// for the induction variable.
3709///
3710/// All code in Header, Cond, Latch and Exit (plus the terminator of the
3711/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
3712/// by assertOK(). They are expected to not be modified unless explicitly
3713/// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
3714/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
3715/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
3716/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
3717/// anymore as its underlying control flow may not exist anymore.
3718/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
3719/// may also return a new CanonicalLoopInfo that can be passed to other
3720/// loop-associated construct implementing methods. These loop-transforming
3721/// methods may either create a new CanonicalLoopInfo usually using
3722/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
3723/// modify one of the input CanonicalLoopInfo and return it as representing the
3724/// modified loop. What is done is an implementation detail of
3725/// transformation-implementing method and callers should always assume that the
3726/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
3727/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
3728/// created by createCanonicalLoop, such that transforming methods do not have
3729/// to special case where the CanonicalLoopInfo originated from.
3730///
3731/// Generally, methods consuming CanonicalLoopInfo do not need an
3732/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
3733/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
3734/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
3735/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
3736/// any InsertPoint in the Preheader, After or Block can still be used after
3737/// calling such a method.
3738///
3739/// TODO: Provide mechanisms for exception handling and cancellation points.
3740///
3741/// Defined outside OpenMPIRBuilder because nested classes cannot be
3742/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
3744 friend class OpenMPIRBuilder;
3745
3746private:
3747 BasicBlock *Header = nullptr;
3748 BasicBlock *Cond = nullptr;
3749 BasicBlock *Latch = nullptr;
3750 BasicBlock *Exit = nullptr;
3751
3752 // Hold the MLIR value for the `lastiter` of the canonical loop.
3753 Value *LastIter = nullptr;
3754
3755 /// Add the control blocks of this loop to \p BBs.
3756 ///
3757 /// This does not include any block from the body, including the one returned
3758 /// by getBody().
3759 ///
3760 /// FIXME: This currently includes the Preheader and After blocks even though
3761 /// their content is (mostly) not under CanonicalLoopInfo's control.
3762 /// Re-evaluated whether this makes sense.
3763 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
3764
3765 /// Sets the number of loop iterations to the given value. This value must be
3766 /// valid in the condition block (i.e., defined in the preheader) and is
3767 /// interpreted as an unsigned integer.
3768 void setTripCount(Value *TripCount);
3769
3770 /// Replace all uses of the canonical induction variable in the loop body with
3771 /// a new one.
3772 ///
3773 /// The intended use case is to update the induction variable for an updated
3774 /// iteration space such that it can stay normalized in the 0...tripcount-1
3775 /// range.
3776 ///
3777 /// The \p Updater is called with the (presumable updated) current normalized
3778 /// induction variable and is expected to return the value that uses of the
3779 /// pre-updated induction values should use instead, typically dependent on
3780 /// the new induction variable. This is a lambda (instead of e.g. just passing
3781 /// the new value) to be able to distinguish the uses of the pre-updated
3782 /// induction variable and uses of the induction varible to compute the
3783 /// updated induction variable value.
3784 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
3785
3786public:
3787 /// Sets the last iteration variable for this loop.
3788 void setLastIter(Value *IterVar) { LastIter = std::move(IterVar); }
3789
3790 /// Returns the last iteration variable for this loop.
3791 /// Certain use-cases (like translation of linear clause) may access
3792 /// this variable even after a loop transformation. Hence, do not guard
3793 /// this getter function by `isValid`. It is the responsibility of the
3794 /// callee to ensure this functionality is not invoked by a non-outlined
3795 /// CanonicalLoopInfo object (in which case, `setLastIter` will never be
3796 /// invoked and `LastIter` will be by default `nullptr`).
3797 Value *getLastIter() { return LastIter; }
3798
3799 /// Returns whether this object currently represents the IR of a loop. If
3800 /// returning false, it may have been consumed by a loop transformation or not
3801 /// been intialized. Do not use in this case;
3802 bool isValid() const { return Header; }
3803
3804 /// The preheader ensures that there is only a single edge entering the loop.
3805 /// Code that must be execute before any loop iteration can be emitted here,
3806 /// such as computing the loop trip count and begin lifetime markers. Code in
3807 /// the preheader is not considered part of the canonical loop.
3809
3810 /// The header is the entry for each iteration. In the canonical control flow,
3811 /// it only contains the PHINode for the induction variable.
3813 assert(isValid() && "Requires a valid canonical loop");
3814 return Header;
3815 }
3816
3817 /// The condition block computes whether there is another loop iteration. If
3818 /// yes, branches to the body; otherwise to the exit block.
3820 assert(isValid() && "Requires a valid canonical loop");
3821 return Cond;
3822 }
3823
3824 /// The body block is the single entry for a loop iteration and not controlled
3825 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
3826 /// eventually branch to the \p Latch block.
3828 assert(isValid() && "Requires a valid canonical loop");
3829 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
3830 }
3831
3832 /// Reaching the latch indicates the end of the loop body code. In the
3833 /// canonical control flow, it only contains the increment of the induction
3834 /// variable.
3836 assert(isValid() && "Requires a valid canonical loop");
3837 return Latch;
3838 }
3839
3840 /// Reaching the exit indicates no more iterations are being executed.
3842 assert(isValid() && "Requires a valid canonical loop");
3843 return Exit;
3844 }
3845
3846 /// The after block is intended for clean-up code such as lifetime end
3847 /// markers. It is separate from the exit block to ensure, analogous to the
3848 /// preheader, it having just a single entry edge and being free from PHI
3849 /// nodes should there be multiple loop exits (such as from break
3850 /// statements/cancellations).
3852 assert(isValid() && "Requires a valid canonical loop");
3853 return Exit->getSingleSuccessor();
3854 }
3855
3856 /// Returns the llvm::Value containing the number of loop iterations. It must
3857 /// be valid in the preheader and always interpreted as an unsigned integer of
3858 /// any bit-width.
3860 assert(isValid() && "Requires a valid canonical loop");
3861 Instruction *CmpI = &Cond->front();
3862 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
3863 return CmpI->getOperand(1);
3864 }
3865
3866 /// Returns the instruction representing the current logical induction
3867 /// variable. Always unsigned, always starting at 0 with an increment of one.
3869 assert(isValid() && "Requires a valid canonical loop");
3870 Instruction *IndVarPHI = &Header->front();
3871 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
3872 return IndVarPHI;
3873 }
3874
3875 /// Return the type of the induction variable (and the trip count).
3877 assert(isValid() && "Requires a valid canonical loop");
3878 return getIndVar()->getType();
3879 }
3880
3881 /// Return the insertion point for user code before the loop.
3883 assert(isValid() && "Requires a valid canonical loop");
3884 BasicBlock *Preheader = getPreheader();
3885 return {Preheader, std::prev(Preheader->end())};
3886 };
3887
3888 /// Return the insertion point for user code in the body.
3890 assert(isValid() && "Requires a valid canonical loop");
3891 BasicBlock *Body = getBody();
3892 return {Body, Body->begin()};
3893 };
3894
3895 /// Return the insertion point for user code after the loop.
3897 assert(isValid() && "Requires a valid canonical loop");
3898 BasicBlock *After = getAfter();
3899 return {After, After->begin()};
3900 };
3901
3903 assert(isValid() && "Requires a valid canonical loop");
3904 return Header->getParent();
3905 }
3906
3907 /// Consistency self-check.
3908 LLVM_ABI void assertOK() const;
3909
3910 /// Invalidate this loop. That is, the underlying IR does not fulfill the
3911 /// requirements of an OpenMP canonical loop anymore.
3912 LLVM_ABI void invalidate();
3913};
3914
3915/// ScanInfo holds the information to assist in lowering of Scan reduction.
3916/// Before lowering, the body of the for loop specifying scan reduction is
3917/// expected to have the following structure
3918///
3919/// Loop Body Entry
3920/// |
3921/// Code before the scan directive
3922/// |
3923/// Scan Directive
3924/// |
3925/// Code after the scan directive
3926/// |
3927/// Loop Body Exit
3928/// When `createCanonicalScanLoops` is executed, the bodyGen callback of it
3929/// transforms the body to:
3930///
3931/// Loop Body Entry
3932/// |
3933/// OMPScanDispatch
3934///
3935/// OMPBeforeScanBlock
3936/// |
3937/// OMPScanLoopExit
3938/// |
3939/// Loop Body Exit
3940///
3941/// The insert point is updated to the first insert point of OMPBeforeScanBlock.
3942/// It dominates the control flow of code generated until
3943/// scan directive is encountered and OMPAfterScanBlock dominates the
3944/// control flow of code generated after scan is encountered. The successor
3945/// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
3946/// on 1.whether it is in Input phase or Scan Phase , 2. whether it is an
3947/// exclusive or inclusive scan. This jump is added when `createScan` is
3948/// executed. If input loop is being generated, if it is inclusive scan,
3949/// `OMPAfterScanBlock` succeeds `OMPScanDispatch` , if exclusive,
3950/// `OMPBeforeScanBlock` succeeds `OMPDispatch` and vice versa for scan loop. At
3951/// the end of the input loop, temporary buffer is populated and at the
3952/// beginning of the scan loop, temporary buffer is read. After scan directive
3953/// is encountered, insertion point is updated to `OMPAfterScanBlock` as it is
3954/// expected to dominate the code after the scan directive. Both Before and
3955/// After scan blocks are succeeded by `OMPScanLoopExit`.
3956/// Temporary buffer allocations are done in `ScanLoopInit` block before the
3957/// lowering of for-loop. The results are copied back to reduction variable in
3958/// `ScanLoopFinish` block.
3960public:
3961 /// Dominates the body of the loop before scan directive
3963
3964 /// Dominates the body of the loop before scan directive
3966
3967 /// Controls the flow to before or after scan blocks
3969
3970 /// Exit block of loop body
3972
3973 /// Block before loop body where scan initializations are done
3975
3976 /// Block after loop body where scan finalizations are done
3978
3979 /// If true, it indicates Input phase is lowered; else it indicates
3980 /// ScanPhase is lowered
3981 bool OMPFirstScanLoop = false;
3982
3983 /// Maps the private reduction variable to the pointer of the temporary
3984 /// buffer
3986
3987 /// Keeps track of value of iteration variable for input/scan loop to be
3988 /// used for Scan directive lowering
3990
3991 /// Stores the span of canonical loop being lowered to be used for temporary
3992 /// buffer allocation or Finalization.
3994
3997 }
3998
3999 ~ScanInfo() { delete (ScanBuffPtrs); }
4000};
4001
4002} // end namespace llvm
4003
4004#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
arc branch finalize
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the BumpPtrAllocator interface.
BlockVerifier::State From
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_ABI
Definition: Compiler.h:213
DXIL Finalize Linkage
uint64_t Addr
std::string Name
uint32_t Index
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
This file defines constans and helpers used when dealing with OpenMP.
Provides definitions for Target specific Grid Values.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
std::unordered_set< BasicBlock * > BlockSet
Value * RHS
Value * LHS
an instruction to allocate memory on the stack
Definition: Instructions.h:64
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Align AtomicAlign
Definition: Atomic.h:23
bool UseLibcall
Definition: Atomic.h:25
IRBuilderBase * Builder
Definition: Atomic.h:19
uint64_t AtomicSizeInBits
Definition: Atomic.h:21
uint64_t ValueSizeInBits
Definition: Atomic.h:22
IRBuilderBase::InsertPoint AllocaIP
Definition: Atomic.h:26
Align ValueAlign
Definition: Atomic.h:24
Type * Ty
Definition: Atomic.h:20
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:721
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
iterator end()
Definition: BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Value * getLastIter()
Returns the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
Lightweight error class with error context and mandatory checking.
Definition: Error.h:159
Tagged union holding either a T or a Error.
Definition: Error.h:485
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:52
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:291
BasicBlock * getBlock() const
Definition: IRBuilder.h:306
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1830
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:247
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:311
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:323
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
Class to represent integer types.
Definition: DerivedTypes.h:42
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:570
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
Definition: OMPIRBuilder.h:411
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage, const std::string &VarName)
Definition: OMPIRBuilder.h:414
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:429
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:334
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Definition: OMPIRBuilder.h:321
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:255
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:257
OffloadingEntryInfoKinds getKind() const
Definition: OMPIRBuilder.h:273
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
Definition: OMPIRBuilder.h:264
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:281
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Definition: OMPIRBuilder.h:265
Class that manages information about offload code regions and data.
Definition: OMPIRBuilder.h:243
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:449
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
Definition: OMPIRBuilder.h:390
@ OMPTargetDeviceClauseNoHost
The target is marked for non-host devices.
Definition: OMPIRBuilder.h:394
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
Definition: OMPIRBuilder.h:392
@ OMPTargetDeviceClauseNone
The target is marked as having no clause.
Definition: OMPIRBuilder.h:398
@ OMPTargetDeviceClauseHost
The target is marked for host devices.
Definition: OMPIRBuilder.h:396
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:308
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:310
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
Definition: OMPIRBuilder.h:301
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:299
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
Definition: OMPIRBuilder.h:370
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
Definition: OMPIRBuilder.h:376
@ OMPTargetGlobalVarEntryNone
Mark the entry as having no declare target entry kind.
Definition: OMPIRBuilder.h:378
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
Definition: OMPIRBuilder.h:382
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
Definition: OMPIRBuilder.h:380
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
Definition: OMPIRBuilder.h:374
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
Definition: OMPIRBuilder.h:372
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
brief Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:361
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:444
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
Definition: OMPIRBuilder.h:98
void setIsGPU(bool Value)
Definition: OMPIRBuilder.h:195
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
Definition: OMPIRBuilder.h:104
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
Definition: OMPIRBuilder.h:114
void setGridValue(omp::GV G)
Definition: OMPIRBuilder.h:200
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
Definition: OMPIRBuilder.h:123
StringRef separator() const
Definition: OMPIRBuilder.h:186
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
void setFirstSeparator(StringRef FS)
Definition: OMPIRBuilder.h:198
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:176
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
Definition: OMPIRBuilder.h:120
std::optional< bool > EmitLLVMUsedMetaInfo
Flag for specifying if LLVMUsed information should be emitted.
Definition: OMPIRBuilder.h:117
omp::GV getGridValue() const
Definition: OMPIRBuilder.h:159
SmallVector< Triple > TargetTriples
When compilation is being done for the OpenMP host (i.e.
Definition: OMPIRBuilder.h:132
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
std::optional< StringRef > Separator
Separator used between all of the rest consecutive parts of s name.
Definition: OMPIRBuilder.h:125
LLVM_ABI bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:153
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
void setOpenMPOffloadMandatory(bool Value)
Definition: OMPIRBuilder.h:197
void setIsTargetDevice(bool Value)
Definition: OMPIRBuilder.h:194
void setSeparator(StringRef S)
Definition: OMPIRBuilder.h:199
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
void setEmitLLVMUsed(bool Value=true)
Definition: OMPIRBuilder.h:196
std::optional< omp::GV > GridValue
Definition: OMPIRBuilder.h:128
LLVM_ABI bool hasRequiresReverseOffload() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
llvm::AllocaInst * CreateAlloca(llvm::Type *Ty, const llvm::Twine &Name) const override
Definition: OMPIRBuilder.h:508
void decorateWithTBAA(llvm::Instruction *I) override
Definition: OMPIRBuilder.h:507
AtomicInfo(IRBuilder<> *Builder, llvm::Type *Ty, uint64_t AtomicSizeInBits, uint64_t ValueSizeInBits, llvm::Align AtomicAlign, llvm::Align ValueAlign, bool UseLibcall, IRBuilderBase::InsertPoint AllocaIP, llvm::Value *AtomicVar)
Definition: OMPIRBuilder.h:498
llvm::Value * getAtomicPointer() const override
Definition: OMPIRBuilder.h:506
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
SmallMapVector< const Value *, std::pair< Value *, Value * >, 4 > DevicePtrInfoMap
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool HasNoWait
Whether the target ... data directive has a nowait clause.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:485
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
ReductionGenCBKind
Enum class for the RedctionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:560
LLVM_ABI void emitBranch(BasicBlock *Target)
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
EvalKind
Enum class for reduction evaluation types scalar, complex and aggregate.
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attribtues of the function Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
std::function< InsertPointOrErrorTy(InsertPointTy CodeGenIP, Value *LHS, Value *RHS, Value *&Res)> ReductionGenCBTy
ReductionGen CallBack for MLIR.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
void setConfig(OpenMPIRBuilderConfig C)
Definition: OMPIRBuilder.h:521
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Get the create a name using the platform specific separators.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false)
Generator for '#omp target'.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
std::function< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> StorableBodyGenCallbackTy
Definition: OMPIRBuilder.h:623
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
std::function< InsertPointOrErrorTy(InsertPointTy, Type *, Value *, Value *)> ReductionGenAtomicCBTy
Functions used to generate atomic reductions.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for '#omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
std::function< InsertPointTy(InsertPointTy CodeGenIP, unsigned Index, Value **LHS, Value **RHS, Function *CurFn)> ReductionGenClangCBTy
ReductionGen CallBack for Clang.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
Definition: OMPIRBuilder.h:578
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy getInsertionPoint()
}
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for '#omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:536
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
function_ref< InsertPointOrErrorTy(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
Definition: OMPIRBuilder.h:658
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
Definition: OMPIRBuilder.h:489
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write a bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
Definition: OMPIRBuilder.h:539
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
Definition: OMPIRBuilder.h:585
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
Definition: OMPIRBuilder.h:614
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:380
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:574
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:133
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:280
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Value * getOperand(unsigned i) const
Definition: User.h:232
See the file comment.
Definition: ValueMap.h:84
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:205
bool pointsToAliveValue() const
Definition: ValueHandle.h:225
An efficient, type-erasing, non-owning reference to a callable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:81
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
Definition: OMPConstants.h:198
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
Definition: OMPConstants.h:65
RTLDependenceKindTy
Dependence kind for RTL.
Definition: OMPConstants.h:276
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:45
WorksharingLoopType
A type of worksharing loop construct.
Definition: OMPConstants.h:286
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:270
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
@ Offset
Definition: DWP.cpp:477
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
AtomicOrdering
Atomic ordering for LLVM's memory model.
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
bool IsCancellable
Flag to indicate if the directive is cancellable.
Definition: OMPIRBuilder.h:572
FinalizeCallbackTy FiniCB
The finalization callback provided by the last in-flight invocation of createXXXX for the directive o...
Definition: OMPIRBuilder.h:565
omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
Definition: OMPIRBuilder.h:569
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:662
LocationDescription(const InsertPointTy &IP)
Definition: OMPIRBuilder.h:665
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
Definition: OMPIRBuilder.h:666
LocationDescription(const IRBuilderBase &IRB)
Definition: OMPIRBuilder.h:663
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
std::function< void(Function &)> PostOutlineCBTy
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
ReductionInfo(Value *PrivateVariable)
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, EvalKind EvaluationKind, ReductionGenCBTy ReductionGen, ReductionGenClangCBTy ReductionGenClang, ReductionGenAtomicCBTy AtomicReductionGen)
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, Value *SizesArray, Value *MapTypesArray, Value *MapTypesArrayEnd, Value *MappersArray, Value *MapNamesArray)
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, Value *NumIterations, ArrayRef< Value * > NumTeams, ArrayRef< Value * > NumThreads, Value *DynCGGroupMem, bool HasNoWait)
Value * DynCGGroupMem
The size of the dynamic shared memory.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:249
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:214
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
Definition: OMPIRBuilder.h:216
bool operator<(const TargetRegionEntryInfo &RHS) const
Definition: OMPIRBuilder.h:235
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Definition: OMPIRBuilder.h:225
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
Definition: OMPGridValues.h:57