20#include "llvm/IR/IntrinsicsNVPTX.h"
26#define DEBUG_TYPE "nvvm-intr-range"
40char NVVMIntrRange::ID = 0;
42 "Add !range metadata to NVVM intrinsics.",
false,
false)
47 if (
II->getMetadata(LLVMContext::MD_range))
53 if (
auto CurrentRange =
II->getRange())
74 if (!(OverallReqNTID || OverallMaxNTID || OverallClusterRank))
77 const unsigned FunctionNTID = OverallReqNTID.value_or(
78 OverallMaxNTID.value_or(std::numeric_limits<unsigned>::max()));
80 const unsigned FunctionClusterRank =
81 OverallClusterRank.value_or(std::numeric_limits<unsigned>::max());
83 const Vector3 MaxBlockSize{std::min(1024u, FunctionNTID),
84 std::min(1024u, FunctionNTID),
85 std::min(64u, FunctionNTID)};
89 const Vector3 MaxClusterRank{std::min(0x7fffffffu, FunctionClusterRank),
90 std::min(0xffffu, FunctionClusterRank),
91 std::min(0xffffu, FunctionClusterRank)};
94 switch (
II->getIntrinsicID()) {
96 case Intrinsic::nvvm_read_ptx_sreg_tid_x:
97 return addRangeAttr(0, MaxBlockSize.X,
II);
98 case Intrinsic::nvvm_read_ptx_sreg_tid_y:
99 return addRangeAttr(0, MaxBlockSize.Y,
II);
100 case Intrinsic::nvvm_read_ptx_sreg_tid_z:
101 return addRangeAttr(0, MaxBlockSize.Z,
II);
104 case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
105 return addRangeAttr(1, MaxBlockSize.X + 1,
II);
106 case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
107 return addRangeAttr(1, MaxBlockSize.Y + 1,
II);
108 case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
109 return addRangeAttr(1, MaxBlockSize.Z + 1,
II);
112 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x:
113 return addRangeAttr(0, MaxClusterRank.X,
II);
114 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y:
115 return addRangeAttr(0, MaxClusterRank.Y,
II);
116 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z:
117 return addRangeAttr(0, MaxClusterRank.Z,
II);
118 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x:
119 return addRangeAttr(1, MaxClusterRank.X + 1,
II);
120 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y:
121 return addRangeAttr(1, MaxClusterRank.Y + 1,
II);
122 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z:
123 return addRangeAttr(1, MaxClusterRank.Z + 1,
II);
125 case Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank:
126 if (OverallClusterRank)
127 return addRangeAttr(0, FunctionClusterRank,
II);
129 case Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank:
130 if (OverallClusterRank)
131 return addRangeAttr(1, FunctionClusterRank + 1,
II);
140 bool Changed =
false;
143 Changed |= ProccessIntrinsic(
II);
Expand Atomic instructions
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This header defines various interfaces for pass management in LLVM.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static bool runNVVMIntrRange(Function &F)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Class for arbitrary precision integers.
A container for analyses that lazily runs them and caches their results.
This class represents a range of values.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
A wrapper class for inspecting calls to intrinsic functions.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
std::optional< uint64_t > getOverallClusterRank(const Function &F)
std::optional< uint64_t > getOverallReqNTID(const Function &F)
bool isKernelFunction(const Function &F)
std::optional< uint64_t > getOverallMaxNTID(const Function &F)
FunctionPass * createNVVMIntrRangePass()
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)