LLVM 22.0.0git
AMDGPUSubtarget.cpp
Go to the documentation of this file.
1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Implements the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUSubtarget.h"
15#include "AMDGPUCallLowering.h"
17#include "AMDGPULegalizerInfo.h"
19#include "R600Subtarget.h"
26#include "llvm/IR/IntrinsicsAMDGPU.h"
27#include "llvm/IR/IntrinsicsR600.h"
28#include "llvm/IR/MDBuilder.h"
29#include <algorithm>
30
31using namespace llvm;
32
33#define DEBUG_TYPE "amdgpu-subtarget"
34
36
39}
40
41// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
42// allows the given function to achieve an occupancy of NWaves waves per
43// SIMD / EU, taking into account only the function's *maximum* workgroup size.
44unsigned
46 const Function &F) const {
47 const unsigned WaveSize = getWavefrontSize();
48 const unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
49 const unsigned WavesPerWorkgroup =
50 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
51
52 const unsigned WorkGroupsPerCU =
53 std::max(1u, (NWaves * getEUsPerCU()) / WavesPerWorkgroup);
54
55 return getLocalMemorySize() / WorkGroupsPerCU;
56}
57
59 uint32_t LDSBytes, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
60
61 // FIXME: We should take into account the LDS allocation granularity.
62 const unsigned MaxWGsLDS = getLocalMemorySize() / std::max(LDSBytes, 1u);
63
64 // Queried LDS size may be larger than available on a CU, in which case we
65 // consider the only achievable occupancy to be 1, in line with what we
66 // consider the occupancy to be when the number of requested registers in a
67 // particular bank is higher than the number of available ones in that bank.
68 if (!MaxWGsLDS)
69 return {1, 1};
70
71 const unsigned WaveSize = getWavefrontSize(), WavesPerEU = getMaxWavesPerEU();
72
73 auto PropsFromWGSize = [=](unsigned WGSize)
74 -> std::tuple<const unsigned, const unsigned, unsigned> {
75 unsigned WavesPerWG = divideCeil(WGSize, WaveSize);
76 unsigned WGsPerCU = std::min(getMaxWorkGroupsPerCU(WGSize), MaxWGsLDS);
77 return {WavesPerWG, WGsPerCU, WavesPerWG * WGsPerCU};
78 };
79
80 // The maximum group size will generally yield the minimum number of
81 // workgroups, maximum number of waves, and minimum occupancy. The opposite is
82 // generally true for the minimum group size. LDS or barrier ressource
83 // limitations can flip those minimums/maximums.
84 const auto [MinWGSize, MaxWGSize] = FlatWorkGroupSizes;
85 auto [MinWavesPerWG, MaxWGsPerCU, MaxWavesPerCU] = PropsFromWGSize(MinWGSize);
86 auto [MaxWavesPerWG, MinWGsPerCU, MinWavesPerCU] = PropsFromWGSize(MaxWGSize);
87
88 // It is possible that we end up with flipped minimum and maximum number of
89 // waves per CU when the number of minimum/maximum concurrent groups on the CU
90 // is limited by LDS usage or barrier resources.
91 if (MinWavesPerCU >= MaxWavesPerCU) {
92 std::swap(MinWavesPerCU, MaxWavesPerCU);
93 } else {
94 const unsigned WaveSlotsPerCU = WavesPerEU * getEUsPerCU();
95
96 // Look for a potential smaller group size than the maximum which decreases
97 // the concurrent number of waves on the CU for the same number of
98 // concurrent workgroups on the CU.
99 unsigned MinWavesPerCUForWGSize =
100 divideCeil(WaveSlotsPerCU, MinWGsPerCU + 1) * MinWGsPerCU;
101 if (MinWavesPerCU > MinWavesPerCUForWGSize) {
102 unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
103 if (unsigned ExcessSlotsPerWG = ExcessSlots / MinWGsPerCU) {
104 // There may exist a smaller group size than the maximum that achieves
105 // the minimum number of waves per CU. This group size is the largest
106 // possible size that requires MaxWavesPerWG - E waves where E is
107 // maximized under the following constraints.
108 // 1. 0 <= E <= ExcessSlotsPerWG
109 // 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
110 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
111 MaxWavesPerWG - MinWavesPerWG);
112 }
113 }
114
115 // Look for a potential larger group size than the minimum which increases
116 // the concurrent number of waves on the CU for the same number of
117 // concurrent workgroups on the CU.
118 unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
119 if (unsigned LeftoverSlotsPerWG = LeftoverSlots / MaxWGsPerCU) {
120 // There may exist a larger group size than the minimum that achieves the
121 // maximum number of waves per CU. This group size is the smallest
122 // possible size that requires MinWavesPerWG + L waves where L is
123 // maximized under the following constraints.
124 // 1. 0 <= L <= LeftoverSlotsPerWG
125 // 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
126 MaxWavesPerCU += MaxWGsPerCU * std::min(LeftoverSlotsPerWG,
127 ((MaxWGSize - 1) / WaveSize) + 1 -
128 MinWavesPerWG);
129 }
130 }
131
132 // Return the minimum/maximum number of waves on any EU, assuming that all
133 // wavefronts are spread across all EUs as evenly as possible.
134 return {std::clamp(MinWavesPerCU / getEUsPerCU(), 1U, WavesPerEU),
135 std::clamp(divideCeil(MaxWavesPerCU, getEUsPerCU()), 1U, WavesPerEU)};
136}
137
139 const MachineFunction &MF) const {
140 const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
141 return getOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
142}
143
144std::pair<unsigned, unsigned>
146 switch (CC) {
153 return std::pair(1, getWavefrontSize());
154 default:
155 return std::pair(1u, getMaxFlatWorkGroupSize());
156 }
157}
158
159std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
160 const Function &F) const {
161 // Default minimum/maximum flat work group sizes.
162 std::pair<unsigned, unsigned> Default =
163 getDefaultFlatWorkGroupSize(F.getCallingConv());
164
165 // Requested minimum/maximum flat work group sizes.
166 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
167 F, "amdgpu-flat-work-group-size", Default);
168
169 // Make sure requested minimum is less than requested maximum.
170 if (Requested.first > Requested.second)
171 return Default;
172
173 // Make sure requested values do not violate subtarget's specifications.
174 if (Requested.first < getMinFlatWorkGroupSize())
175 return Default;
176 if (Requested.second > getMaxFlatWorkGroupSize())
177 return Default;
178
179 return Requested;
180}
181
182std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
183 std::pair<unsigned, unsigned> RequestedWavesPerEU,
184 std::pair<unsigned, unsigned> FlatWorkGroupSizes, unsigned LDSBytes) const {
185 // Default minimum/maximum number of waves per EU. The range of flat workgroup
186 // sizes limits the achievable maximum, and we aim to support enough waves per
187 // EU so that we can concurrently execute all waves of a single workgroup of
188 // maximum size on a CU.
189 std::pair<unsigned, unsigned> Default = {
190 getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second),
191 getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
192 Default.first = std::min(Default.first, Default.second);
193
194 // Make sure requested minimum is within the default range and lower than the
195 // requested maximum. The latter must not violate target specification.
196 if (RequestedWavesPerEU.first < Default.first ||
197 RequestedWavesPerEU.first > Default.second ||
198 RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
199 RequestedWavesPerEU.second > getMaxWavesPerEU())
200 return Default;
201
202 // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
203 RequestedWavesPerEU.second =
204 std::min(RequestedWavesPerEU.second, Default.second);
205 return RequestedWavesPerEU;
206}
207
208std::pair<unsigned, unsigned>
210 // Default/requested minimum/maximum flat work group sizes.
211 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
212 // Minimum number of bytes allocated in the LDS.
213 unsigned LDSBytes =
214 AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size", {0, UINT32_MAX},
215 /*OnlyFirstRequired=*/true)
216 .first;
217 return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
218}
219
220std::pair<unsigned, unsigned>
221AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
222 unsigned LDSBytes, const Function &F) const {
223 // Default minimum/maximum number of waves per execution unit.
224 std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
225
226 // Requested minimum/maximum number of waves per execution unit.
227 std::pair<unsigned, unsigned> Requested =
228 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
229 return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes, LDSBytes);
230}
231
232std::optional<unsigned>
234 unsigned Dim) const {
235 auto *Node = Kernel.getMetadata("reqd_work_group_size");
236 if (Node && Node->getNumOperands() == 3)
237 return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
238 return std::nullopt;
239}
240
242 const Function &F, bool RequiresUniformYZ) const {
243 auto *Node = F.getMetadata("reqd_work_group_size");
244 if (!Node || Node->getNumOperands() != 3)
245 return false;
246 unsigned XLen =
247 mdconst::extract<ConstantInt>(Node->getOperand(0))->getZExtValue();
248 unsigned YLen =
249 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue();
250 unsigned ZLen =
251 mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
252
253 bool Is1D = YLen <= 1 && ZLen <= 1;
254 bool IsXLargeEnough =
255 isPowerOf2_32(XLen) && (!RequiresUniformYZ || XLen >= getWavefrontSize());
256 return Is1D || IsXLargeEnough;
257}
258
260 return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
261}
262
264 unsigned Dimension) const {
265 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
266 if (ReqdSize)
267 return *ReqdSize - 1;
268 return getFlatWorkGroupSizes(Kernel).second - 1;
269}
270
272 for (int I = 0; I < 3; ++I) {
273 if (getMaxWorkitemID(Func, I) > 0)
274 return false;
275 }
276
277 return true;
278}
279
281 Function *Kernel = I->getParent()->getParent();
282 unsigned MinSize = 0;
283 unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
284 bool IdQuery = false;
285
286 // If reqd_work_group_size is present it narrows value down.
287 if (auto *CI = dyn_cast<CallInst>(I)) {
288 const Function *F = CI->getCalledFunction();
289 if (F) {
290 unsigned Dim = UINT_MAX;
291 switch (F->getIntrinsicID()) {
292 case Intrinsic::amdgcn_workitem_id_x:
293 case Intrinsic::r600_read_tidig_x:
294 IdQuery = true;
295 [[fallthrough]];
296 case Intrinsic::r600_read_local_size_x:
297 Dim = 0;
298 break;
299 case Intrinsic::amdgcn_workitem_id_y:
300 case Intrinsic::r600_read_tidig_y:
301 IdQuery = true;
302 [[fallthrough]];
303 case Intrinsic::r600_read_local_size_y:
304 Dim = 1;
305 break;
306 case Intrinsic::amdgcn_workitem_id_z:
307 case Intrinsic::r600_read_tidig_z:
308 IdQuery = true;
309 [[fallthrough]];
310 case Intrinsic::r600_read_local_size_z:
311 Dim = 2;
312 break;
313 default:
314 break;
315 }
316
317 if (Dim <= 3) {
318 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
319 if (ReqdSize)
320 MinSize = MaxSize = *ReqdSize;
321 }
322 }
323 }
324
325 if (!MaxSize)
326 return false;
327
328 // Range metadata is [Lo, Hi). For ID query we need to pass max size
329 // as Hi. For size query we need to pass Hi + 1.
330 if (IdQuery)
331 MinSize = 0;
332 else
333 ++MaxSize;
334
335 APInt Lower{32, MinSize};
336 APInt Upper{32, MaxSize};
337 if (auto *CI = dyn_cast<CallBase>(I)) {
339 CI->addRangeRetAttr(Range);
340 } else {
341 MDBuilder MDB(I->getContext());
342 MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper);
343 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
344 }
345 return true;
346}
347
349 assert(AMDGPU::isKernel(F.getCallingConv()));
350
351 // We don't allocate the segment if we know the implicit arguments weren't
352 // used, even if the ABI implies we need them.
353 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
354 return 0;
355
356 if (isMesaKernel(F))
357 return 16;
358
359 // Assume all implicit inputs are used by default
360 const Module *M = F.getParent();
361 unsigned NBytes =
363 return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
364 NBytes);
365}
366
368 Align &MaxAlign) const {
369 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
370 F.getCallingConv() == CallingConv::SPIR_KERNEL);
371
372 const DataLayout &DL = F.getDataLayout();
373 uint64_t ExplicitArgBytes = 0;
374 MaxAlign = Align(1);
375
376 for (const Argument &Arg : F.args()) {
377 if (Arg.hasAttribute("amdgpu-hidden-argument"))
378 continue;
379
380 const bool IsByRef = Arg.hasByRefAttr();
381 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
382 Align Alignment = DL.getValueOrABITypeAlignment(
383 IsByRef ? Arg.getParamAlign() : std::nullopt, ArgTy);
384 uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
385 ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
386 MaxAlign = std::max(MaxAlign, Alignment);
387 }
388
389 return ExplicitArgBytes;
390}
391
393 Align &MaxAlign) const {
394 if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
395 F.getCallingConv() != CallingConv::SPIR_KERNEL)
396 return 0;
397
398 uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
399
400 unsigned ExplicitOffset = getExplicitKernelArgOffset();
401
402 uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
403 unsigned ImplicitBytes = getImplicitArgNumBytes(F);
404 if (ImplicitBytes != 0) {
405 const Align Alignment = getAlignmentForImplicitArgPtr();
406 TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
407 MaxAlign = std::max(MaxAlign, Alignment);
408 }
409
410 // Being able to dereference past the end is useful for emitting scalar loads.
411 return alignTo(TotalSize, 4);
412}
413
417}
418
421 return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
422 return static_cast<const AMDGPUSubtarget &>(MF.getSubtarget<R600Subtarget>());
423}
424
426 if (TM.getTargetTriple().isAMDGCN())
427 return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
428 return static_cast<const AMDGPUSubtarget &>(
429 TM.getSubtarget<R600Subtarget>(F));
430}
431
432// FIXME: This has no reason to be in subtarget
435 return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
436 std::numeric_limits<uint32_t>::max());
437}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the InstructionSelector class for AMDGPU.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file describes how to lower LLVM inline asm to machine code INLINEASM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
if(PassOpts->AAPipeline)
AMDGPU R600 specific subclass of TargetSubtarget.
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Align getAlignmentForImplicitArgPtr() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* intrinsic call or load.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned getMaxWavesPerEU() const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool RequiresUniformYZ=false) const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
Class for arbitrary precision integers.
Definition: APInt.h:78
This class represents an incoming formal argument to a Function.
Definition: Argument.h:32
This class represents a range of values.
Definition: ConstantRange.h:47
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:576
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition: MDBuilder.cpp:96
Metadata node.
Definition: Metadata.h:1077
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
const Triple & getTargetTriple() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:47
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition: Triple.h:901
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:399
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1886
@ Default
The result values are uniform if and only if all operands are uniform.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:856
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39