LLVM 22.0.0git
AMDGPUSubtarget.cpp
Go to the documentation of this file.
1//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Implements the AMDGPU specific subclass of TargetSubtarget.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPUSubtarget.h"
15#include "AMDGPUCallLowering.h"
17#include "AMDGPULegalizerInfo.h"
19#include "R600Subtarget.h"
26#include "llvm/IR/IntrinsicsAMDGPU.h"
27#include "llvm/IR/IntrinsicsR600.h"
28#include "llvm/IR/MDBuilder.h"
29#include <algorithm>
30
31using namespace llvm;
32
33#define DEBUG_TYPE "amdgpu-subtarget"
34
36
40
44
45// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
46// allows the given function to achieve an occupancy of NWaves waves per
47// SIMD / EU, taking into account only the function's *maximum* workgroup size.
48unsigned
50 const Function &F) const {
51 const unsigned WaveSize = getWavefrontSize();
52 const unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
53 const unsigned WavesPerWorkgroup =
54 std::max(1u, (WorkGroupSize + WaveSize - 1) / WaveSize);
55
56 const unsigned WorkGroupsPerCU =
57 std::max(1u, (NWaves * getEUsPerCU()) / WavesPerWorkgroup);
58
59 return getLocalMemorySize() / WorkGroupsPerCU;
60}
61
63 uint32_t LDSBytes, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
64
65 // FIXME: We should take into account the LDS allocation granularity.
66 const unsigned MaxWGsLDS = getLocalMemorySize() / std::max(LDSBytes, 1u);
67
68 // Queried LDS size may be larger than available on a CU, in which case we
69 // consider the only achievable occupancy to be 1, in line with what we
70 // consider the occupancy to be when the number of requested registers in a
71 // particular bank is higher than the number of available ones in that bank.
72 if (!MaxWGsLDS)
73 return {1, 1};
74
75 const unsigned WaveSize = getWavefrontSize(), WavesPerEU = getMaxWavesPerEU();
76
77 auto PropsFromWGSize = [=](unsigned WGSize)
78 -> std::tuple<const unsigned, const unsigned, unsigned> {
79 unsigned WavesPerWG = divideCeil(WGSize, WaveSize);
80 unsigned WGsPerCU = std::min(getMaxWorkGroupsPerCU(WGSize), MaxWGsLDS);
81 return {WavesPerWG, WGsPerCU, WavesPerWG * WGsPerCU};
82 };
83
84 // The maximum group size will generally yield the minimum number of
85 // workgroups, maximum number of waves, and minimum occupancy. The opposite is
86 // generally true for the minimum group size. LDS or barrier ressource
87 // limitations can flip those minimums/maximums.
88 const auto [MinWGSize, MaxWGSize] = FlatWorkGroupSizes;
89 auto [MinWavesPerWG, MaxWGsPerCU, MaxWavesPerCU] = PropsFromWGSize(MinWGSize);
90 auto [MaxWavesPerWG, MinWGsPerCU, MinWavesPerCU] = PropsFromWGSize(MaxWGSize);
91
92 // It is possible that we end up with flipped minimum and maximum number of
93 // waves per CU when the number of minimum/maximum concurrent groups on the CU
94 // is limited by LDS usage or barrier resources.
95 if (MinWavesPerCU >= MaxWavesPerCU) {
96 std::swap(MinWavesPerCU, MaxWavesPerCU);
97 } else {
98 const unsigned WaveSlotsPerCU = WavesPerEU * getEUsPerCU();
99
100 // Look for a potential smaller group size than the maximum which decreases
101 // the concurrent number of waves on the CU for the same number of
102 // concurrent workgroups on the CU.
103 unsigned MinWavesPerCUForWGSize =
104 divideCeil(WaveSlotsPerCU, MinWGsPerCU + 1) * MinWGsPerCU;
105 if (MinWavesPerCU > MinWavesPerCUForWGSize) {
106 unsigned ExcessSlots = MinWavesPerCU - MinWavesPerCUForWGSize;
107 if (unsigned ExcessSlotsPerWG = ExcessSlots / MinWGsPerCU) {
108 // There may exist a smaller group size than the maximum that achieves
109 // the minimum number of waves per CU. This group size is the largest
110 // possible size that requires MaxWavesPerWG - E waves where E is
111 // maximized under the following constraints.
112 // 1. 0 <= E <= ExcessSlotsPerWG
113 // 2. (MaxWavesPerWG - E) * WaveSize >= MinWGSize
114 MinWavesPerCU -= MinWGsPerCU * std::min(ExcessSlotsPerWG,
115 MaxWavesPerWG - MinWavesPerWG);
116 }
117 }
118
119 // Look for a potential larger group size than the minimum which increases
120 // the concurrent number of waves on the CU for the same number of
121 // concurrent workgroups on the CU.
122 unsigned LeftoverSlots = WaveSlotsPerCU - MaxWGsPerCU * MinWavesPerWG;
123 if (unsigned LeftoverSlotsPerWG = LeftoverSlots / MaxWGsPerCU) {
124 // There may exist a larger group size than the minimum that achieves the
125 // maximum number of waves per CU. This group size is the smallest
126 // possible size that requires MinWavesPerWG + L waves where L is
127 // maximized under the following constraints.
128 // 1. 0 <= L <= LeftoverSlotsPerWG
129 // 2. (MinWavesPerWG + L - 1) * WaveSize <= MaxWGSize
130 MaxWavesPerCU += MaxWGsPerCU * std::min(LeftoverSlotsPerWG,
131 ((MaxWGSize - 1) / WaveSize) + 1 -
132 MinWavesPerWG);
133 }
134 }
135
136 // Return the minimum/maximum number of waves on any EU, assuming that all
137 // wavefronts are spread across all EUs as evenly as possible.
138 return {std::clamp(MinWavesPerCU / getEUsPerCU(), 1U, WavesPerEU),
139 std::clamp(divideCeil(MaxWavesPerCU, getEUsPerCU()), 1U, WavesPerEU)};
140}
141
143 const MachineFunction &MF) const {
144 const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
145 return getOccupancyWithWorkGroupSizes(MFI->getLDSSize(), MF.getFunction());
146}
147
148std::pair<unsigned, unsigned>
150 switch (CC) {
157 return std::pair(1, getWavefrontSize());
158 default:
159 return std::pair(1u, getMaxFlatWorkGroupSize());
160 }
161}
162
163std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
164 const Function &F) const {
165 // Default minimum/maximum flat work group sizes.
166 std::pair<unsigned, unsigned> Default =
167 getDefaultFlatWorkGroupSize(F.getCallingConv());
168
169 // Requested minimum/maximum flat work group sizes.
170 std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
171 F, "amdgpu-flat-work-group-size", Default);
172
173 // Make sure requested minimum is less than requested maximum.
174 if (Requested.first > Requested.second)
175 return Default;
176
177 // Make sure requested values do not violate subtarget's specifications.
178 if (Requested.first < getMinFlatWorkGroupSize())
179 return Default;
180 if (Requested.second > getMaxFlatWorkGroupSize())
181 return Default;
182
183 return Requested;
184}
185
186std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
187 std::pair<unsigned, unsigned> RequestedWavesPerEU,
188 std::pair<unsigned, unsigned> FlatWorkGroupSizes, unsigned LDSBytes) const {
189 // Default minimum/maximum number of waves per EU. The range of flat workgroup
190 // sizes limits the achievable maximum, and we aim to support enough waves per
191 // EU so that we can concurrently execute all waves of a single workgroup of
192 // maximum size on a CU.
193 std::pair<unsigned, unsigned> Default = {
194 getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second),
195 getOccupancyWithWorkGroupSizes(LDSBytes, FlatWorkGroupSizes).second};
196 Default.first = std::min(Default.first, Default.second);
197
198 // Make sure requested minimum is within the default range and lower than the
199 // requested maximum. The latter must not violate target specification.
200 if (RequestedWavesPerEU.first < Default.first ||
201 RequestedWavesPerEU.first > Default.second ||
202 RequestedWavesPerEU.first > RequestedWavesPerEU.second ||
203 RequestedWavesPerEU.second > getMaxWavesPerEU())
204 return Default;
205
206 // We cannot exceed maximum occupancy implied by flat workgroup size and LDS.
207 RequestedWavesPerEU.second =
208 std::min(RequestedWavesPerEU.second, Default.second);
209 return RequestedWavesPerEU;
210}
211
212std::pair<unsigned, unsigned>
214 // Default/requested minimum/maximum flat work group sizes.
215 std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
216 // Minimum number of bytes allocated in the LDS.
217 unsigned LDSBytes =
218 AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size", {0, UINT32_MAX},
219 /*OnlyFirstRequired=*/true)
220 .first;
221 return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
222}
223
224std::pair<unsigned, unsigned>
225AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
226 unsigned LDSBytes, const Function &F) const {
227 // Default minimum/maximum number of waves per execution unit.
228 std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
229
230 // Requested minimum/maximum number of waves per execution unit.
231 std::pair<unsigned, unsigned> Requested =
232 AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
233 return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes, LDSBytes);
234}
235
236std::optional<unsigned>
238 unsigned Dim) const {
239 auto *Node = Kernel.getMetadata("reqd_work_group_size");
240 if (Node && Node->getNumOperands() == 3)
241 return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
242 return std::nullopt;
243}
244
246 const Function &F, bool RequiresUniformYZ) const {
247 auto *Node = F.getMetadata("reqd_work_group_size");
248 if (!Node || Node->getNumOperands() != 3)
249 return false;
250 unsigned XLen =
251 mdconst::extract<ConstantInt>(Node->getOperand(0))->getZExtValue();
252 unsigned YLen =
253 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue();
254 unsigned ZLen =
255 mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
256
257 bool Is1D = YLen <= 1 && ZLen <= 1;
258 bool IsXLargeEnough =
259 isPowerOf2_32(XLen) && (!RequiresUniformYZ || XLen >= getWavefrontSize());
260 return Is1D || IsXLargeEnough;
261}
262
264 return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
265}
266
268 unsigned Dimension) const {
269 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(Kernel, Dimension);
270 if (ReqdSize)
271 return *ReqdSize - 1;
272 return getFlatWorkGroupSizes(Kernel).second - 1;
273}
274
276 for (int I = 0; I < 3; ++I) {
277 if (getMaxWorkitemID(Func, I) > 0)
278 return false;
279 }
280
281 return true;
282}
283
285 Function *Kernel = I->getParent()->getParent();
286 unsigned MinSize = 0;
287 unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
288 bool IdQuery = false;
289
290 // If reqd_work_group_size is present it narrows value down.
291 if (auto *CI = dyn_cast<CallInst>(I)) {
292 const Function *F = CI->getCalledFunction();
293 if (F) {
294 unsigned Dim = UINT_MAX;
295 switch (F->getIntrinsicID()) {
296 case Intrinsic::amdgcn_workitem_id_x:
297 case Intrinsic::r600_read_tidig_x:
298 IdQuery = true;
299 [[fallthrough]];
300 case Intrinsic::r600_read_local_size_x:
301 Dim = 0;
302 break;
303 case Intrinsic::amdgcn_workitem_id_y:
304 case Intrinsic::r600_read_tidig_y:
305 IdQuery = true;
306 [[fallthrough]];
307 case Intrinsic::r600_read_local_size_y:
308 Dim = 1;
309 break;
310 case Intrinsic::amdgcn_workitem_id_z:
311 case Intrinsic::r600_read_tidig_z:
312 IdQuery = true;
313 [[fallthrough]];
314 case Intrinsic::r600_read_local_size_z:
315 Dim = 2;
316 break;
317 default:
318 break;
319 }
320
321 if (Dim <= 3) {
322 std::optional<unsigned> ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
323 if (ReqdSize)
324 MinSize = MaxSize = *ReqdSize;
325 }
326 }
327 }
328
329 if (!MaxSize)
330 return false;
331
332 // Range metadata is [Lo, Hi). For ID query we need to pass max size
333 // as Hi. For size query we need to pass Hi + 1.
334 if (IdQuery)
335 MinSize = 0;
336 else
337 ++MaxSize;
338
339 APInt Lower{32, MinSize};
340 APInt Upper{32, MaxSize};
341 if (auto *CI = dyn_cast<CallBase>(I)) {
343 CI->addRangeRetAttr(Range);
344 } else {
345 MDBuilder MDB(I->getContext());
346 MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper);
347 I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
348 }
349 return true;
350}
351
353 assert(AMDGPU::isKernel(F.getCallingConv()));
354
355 // We don't allocate the segment if we know the implicit arguments weren't
356 // used, even if the ABI implies we need them.
357 if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
358 return 0;
359
360 if (isMesaKernel(F))
361 return 16;
362
363 // Assume all implicit inputs are used by default
364 const Module *M = F.getParent();
365 unsigned NBytes =
367 return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
368 NBytes);
369}
370
372 Align &MaxAlign) const {
373 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
374 F.getCallingConv() == CallingConv::SPIR_KERNEL);
375
376 const DataLayout &DL = F.getDataLayout();
377 uint64_t ExplicitArgBytes = 0;
378 MaxAlign = Align(1);
379
380 for (const Argument &Arg : F.args()) {
381 if (Arg.hasAttribute("amdgpu-hidden-argument"))
382 continue;
383
384 const bool IsByRef = Arg.hasByRefAttr();
385 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
386 Align Alignment = DL.getValueOrABITypeAlignment(
387 IsByRef ? Arg.getParamAlign() : std::nullopt, ArgTy);
388 uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
389 ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
390 MaxAlign = std::max(MaxAlign, Alignment);
391 }
392
393 return ExplicitArgBytes;
394}
395
397 Align &MaxAlign) const {
398 if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
399 F.getCallingConv() != CallingConv::SPIR_KERNEL)
400 return 0;
401
402 uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
403
404 unsigned ExplicitOffset = getExplicitKernelArgOffset();
405
406 uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
407 unsigned ImplicitBytes = getImplicitArgNumBytes(F);
408 if (ImplicitBytes != 0) {
409 const Align Alignment = getAlignmentForImplicitArgPtr();
410 TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
411 MaxAlign = std::max(MaxAlign, Alignment);
412 }
413
414 // Being able to dereference past the end is useful for emitting scalar loads.
415 return alignTo(TotalSize, 4);
416}
417
422
425 return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
426 return static_cast<const AMDGPUSubtarget &>(MF.getSubtarget<R600Subtarget>());
427}
428
430 if (TM.getTargetTriple().isAMDGCN())
431 return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
432 return static_cast<const AMDGPUSubtarget &>(
433 TM.getSubtarget<R600Subtarget>(F));
434}
435
436// FIXME: This has no reason to be in subtarget
439 return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups", 3,
440 std::numeric_limits<uint32_t>::max());
441}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the InstructionSelector class for AMDGPU.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file describes how to lower LLVM inline asm to machine code INLINEASM.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
AMDGPU R600 specific subclass of TargetSubtarget.
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Align getAlignmentForImplicitArgPtr() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* intrinsic call or load.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
unsigned getMaxWavesPerEU() const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool RequiresUniformYZ=false) const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
bool hasD16Writes32BitVgpr() const
Class for arbitrary precision integers.
Definition APInt.h:78
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1077
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition Triple.h:904
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:666
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:851
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39