14#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
121 bool REquiresUniformYZ =
false)
const;
137 std::pair<unsigned, unsigned>
139 std::pair<unsigned, unsigned> FlatWorkGroupSizes)
const;
145 std::pair<unsigned, unsigned>
146 getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
147 unsigned LDSBytes,
const Function &
F)
const;
153 std::pair<unsigned, unsigned>
155 std::pair<unsigned, unsigned> FlatWorkGroupSizes,
156 unsigned LDSBytes)
const;
168 std::pair<unsigned, unsigned>
178 std::pair<unsigned, unsigned> FlatWorkGroupSizes)
const;
185 std::pair<unsigned, unsigned>
343 switch (TargetTriple.
getOS()) {
This file defines the SmallVector class.
bool hasFP8ConversionScaleInsts() const
bool hasFminFmaxLegacy() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
bool EnableRealTrue16Insts
bool hasBF16PackedInsts() const
bool hasFP4ConversionScaleInsts() const
std::optional< unsigned > getReqdWorkGroupSize(const Function &F, unsigned Dim) const
Align getAlignmentForImplicitArgPtr() const
bool hasMadMacF32Insts() const
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
bool isMesaKernel(const Function &F) const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasCvtPkF16F32Inst() const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
virtual unsigned getMinWavesPerEU() const =0
bool hasBF16ConversionInsts() const
bool HasFP4ConversionScaleInsts
bool hasFP6BF6ConversionScaleInsts() const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool HasFP6BF6ConversionScaleInsts
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool hasBF8ConversionScaleInsts() const
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
bool isGCN3Encoding() const
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
bool hasMadMixInsts() const
unsigned getWavefrontSizeLog2() const
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
bool HasF16BF16ToFP6BF6ConversionScaleInsts
bool has16BitInsts() const
bool HasF32ToF16BF16ConversionSRInsts
virtual ~AMDGPUSubtarget()=default
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
bool isAmdHsaOrMesa(const Function &F) const
bool hasFastFMAF32() const
bool isPromoteAllocaEnabled() const
bool hasTrigReducedRange() const
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
virtual unsigned getMaxFlatWorkGroupSize() const =0
bool hasDsSrc2Insts() const
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
bool hasF16BF16ToFP6BF6ConversionScaleInsts() const
unsigned getMaxWavesPerEU() const
bool hasWavefrontsEvenlySplittingXDim(const Function &F, bool REquiresUniformYZ=false) const
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
bool HasFP8ConversionScaleInsts
unsigned AddressableLocalMemorySize
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
bool HasBF8ConversionScaleInsts
bool hasBF16TransInsts() const
unsigned getWavefrontSize() const
virtual unsigned getMinFlatWorkGroupSize() const =0
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > RequestedWavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes, unsigned LDSBytes) const
Returns the target minimum/maximum number of waves per EU.
bool HasBF16ConversionInsts
bool hasInv2PiInlineImm() const
bool hasF32ToF16BF16ConversionSRInsts() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
Overload which uses the specified values for the flat work group sizes, rather than querying the func...
bool hasVOP3PInsts() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
This struct is a compact representation of a valid (non-zero power of two) alignment.