17#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
27class AMDGPUTargetMachine;
32class SITargetLowering;
73 bool HasFP32Denormals;
74 bool HasFP64FP16Denormals;
75 static constexpr bool InlinerVectorBonusPercent = 0;
82 static inline int getFullRateInstrCost() {
102 std::pair<InstructionCost, MVT> getTypeLegalizationCost(
Type *Ty)
const;
125 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const override;
127 unsigned ChainSizeInBytes,
130 unsigned ChainSizeInBytes,
135 unsigned AddrSpace)
const;
137 unsigned AddrSpace)
const override;
139 unsigned AddrSpace)
const override;
144 unsigned DestAddrSpace,
Align SrcAlign,
Align DestAlign,
145 std::optional<uint32_t> AtomicElementSize)
const override;
149 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
151 std::optional<uint32_t> AtomicCpySize)
const override;
161 ArrayRef<const Value *>
Args = {},
162 const Instruction *CxtI =
nullptr)
const override;
165 const Instruction *
I =
nullptr)
const override;
168 ArrayRef<unsigned> Indices = {})
const;
173 unsigned Index,
const Value *Op0,
174 const Value *Op1)
const override;
211 Value *NewV)
const override;
217 unsigned LaneAgIdx)
const;
219 std::optional<Instruction *>
224 const APInt &DemandedElts,
225 APInt &UndefElts)
const;
234 SimplifyAndSetOp)
const override;
242 const Instruction *CxtI =
nullptr)
const override;
245 SmallVectorImpl<Use *> &Ops)
const override;
248 const Function *Callee)
const override;
257 return InlinerVectorBonusPercent;
262 std::optional<FastMathFlags> FMF,
299 const Instruction *
I =
nullptr)
const override;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
uint64_t IntrinsicInst * II
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Base class which can be used to help build a TTI implementation.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Account for loads of i8 vector types to have reduced cost.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
int getInlinerVectorBonusPercent() const override
bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const
Analyze if the results of inline asm are divergent.
bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
unsigned getNumberOfRegisters(unsigned RCID) const override
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
unsigned getCacheLineSize() const override
Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
bool shouldPrefetchAddressSpace(unsigned AS) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool hasBranchDivergence(const Function *F=nullptr) const override
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
unsigned getInliningThresholdMultiplier() const override
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override
unsigned getPrefetchDistance() const override
How much before a load we should place the prefetch instruction.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
unsigned adjustInliningThreshold(const CallBase *CB) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
bool isAlwaysUniform(const Value *V) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
bool isSourceOfDivergence(const Value *V) const override
int getInliningLastCallToStaticBonus() const override
unsigned getFlatAddressSpace() const override
InstructionCost getVectorSplitCost() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
unsigned getNumberOfParts(Type *Tp) const override
When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
unsigned getMinVectorRegisterBitWidth() const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override
The core instruction combiner logic.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM Value Representation.
Base class of all SIMD vector types.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static bool addrspacesMayAlias(unsigned AS1, unsigned AS2)
This is an optimization pass for GlobalISel generic memory operations.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Information about a load/store intrinsic defined by the target.