#define DEBUG_TYPE "hexagontti"

static cl::opt<bool>
    HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden,
                   cl::desc("Enable loop vectorizer for HVX"));

static cl::opt<bool> EnableV68FloatAutoHVX(
    "force-hvx-float", cl::Hidden,
    cl::desc("Enable auto-vectorization of floating point types on v68."));

static cl::opt<bool> EmitLookupTables(
    "hexagon-emit-lookup-tables", cl::init(true), cl::Hidden,
    cl::desc("Control lookup table emission on Hexagon target"));

// Cost factor that makes floating-point operations more expensive, in
// comparison to integer operations.
static const unsigned FloatFactor = 4;
bool HexagonTTIImpl::useHVX() const {
  return ST.useHVXOps() && HexagonAutoHVX;
}

bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  if (!VecTy)
    return false;
  if (!ST.isTypeForHVX(VecTy))
    return false;
  // Floating-point HVX types are natively supported from v69 on; on v68
  // they are allowed only behind the flag above.
  if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
    return true;
  return ST.useHVXV68Ops() && EnableV68FloatAutoHVX;
}
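// Example, assuming a v68 subtarget: an otherwise HVX-legal <64 x half>
// vector is accepted only when EnableV68FloatAutoHVX is set; on v69 and
// later the floating-point restriction does not apply at all.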
unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    return VTy->getNumElements();
  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
         "Expecting scalar type");
  return 1;
}
void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::PeelingPreferences &PP) const {
  BaseT::getPeelingPreferences(L, SE, PP);
  // Only peel innermost loops whose exact trip count is unknown but whose
  // maximum trip count is a small constant.
  if (L && L->isInnermost() && canPeel(L) &&
      SE.getSmallConstantTripCount(L) == 0 &&
      SE.getSmallConstantMaxTripCount(L) > 0 &&
      SE.getSmallConstantMaxTripCount(L) <= 5)
    PP.PeelCount = 2;
}
unsigned HexagonTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  // ClassID 1 is the vector register class; HVX provides 32 vector
  // registers when enabled, none otherwise.
  bool Vector = ClassID == 1;
  if (Vector)
    return useHVX() ? 32 : 0;
  return 32;
}

unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
  return useHVX() ? 2 : 1;
}
ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
                                          bool IsScalable) const {
  assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
  return ElementCount::getFixed((8 * ST.getVectorLength()) / ElemWidth);
}
InstructionCost
HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                      TTI::TargetCostKind CostKind) const {
  if (ICA.getID() == Intrinsic::bswap) {
    std::pair<InstructionCost, MVT> LT =
        getTypeLegalizationCost(ICA.getReturnType());
    return LT.first + 2;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
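// Example: an i32 bswap whose return type legalizes in a single step
// (LT.first == 1) is costed at 1 + 2 = 3.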
InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                                Align Alignment,
                                                unsigned AddressSpace,
                                                TTI::TargetCostKind CostKind,
                                                TTI::OperandValueInfo OpInfo,
                                                const Instruction *I) const {
  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  // Stores take the default path; the custom costing below covers loads.
  if (Opcode == Instruction::Store)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, OpInfo, I);

  if (Src->isVectorTy()) {
    VectorType *VecTy = cast<VectorType>(Src);
    unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue();
    if (isHVXVectorType(VecTy)) {
      unsigned RegWidth =
          getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
              .getFixedValue();
      assert(RegWidth && "Non-zero vector register width expected");
      // Cost of HVX loads.
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
      // Cost of constructing an HVX vector from scalar loads.
      const Align RegAlign(RegWidth / 8);
      if (Alignment > RegAlign)
        Alignment = RegAlign;
      unsigned AlignWidth = 8 * Alignment.value();
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
      return 3 * NumLoads;
    }

    // Non-HVX vectors: floating-point element types are more expensive.
    unsigned Cost =
        VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
    const Align BoundAlignment = std::min(Alignment, Align(8));
    unsigned AlignWidth = 8 * BoundAlignment.value();
    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
    if (Alignment == Align(4) || Alignment == Align(8))
      return Cost * NumLoads;
    // Narrower alignments need extra inserts to compose the vector.
    unsigned LogA = Log2(BoundAlignment);
    return (3 - LogA) * Cost * NumLoads;
  }

  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
}
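// Worked example for the non-HVX path, assuming a 512-bit FP vector load
// with Align(2): BoundAlignment = Align(2), AlignWidth = 16, and
// NumLoads = alignTo(512, 16) / 16 = 32; since LogA = 1, the returned cost
// is (3 - 1) * FloatFactor * 32.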
InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind,
    const Instruction *I) const {
  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
}

InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
}
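// When every lane of the interleave group is used (Indices.size() ==
// Factor) and no masking is needed, the interleaved access is costed as a
// single wide vector access through getMemoryOpCost above.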
InstructionCost HexagonTTIImpl::getCmpSelInstrCost(
    unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
    TTI::OperandValueInfo Op2Info, const Instruction *I) const {
  if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
  }
  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                   Op1Info, Op2Info, I);
}
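// Example: an FCmp on <4 x float> whose type legalizes in one step is
// costed at 1 + FloatFactor * 4.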
InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) const {
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);
  if (Ty->isVectorTy()) {
    // Floating-point vectors that cannot go through HVX are prohibitively
    // expensive.
    if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
      return InstructionCost::getMax();
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                       Op2Info, Args, CxtI);
}
InstructionCost HexagonTTIImpl::getCastInstrCost(
    unsigned Opcode, Type *DstTy, Type *SrcTy, TTI::CastContextHint CCH,
    TTI::TargetCostKind CostKind, const Instruction *I) const {
  auto isNonHVXFP = [this](Type *Ty) {
    return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
  };
  if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
    return InstructionCost::getMax();

  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
    unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
    unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(SrcTy);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(DstTy);
    InstructionCost Cost =
        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
    // Non-throughput cost kinds are reported as a binary 0/1 cost.
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost == 0 ? 0 : 1;
    return Cost;
  }
  return 1;
}
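// Example: a scalar fptosi from float to i32 gives SrcN = 1 and DstN = 0,
// so the throughput cost is max(SrcLT.first, DstLT.first) + FloatFactor.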
InstructionCost HexagonTTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1) const {
  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
                                   : Val;
  if (Opcode == Instruction::InsertElement) {
    // Inserting at a non-zero index requires extra shuffling.
    unsigned Cost = (Index != 0) ? 2 : 0;
    if (ElemTy->isIntegerTy(32))
      return Cost;
    // Non-32-bit elements additionally pay the extract cost.
    return Cost + getVectorInstrCost(Instruction::ExtractElement, Val,
                                     CostKind, Index, Op0, Op1);
  }
  if (Opcode == Instruction::ExtractElement)
    return 2;
  return 1;
}
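// Example: inserting an i32 element at index 3 costs 2, while inserting at
// index 0 is free; extracting any element is costed at 2.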
InstructionCost
HexagonTTIImpl::getInstructionCost(const User *U,
                                   ArrayRef<const Value *> Operands,
                                   TTI::TargetCostKind CostKind) const {
  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
    if (!CI->isIntegerCast())
      return false;
    // Only widening casts to 32 bits can fold into a load.
    const DataLayout &DL = getDataLayout();
    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
    if (DBW != 32 || SBW >= DBW)
      return false;
    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
    // The extension is free only when the load feeds just this cast.
    return LI && LI->hasOneUse();
  };

  if (const CastInst *CI = dyn_cast<const CastInst>(U))
    if (isCastFoldedIntoLoad(CI))
      return TTI::TCC_Free;
  return BaseT::getInstructionCost(U, Operands, CostKind);
}
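// Example of a cast that is free under this rule:
//   %w = load i16, ptr %p
//   %x = sext i16 %w to i32
// The widening to 32 bits folds into the load, provided %w has no other
// users.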