21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
45class BlockFrequencyInfo;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
65class TargetLibraryInfo;
121 Type *RetTy =
nullptr;
134 bool TypeBasedOnly =
false);
207class TargetTransformInfo;
338 static_assert(
sizeof(PointersChainInfo) == 4,
"Was size increase justified?");
346 const PointersChainInfo &
Info,
Type *AccessTy,
497 std::pair<const Value *, unsigned>
698 KnownBits & Known,
bool &KnownBitsComputed)
const;
705 SimplifyAndSetOp)
const;
749 bool HasBaseReg, int64_t Scale,
751 int64_t ScalableOffset = 0)
const;
827 Align Alignment,
unsigned AddrSpace)
const;
871 unsigned AddrSpace = 0)
const;
915 unsigned ScalarOpdIdx)
const;
933 const APInt &DemandedElts,
934 bool Insert,
bool Extract,
943 ArrayRef<Type *> Tys,
999 bool IsZeroCmp)
const;
1031 unsigned *
Fast =
nullptr)
const;
1216 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const;
1227 Type *ScalarValTy)
const;
1233 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const;
1280 unsigned NumStridedMemAccesses,
1281 unsigned NumPrefetches,
bool HasCall)
const;
1306 std::optional<unsigned> BinOp = std::nullopt)
const;
1335 unsigned Opcode,
Type *Ty,
1339 ArrayRef<const Value *>
Args = {},
const Instruction *CxtI =
nullptr,
1340 const TargetLibraryInfo *TLibInfo =
nullptr)
const;
1350 VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
1351 const SmallBitVector &OpcodeMask,
1366 ArrayRef<const Value *>
Args = {},
1367 const Instruction *CxtI =
nullptr)
const;
1418 unsigned Index)
const;
1440 const Instruction *
I =
nullptr)
const;
1449 unsigned Index = -1, Value *Op0 =
nullptr,
1450 Value *Op1 =
nullptr)
const;
1464 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const;
1474 unsigned Index = -1)
const;
1483 const APInt &DemandedDstElts,
1492 const Instruction *
I =
nullptr)
const;
1516 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1530 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1548 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false)
const;
1553 return FMF && !(*FMF).allowReassoc();
1581 unsigned Opcode,
VectorType *Ty, std::optional<FastMathFlags> FMF,
1630 const SCEV *
Ptr =
nullptr)
const;
1654 Type *ExpectedType)
const;
1659 unsigned DestAddrSpace,
Align SrcAlign,
Align DestAlign,
1660 std::optional<uint32_t> AtomicElementSize = std::nullopt)
const;
1670 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
1672 std::optional<uint32_t> AtomicCpySize = std::nullopt)
const;
1686 unsigned DefaultCallPenalty)
const;
1722 unsigned AddrSpace)
const;
1726 unsigned AddrSpace)
const;
1738 unsigned ChainSizeInBytes,
1744 unsigned ChainSizeInBytes,
1816 Align Alignment)
const;
1906 template <
typename T>
class Model;
1908 std::unique_ptr<Concept> TTIImpl;
1956 virtual std::pair<const Value *, unsigned>
1960 Value *NewV)
const = 0;
1979 KnownBits & Known,
bool &KnownBitsComputed) = 0;
1984 SimplifyAndSetOp) = 0;
1989 int64_t BaseOffset,
bool HasBaseReg,
1990 int64_t Scale,
unsigned AddrSpace,
1992 int64_t ScalableOffset) = 0;
2013 Align Alignment) = 0;
2015 Align Alignment) = 0;
2021 unsigned AddrSpace) = 0;
2033 bool HasBaseReg, int64_t Scale,
2034 unsigned AddrSpace) = 0;
2047 unsigned ScalarOpdIdx) = 0;
2076 unsigned *
Fast) = 0;
2098 Type *Ty =
nullptr)
const = 0;
2108 bool IsScalable)
const = 0;
2109 virtual unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const = 0;
2111 Type *ScalarValTy)
const = 0;
2113 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) = 0;
2132 unsigned NumStridedMemAccesses,
2133 unsigned NumPrefetches,
2134 bool HasCall)
const = 0;
2159 std::optional<unsigned> BinOp)
const = 0;
2167 VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
2181 unsigned Index) = 0;
2202 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
2206 unsigned Index) = 0;
2210 const APInt &DemandedDstElts,
2228 bool VariableMask,
Align Alignment,
2233 bool VariableMask,
Align Alignment,
2240 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false) = 0;
2243 std::optional<FastMathFlags> FMF,
2270 Type *ExpectedType) = 0;
2273 unsigned DestAddrSpace,
Align SrcAlign,
Align DestAlign,
2274 std::optional<uint32_t> AtomicElementSize)
const = 0;
2278 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
2280 std::optional<uint32_t> AtomicCpySize)
const = 0;
2284 unsigned DefaultCallPenalty)
const = 0;
2295 unsigned AddrSpace)
const = 0;
2298 unsigned AddrSpace)
const = 0;
2303 unsigned ChainSizeInBytes,
2306 unsigned ChainSizeInBytes,
2323 Align Alignment)
const = 0;
2342template <
typename T>
2347 Model(
T Impl) : Impl(std::move(Impl)) {}
2348 ~Model()
override =
default;
2350 const DataLayout &getDataLayout()
const override {
2351 return Impl.getDataLayout();
2355 getGEPCost(Type *PointeeType,
const Value *
Ptr,
2356 ArrayRef<const Value *>
Operands, Type *AccessType,
2360 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2362 const PointersChainInfo &
Info,
2367 unsigned getInliningThresholdMultiplier()
const override {
2368 return Impl.getInliningThresholdMultiplier();
2370 unsigned adjustInliningThreshold(
const CallBase *CB)
override {
2371 return Impl.adjustInliningThreshold(CB);
2373 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const override {
2374 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2376 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const override {
2377 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2379 int getInliningLastCallToStaticBonus()
const override {
2380 return Impl.getInliningLastCallToStaticBonus();
2382 int getInlinerVectorBonusPercent()
const override {
2383 return Impl.getInlinerVectorBonusPercent();
2385 unsigned getCallerAllocaCost(
const CallBase *CB,
2386 const AllocaInst *AI)
const override {
2387 return Impl.getCallerAllocaCost(CB, AI);
2389 InstructionCost getMemcpyCost(
const Instruction *
I)
override {
2390 return Impl.getMemcpyCost(
I);
2393 uint64_t getMaxMemIntrinsicInlineSizeThreshold()
const override {
2394 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2397 InstructionCost getInstructionCost(
const User *U,
2402 BranchProbability getPredictableBranchThreshold()
override {
2403 return Impl.getPredictableBranchThreshold();
2405 InstructionCost getBranchMispredictPenalty()
override {
2406 return Impl.getBranchMispredictPenalty();
2408 bool hasBranchDivergence(
const Function *
F =
nullptr)
override {
2409 return Impl.hasBranchDivergence(
F);
2411 bool isSourceOfDivergence(
const Value *V)
override {
2412 return Impl.isSourceOfDivergence(V);
2415 bool isAlwaysUniform(
const Value *V)
override {
2416 return Impl.isAlwaysUniform(V);
2419 bool isValidAddrSpaceCast(
unsigned FromAS,
unsigned ToAS)
const override {
2420 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2423 bool addrspacesMayAlias(
unsigned AS0,
unsigned AS1)
const override {
2424 return Impl.addrspacesMayAlias(AS0, AS1);
2427 unsigned getFlatAddressSpace()
override {
return Impl.getFlatAddressSpace(); }
2429 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2431 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2434 bool isNoopAddrSpaceCast(
unsigned FromAS,
unsigned ToAS)
const override {
2435 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2439 canHaveNonUndefGlobalInitializerInAddressSpace(
unsigned AS)
const override {
2440 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2443 unsigned getAssumedAddrSpace(
const Value *V)
const override {
2444 return Impl.getAssumedAddrSpace(V);
2447 bool isSingleThreaded()
const override {
return Impl.isSingleThreaded(); }
2449 std::pair<const Value *, unsigned>
2450 getPredicatedAddrSpace(
const Value *V)
const override {
2451 return Impl.getPredicatedAddrSpace(V);
2454 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *
II, Value *OldV,
2455 Value *NewV)
const override {
2456 return Impl.rewriteIntrinsicWithAddressSpace(
II, OldV, NewV);
2459 bool isLoweredToCall(
const Function *
F)
override {
2460 return Impl.isLoweredToCall(
F);
2462 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2463 UnrollingPreferences &UP,
2464 OptimizationRemarkEmitter *ORE)
override {
2465 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2467 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2468 PeelingPreferences &PP)
override {
2469 return Impl.getPeelingPreferences(L, SE, PP);
2471 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2472 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2473 HardwareLoopInfo &HWLoopInfo)
override {
2474 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2476 unsigned getEpilogueVectorizationMinVF()
override {
2477 return Impl.getEpilogueVectorizationMinVF();
2479 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
override {
2480 return Impl.preferPredicateOverEpilogue(TFI);
2483 getPreferredTailFoldingStyle(
bool IVUpdateMayOverflow =
true)
override {
2484 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2486 std::optional<Instruction *>
2487 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &
II)
override {
2488 return Impl.instCombineIntrinsic(IC,
II);
2490 std::optional<Value *>
2491 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &
II,
2492 APInt DemandedMask, KnownBits &Known,
2493 bool &KnownBitsComputed)
override {
2494 return Impl.simplifyDemandedUseBitsIntrinsic(IC,
II, DemandedMask, Known,
2497 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2498 InstCombiner &IC, IntrinsicInst &
II, APInt DemandedElts, APInt &UndefElts,
2499 APInt &UndefElts2, APInt &UndefElts3,
2500 std::function<
void(Instruction *,
unsigned, APInt, APInt &)>
2501 SimplifyAndSetOp)
override {
2502 return Impl.simplifyDemandedVectorEltsIntrinsic(
2503 IC,
II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2506 bool isLegalAddImmediate(int64_t Imm)
override {
2507 return Impl.isLegalAddImmediate(Imm);
2509 bool isLegalAddScalableImmediate(int64_t Imm)
override {
2510 return Impl.isLegalAddScalableImmediate(Imm);
2512 bool isLegalICmpImmediate(int64_t Imm)
override {
2513 return Impl.isLegalICmpImmediate(Imm);
2515 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2516 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
2517 Instruction *
I, int64_t ScalableOffset)
override {
2518 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2519 AddrSpace,
I, ScalableOffset);
2521 bool isLSRCostLess(
const TargetTransformInfo::LSRCost &C1,
2522 const TargetTransformInfo::LSRCost &C2)
override {
2523 return Impl.isLSRCostLess(C1, C2);
2525 bool isNumRegsMajorCostOfLSR()
override {
2526 return Impl.isNumRegsMajorCostOfLSR();
2528 bool shouldDropLSRSolutionIfLessProfitable()
const override {
2529 return Impl.shouldDropLSRSolutionIfLessProfitable();
2531 bool isProfitableLSRChainElement(Instruction *
I)
override {
2532 return Impl.isProfitableLSRChainElement(
I);
2534 bool canMacroFuseCmp()
override {
return Impl.canMacroFuseCmp(); }
2535 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2536 DominatorTree *DT, AssumptionCache *AC,
2537 TargetLibraryInfo *LibInfo)
override {
2538 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2541 getPreferredAddressingMode(
const Loop *L,
2542 ScalarEvolution *SE)
const override {
2543 return Impl.getPreferredAddressingMode(L, SE);
2545 bool isLegalMaskedStore(Type *DataType, Align Alignment)
override {
2546 return Impl.isLegalMaskedStore(DataType, Alignment);
2548 bool isLegalMaskedLoad(Type *DataType, Align Alignment)
override {
2549 return Impl.isLegalMaskedLoad(DataType, Alignment);
2551 bool isLegalNTStore(Type *DataType, Align Alignment)
override {
2552 return Impl.isLegalNTStore(DataType, Alignment);
2554 bool isLegalNTLoad(Type *DataType, Align Alignment)
override {
2555 return Impl.isLegalNTLoad(DataType, Alignment);
2557 bool isLegalBroadcastLoad(Type *ElementTy,
2558 ElementCount NumElements)
const override {
2559 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2561 bool isLegalMaskedScatter(Type *DataType, Align Alignment)
override {
2562 return Impl.isLegalMaskedScatter(DataType, Alignment);
2564 bool isLegalMaskedGather(Type *DataType, Align Alignment)
override {
2565 return Impl.isLegalMaskedGather(DataType, Alignment);
2567 bool forceScalarizeMaskedGather(
VectorType *DataType,
2568 Align Alignment)
override {
2569 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2571 bool forceScalarizeMaskedScatter(
VectorType *DataType,
2572 Align Alignment)
override {
2573 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2575 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
override {
2576 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2578 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
override {
2579 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2581 bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
override {
2582 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2584 bool isLegalInterleavedAccessType(
VectorType *VTy,
unsigned Factor,
2586 unsigned AddrSpace)
override {
2587 return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
2589 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)
override {
2590 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2592 bool isLegalAltInstr(
VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
2593 const SmallBitVector &OpcodeMask)
const override {
2594 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2596 bool enableOrderedReductions()
override {
2597 return Impl.enableOrderedReductions();
2599 bool hasDivRemOp(Type *DataType,
bool IsSigned)
override {
2600 return Impl.hasDivRemOp(DataType, IsSigned);
2602 bool hasVolatileVariant(Instruction *
I,
unsigned AddrSpace)
override {
2603 return Impl.hasVolatileVariant(
I, AddrSpace);
2605 bool prefersVectorizedAddressing()
override {
2606 return Impl.prefersVectorizedAddressing();
2608 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2609 StackOffset BaseOffset,
bool HasBaseReg,
2611 unsigned AddrSpace)
override {
2612 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2615 bool LSRWithInstrQueries()
override {
return Impl.LSRWithInstrQueries(); }
2616 bool isTruncateFree(Type *Ty1, Type *Ty2)
override {
2617 return Impl.isTruncateFree(Ty1, Ty2);
2619 bool isProfitableToHoist(Instruction *
I)
override {
2620 return Impl.isProfitableToHoist(
I);
2622 bool useAA()
override {
return Impl.useAA(); }
2623 bool isTypeLegal(Type *Ty)
override {
return Impl.isTypeLegal(Ty); }
2624 unsigned getRegUsageForType(Type *Ty)
override {
2625 return Impl.getRegUsageForType(Ty);
2627 bool shouldBuildLookupTables()
override {
2628 return Impl.shouldBuildLookupTables();
2630 bool shouldBuildLookupTablesForConstant(Constant *
C)
override {
2631 return Impl.shouldBuildLookupTablesForConstant(
C);
2633 bool shouldBuildRelLookupTables()
override {
2634 return Impl.shouldBuildRelLookupTables();
2636 bool useColdCCForColdCall(Function &
F)
override {
2637 return Impl.useColdCCForColdCall(
F);
2639 bool isTargetIntrinsicTriviallyScalarizable(
Intrinsic::ID ID)
override {
2640 return Impl.isTargetIntrinsicTriviallyScalarizable(
ID);
2644 unsigned ScalarOpdIdx)
override {
2645 return Impl.isTargetIntrinsicWithScalarOpAtArg(
ID, ScalarOpdIdx);
2649 int OpdIdx)
override {
2650 return Impl.isTargetIntrinsicWithOverloadTypeAtArg(
ID, OpdIdx);
2653 bool isTargetIntrinsicWithStructReturnOverloadAtField(
Intrinsic::ID ID,
2654 int RetIdx)
override {
2655 return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(
ID, RetIdx);
2658 InstructionCost getScalarizationOverhead(
VectorType *Ty,
2659 const APInt &DemandedElts,
2660 bool Insert,
bool Extract,
2662 ArrayRef<Value *> VL = {})
override {
2663 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2667 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2668 ArrayRef<Type *> Tys,
2670 return Impl.getOperandsScalarizationOverhead(Args, Tys,
CostKind);
2673 bool supportsEfficientVectorElementLoadStore()
override {
2674 return Impl.supportsEfficientVectorElementLoadStore();
2677 bool supportsTailCalls()
override {
return Impl.supportsTailCalls(); }
2678 bool supportsTailCallFor(
const CallBase *CB)
override {
2679 return Impl.supportsTailCallFor(CB);
2682 bool enableAggressiveInterleaving(
bool LoopHasReductions)
override {
2683 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2685 MemCmpExpansionOptions enableMemCmpExpansion(
bool OptSize,
2686 bool IsZeroCmp)
const override {
2687 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2689 bool enableSelectOptimize()
override {
2690 return Impl.enableSelectOptimize();
2692 bool shouldTreatInstructionLikeSelect(
const Instruction *
I)
override {
2693 return Impl.shouldTreatInstructionLikeSelect(
I);
2695 bool enableInterleavedAccessVectorization()
override {
2696 return Impl.enableInterleavedAccessVectorization();
2698 bool enableMaskedInterleavedAccessVectorization()
override {
2699 return Impl.enableMaskedInterleavedAccessVectorization();
2701 bool isFPVectorizationPotentiallyUnsafe()
override {
2702 return Impl.isFPVectorizationPotentiallyUnsafe();
2704 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
2706 unsigned *
Fast)
override {
2711 return Impl.getPopcntSupport(IntTyWidthInBit);
2713 bool haveFastSqrt(Type *Ty)
override {
return Impl.haveFastSqrt(Ty); }
2715 bool isExpensiveToSpeculativelyExecute(
const Instruction*
I)
override {
2716 return Impl.isExpensiveToSpeculativelyExecute(
I);
2719 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
override {
2720 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2723 InstructionCost getFPOpCost(Type *Ty)
override {
2724 return Impl.getFPOpCost(Ty);
2727 InstructionCost getIntImmCodeSizeCost(
unsigned Opc,
unsigned Idx,
2728 const APInt &Imm, Type *Ty)
override {
2729 return Impl.getIntImmCodeSizeCost(Opc,
Idx, Imm, Ty);
2731 InstructionCost getIntImmCost(
const APInt &Imm, Type *Ty,
2733 return Impl.getIntImmCost(Imm, Ty,
CostKind);
2735 InstructionCost getIntImmCostInst(
unsigned Opc,
unsigned Idx,
2736 const APInt &Imm, Type *Ty,
2738 Instruction *Inst =
nullptr)
override {
2739 return Impl.getIntImmCostInst(Opc,
Idx, Imm, Ty,
CostKind, Inst);
2742 const APInt &Imm, Type *Ty,
2744 return Impl.getIntImmCostIntrin(IID,
Idx, Imm, Ty,
CostKind);
2746 bool preferToKeepConstantsAttached(
const Instruction &Inst,
2747 const Function &Fn)
const override {
2748 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2750 unsigned getNumberOfRegisters(
unsigned ClassID)
const override {
2751 return Impl.getNumberOfRegisters(ClassID);
2753 bool hasConditionalLoadStoreForType(Type *Ty =
nullptr)
const override {
2754 return Impl.hasConditionalLoadStoreForType(Ty);
2756 unsigned getRegisterClassForType(
bool Vector,
2757 Type *Ty =
nullptr)
const override {
2758 return Impl.getRegisterClassForType(
Vector, Ty);
2760 const char *getRegisterClassName(
unsigned ClassID)
const override {
2761 return Impl.getRegisterClassName(ClassID);
2763 TypeSize getRegisterBitWidth(
RegisterKind K)
const override {
2764 return Impl.getRegisterBitWidth(K);
2766 unsigned getMinVectorRegisterBitWidth()
const override {
2767 return Impl.getMinVectorRegisterBitWidth();
2769 std::optional<unsigned>
getMaxVScale()
const override {
2770 return Impl.getMaxVScale();
2772 std::optional<unsigned> getVScaleForTuning()
const override {
2773 return Impl.getVScaleForTuning();
2775 bool isVScaleKnownToBeAPowerOfTwo()
const override {
2776 return Impl.isVScaleKnownToBeAPowerOfTwo();
2778 bool shouldMaximizeVectorBandwidth(
2780 return Impl.shouldMaximizeVectorBandwidth(K);
2782 ElementCount getMinimumVF(
unsigned ElemWidth,
2783 bool IsScalable)
const override {
2784 return Impl.getMinimumVF(ElemWidth, IsScalable);
2786 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const override {
2787 return Impl.getMaximumVF(ElemWidth, Opcode);
2789 unsigned getStoreMinimumVF(
unsigned VF, Type *ScalarMemTy,
2790 Type *ScalarValTy)
const override {
2791 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2793 bool shouldConsiderAddressTypePromotion(
2794 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
override {
2795 return Impl.shouldConsiderAddressTypePromotion(
2796 I, AllowPromotionWithoutCommonHeader);
2798 unsigned getCacheLineSize()
const override {
return Impl.getCacheLineSize(); }
2799 std::optional<unsigned> getCacheSize(
CacheLevel Level)
const override {
2800 return Impl.getCacheSize(Level);
2802 std::optional<unsigned>
2803 getCacheAssociativity(
CacheLevel Level)
const override {
2804 return Impl.getCacheAssociativity(Level);
2807 std::optional<unsigned> getMinPageSize()
const override {
2808 return Impl.getMinPageSize();
2813 unsigned getPrefetchDistance()
const override {
2814 return Impl.getPrefetchDistance();
2820 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
2821 unsigned NumStridedMemAccesses,
2822 unsigned NumPrefetches,
2823 bool HasCall)
const override {
2824 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2825 NumPrefetches, HasCall);
2831 unsigned getMaxPrefetchIterationsAhead()
const override {
2832 return Impl.getMaxPrefetchIterationsAhead();
2836 bool enableWritePrefetching()
const override {
2837 return Impl.enableWritePrefetching();
2841 bool shouldPrefetchAddressSpace(
unsigned AS)
const override {
2842 return Impl.shouldPrefetchAddressSpace(AS);
2845 InstructionCost getPartialReductionCost(
2846 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
2849 std::optional<unsigned> BinOp = std::nullopt)
const override {
2850 return Impl.getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
2851 AccumType, VF, OpAExtend, OpBExtend,
2855 unsigned getMaxInterleaveFactor(ElementCount VF)
override {
2856 return Impl.getMaxInterleaveFactor(VF);
2858 unsigned getEstimatedNumberOfCaseClusters(
const SwitchInst &SI,
2860 ProfileSummaryInfo *PSI,
2861 BlockFrequencyInfo *BFI)
override {
2862 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2864 InstructionCost getArithmeticInstrCost(
2866 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2867 ArrayRef<const Value *> Args,
2868 const Instruction *CxtI =
nullptr)
override {
2869 return Impl.getArithmeticInstrCost(Opcode, Ty,
CostKind, Opd1Info, Opd2Info,
2872 InstructionCost getAltInstrCost(
VectorType *VecTy,
unsigned Opcode0,
2874 const SmallBitVector &OpcodeMask,
2876 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask,
CostKind);
2883 ArrayRef<const Value *> Args,
2884 const Instruction *CxtI)
override {
2885 return Impl.getShuffleCost(Kind, Tp, Mask,
CostKind,
Index, SubTp, Args,
2888 InstructionCost getCastInstrCost(
unsigned Opcode, Type *Dst, Type *Src,
2891 const Instruction *
I)
override {
2892 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH,
CostKind,
I);
2894 InstructionCost getExtractWithExtendCost(
unsigned Opcode, Type *Dst,
2896 unsigned Index)
override {
2897 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy,
Index);
2900 const Instruction *
I =
nullptr)
override {
2901 return Impl.getCFInstrCost(Opcode,
CostKind,
I);
2903 InstructionCost getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy,
2906 OperandValueInfo Op1Info,
2907 OperandValueInfo Op2Info,
2908 const Instruction *
I)
override {
2909 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred,
CostKind,
2910 Op1Info, Op2Info,
I);
2912 InstructionCost getVectorInstrCost(
unsigned Opcode, Type *Val,
2914 unsigned Index, Value *Op0,
2915 Value *Op1)
override {
2916 return Impl.getVectorInstrCost(Opcode, Val,
CostKind,
Index, Op0, Op1);
2918 InstructionCost getVectorInstrCost(
2921 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
override {
2922 return Impl.getVectorInstrCost(Opcode, Val,
CostKind,
Index, Scalar,
2925 InstructionCost getVectorInstrCost(
const Instruction &
I, Type *Val,
2927 unsigned Index)
override {
2931 getReplicationShuffleCost(Type *EltTy,
int ReplicationFactor,
int VF,
2932 const APInt &DemandedDstElts,
2934 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2937 InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment,
2940 OperandValueInfo OpInfo,
2941 const Instruction *
I)
override {
2945 InstructionCost getVPMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment,
2948 const Instruction *
I)
override {
2949 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment,
AddressSpace,
2952 InstructionCost getMaskedMemoryOpCost(
unsigned Opcode, Type *Src,
2955 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment,
AddressSpace,
2959 getGatherScatterOpCost(
unsigned Opcode, Type *DataTy,
const Value *
Ptr,
2960 bool VariableMask, Align Alignment,
2962 const Instruction *
I =
nullptr)
override {
2963 return Impl.getGatherScatterOpCost(Opcode, DataTy,
Ptr, VariableMask,
2967 getStridedMemoryOpCost(
unsigned Opcode, Type *DataTy,
const Value *
Ptr,
2968 bool VariableMask, Align Alignment,
2970 const Instruction *
I =
nullptr)
override {
2971 return Impl.getStridedMemoryOpCost(Opcode, DataTy,
Ptr, VariableMask,
2974 InstructionCost getInterleavedMemoryOpCost(
2975 unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
2977 bool UseMaskForCond,
bool UseMaskForGaps)
override {
2978 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2980 UseMaskForCond, UseMaskForGaps);
2983 getArithmeticReductionCost(
unsigned Opcode,
VectorType *Ty,
2984 std::optional<FastMathFlags> FMF,
2986 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF,
CostKind);
2991 return Impl.getMinMaxReductionCost(IID, Ty, FMF,
CostKind);
2994 getExtendedReductionCost(
unsigned Opcode,
bool IsUnsigned, Type *ResTy,
2997 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
3001 getMulAccReductionCost(
bool IsUnsigned, Type *ResTy,
VectorType *Ty,
3003 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty,
CostKind);
3005 InstructionCost getIntrinsicInstrCost(
const IntrinsicCostAttributes &ICA,
3007 return Impl.getIntrinsicInstrCost(ICA,
CostKind);
3009 InstructionCost getCallInstrCost(Function *
F, Type *
RetTy,
3010 ArrayRef<Type *> Tys,
3014 unsigned getNumberOfParts(Type *Tp)
override {
3015 return Impl.getNumberOfParts(Tp);
3017 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
3018 const SCEV *
Ptr)
override {
3019 return Impl.getAddressComputationCost(Ty, SE,
Ptr);
3021 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys)
override {
3022 return Impl.getCostOfKeepingLiveOverCall(Tys);
3024 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
3025 MemIntrinsicInfo &
Info)
override {
3026 return Impl.getTgtMemIntrinsic(Inst,
Info);
3028 unsigned getAtomicMemIntrinsicMaxElementSize()
const override {
3029 return Impl.getAtomicMemIntrinsicMaxElementSize();
3031 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
3032 Type *ExpectedType)
override {
3033 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
3035 Type *getMemcpyLoopLoweringType(
3036 LLVMContext &Context, Value *
Length,
unsigned SrcAddrSpace,
3037 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
3038 std::optional<uint32_t> AtomicElementSize)
const override {
3039 return Impl.getMemcpyLoopLoweringType(Context,
Length, SrcAddrSpace,
3040 DestAddrSpace, SrcAlign, DestAlign,
3043 void getMemcpyLoopResidualLoweringType(
3044 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
3045 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
3046 Align SrcAlign, Align DestAlign,
3047 std::optional<uint32_t> AtomicCpySize)
const override {
3048 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
3049 SrcAddrSpace, DestAddrSpace,
3050 SrcAlign, DestAlign, AtomicCpySize);
3053 const Function *Callee)
const override {
3054 return Impl.areInlineCompatible(Caller, Callee);
3056 unsigned getInlineCallPenalty(
const Function *
F,
const CallBase &Call,
3057 unsigned DefaultCallPenalty)
const override {
3058 return Impl.getInlineCallPenalty(
F, Call, DefaultCallPenalty);
3060 bool areTypesABICompatible(
const Function *Caller,
const Function *Callee,
3061 const ArrayRef<Type *> &Types)
const override {
3062 return Impl.areTypesABICompatible(Caller, Callee, Types);
3065 return Impl.isIndexedLoadLegal(
Mode, Ty, getDataLayout());
3068 return Impl.isIndexedStoreLegal(
Mode, Ty, getDataLayout());
3070 unsigned getLoadStoreVecRegBitWidth(
unsigned AddrSpace)
const override {
3071 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
3073 bool isLegalToVectorizeLoad(LoadInst *LI)
const override {
3074 return Impl.isLegalToVectorizeLoad(LI);
3076 bool isLegalToVectorizeStore(StoreInst *SI)
const override {
3077 return Impl.isLegalToVectorizeStore(SI);
3079 bool isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes, Align Alignment,
3080 unsigned AddrSpace)
const override {
3081 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
3084 bool isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes, Align Alignment,
3085 unsigned AddrSpace)
const override {
3086 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
3089 bool isLegalToVectorizeReduction(
const RecurrenceDescriptor &RdxDesc,
3090 ElementCount VF)
const override {
3091 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
3093 bool isElementTypeLegalForScalableVector(Type *Ty)
const override {
3094 return Impl.isElementTypeLegalForScalableVector(Ty);
3096 unsigned getLoadVectorFactor(
unsigned VF,
unsigned LoadSize,
3097 unsigned ChainSizeInBytes,
3099 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
3101 unsigned getStoreVectorFactor(
unsigned VF,
unsigned StoreSize,
3102 unsigned ChainSizeInBytes,
3104 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
3106 bool preferFixedOverScalableIfEqualCost()
const override {
3107 return Impl.preferFixedOverScalableIfEqualCost();
3109 bool preferInLoopReduction(
unsigned Opcode, Type *Ty,
3110 ReductionFlags Flags)
const override {
3111 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
3113 bool preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty,
3114 ReductionFlags Flags)
const override {
3115 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
3117 bool preferEpilogueVectorization()
const override {
3118 return Impl.preferEpilogueVectorization();
3121 bool shouldExpandReduction(
const IntrinsicInst *
II)
const override {
3122 return Impl.shouldExpandReduction(
II);
3126 getPreferredExpandedReductionShuffle(
const IntrinsicInst *
II)
const override {
3127 return Impl.getPreferredExpandedReductionShuffle(
II);
3130 unsigned getGISelRematGlobalCost()
const override {
3131 return Impl.getGISelRematGlobalCost();
3134 unsigned getMinTripCountTailFoldingThreshold()
const override {
3135 return Impl.getMinTripCountTailFoldingThreshold();
3138 bool supportsScalableVectors()
const override {
3139 return Impl.supportsScalableVectors();
3142 bool enableScalableVectorization()
const override {
3143 return Impl.enableScalableVectorization();
3146 bool hasActiveVectorLength(
unsigned Opcode, Type *DataType,
3147 Align Alignment)
const override {
3148 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
3151 bool isProfitableToSinkOperands(Instruction *
I,
3152 SmallVectorImpl<Use *> &Ops)
const override {
3153 return Impl.isProfitableToSinkOperands(
I, Ops);
3156 bool isVectorShiftByScalarCheap(Type *Ty)
const override {
3157 return Impl.isVectorShiftByScalarCheap(Ty);
3161 getVPLegalizationStrategy(
const VPIntrinsic &PI)
const override {
3162 return Impl.getVPLegalizationStrategy(PI);
3165 bool hasArmWideBranch(
bool Thumb)
const override {
3166 return Impl.hasArmWideBranch(Thumb);
3169 uint64_t getFeatureMask(
const Function &
F)
const override {
3170 return Impl.getFeatureMask(
F);
3173 bool isMultiversionedFunction(
const Function &
F)
const override {
3174 return Impl.isMultiversionedFunction(
F);
3177 unsigned getMaxNumArgs()
const override {
3178 return Impl.getMaxNumArgs();
3181 unsigned getNumBytesToPadGlobalArray(
unsigned Size,
3186 void collectKernelLaunchBounds(
3188 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB)
const override {
3189 Impl.collectKernelLaunchBounds(
F, LB);
3193template <
typename T>
3195 : TTIImpl(new Model<
T>(Impl)) {}
3226 : TTICallback(Arg.TTICallback) {}
3228 : TTICallback(
std::
move(Arg.TTICallback)) {}
3230 TTICallback =
RHS.TTICallback;
3234 TTICallback = std::move(
RHS.TTICallback);
3266 std::optional<TargetTransformInfo>
TTI;
3268 virtual void anchor();
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub-type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
mir Rename Register Operands
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
API to communicate dependencies between analyses during invalidation.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
ImmutablePass class - This class is used to provide information that does not need to be run.
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Drive the analysis of interleaved memory accesses in the loop.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
bool skipScalarizationCost() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
A set of analyses that are preserved following a run of a transformation pass.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Base class of all SIMD vector types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
This is an optimization pass for GlobalISel generic memory operations.
AtomicOrdering
Atomic ordering for LLVM's memory model.
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
Implement std::hash so that hash_code can be used in STL containers.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A CRTP mix-in that provides informational APIs needed for analysis passes.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
HardwareLoopInfo()=delete
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
unsigned short MatchingId
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
LoopVectorizationLegality * LVL