//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {

protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  virtual ~TargetTransformInfoImplBase();

  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  virtual const DataLayout &getDataLayout() const { return DL; }

  // FIXME: It looks like this implementation is dead. All clients appear to
  // use the (non-const) version from `TargetTransformInfoImplCRTPBase`.
  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands,
                                     Type *AccessType,
                                     TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }
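  // For example, under this model:
  //   %p = getelementptr i32, ptr %base, i64 4   ; all-constant -> TCC_Free
  //   %q = getelementptr i32, ptr %base, i64 %i  ; variable index -> TCC_Basic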

  virtual InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const {
    llvm_unreachable("Not implemented");
  }

  virtual unsigned
  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  virtual InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const {
    llvm_unreachable("Not implemented");
  }

  virtual unsigned getInliningThresholdMultiplier() const { return 1; }
  virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
    return 8;
  }
  virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  virtual int getInliningLastCallToStaticBonus() const {
    // This is the value of InlineConstants::LastCallToStaticBonus before it
    // was removed along with the introduction of this function.
    return 15000;
  }
  virtual unsigned adjustInliningThreshold(const CallBase *CB) const {
    return 0;
  }
  virtual unsigned getCallerAllocaCost(const CallBase *CB,
                                       const AllocaInst *AI) const {
    return 0;
  }

  virtual int getInlinerVectorBonusPercent() const { return 150; }

  virtual InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { return 64; }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
  virtual BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }
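  // For example, with the default 99/100 threshold, a branch whose condition
  // evaluates the same way on 995 of 1000 executions is considered
  // predictable, while one that does so only 90% of the time is not.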

  virtual InstructionCost getBranchMispredictPenalty() const { return 0; }

  virtual bool hasBranchDivergence(const Function *F = nullptr) const {
    return false;
  }

  virtual bool isSourceOfDivergence(const Value *V) const { return false; }

  virtual bool isAlwaysUniform(const Value *V) const { return false; }

  virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  virtual unsigned getFlatAddressSpace() const { return -1; }

  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const {
    return false;
  }

  virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  virtual bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  virtual bool isSingleThreaded() const { return false; }

  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
    return nullptr;
  }

  virtual bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    // clang-format off
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sin" || Name == "sinf" || Name == "sinl" ||
        Name == "cos" || Name == "cosf" || Name == "cosl" ||
        Name == "tan" || Name == "tanf" || Name == "tanl" ||
        Name == "asin" || Name == "asinf" || Name == "asinl" ||
        Name == "acos" || Name == "acosf" || Name == "acosl" ||
        Name == "atan" || Name == "atanf" || Name == "atanl" ||
        Name == "atan2" || Name == "atan2f" || Name == "atan2l" ||
        Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
        Name == "cosh" || Name == "coshf" || Name == "coshl" ||
        Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
        Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
        Name == "exp10" || Name == "exp10l" || Name == "exp10f")
      return false;
    // clang-format on
    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  virtual unsigned getEpilogueVectorizationMinVF() const { return 16; }

  virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
    return false;
  }

  virtual TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  virtual std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    return std::nullopt;
  }

  virtual std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  virtual void getUnrollingPreferences(Loop *, ScalarEvolution &,
                                       TTI::UnrollingPreferences &,
                                       OptimizationRemarkEmitter *) const {}

  virtual void getPeelingPreferences(Loop *, ScalarEvolution &,
                                     TTI::PeelingPreferences &) const {}

  virtual bool isLegalAddImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace,
                                     Instruction *I = nullptr,
                                     int64_t ScalableOffset = 0) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }
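  // For example, this conservative default accepts [reg] (no offset) and
  // [reg + reg] (Scale == 1), but rejects [reg + 8] (non-zero BaseOffset)
  // and [reg + 4*reg] (Scale == 4).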

  virtual bool isLSRCostLess(const TTI::LSRCost &C1,
                             const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  virtual bool isNumRegsMajorCostOfLSR() const { return true; }

  virtual bool shouldDropLSRSolutionIfLessProfitable() const { return false; }

  virtual bool isProfitableLSRChainElement(Instruction *I) const {
    return false;
  }

  virtual bool canMacroFuseCmp() const { return false; }

  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) const {
    return false;
  }

  virtual TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment,
                                  unsigned AddressSpace) const {
    return false;
  }

  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                                 unsigned AddressSpace) const {
    return false;
  }

  virtual bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }
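  // For example, a <4 x i32> store (16 bytes) is treated as a legal
  // nontemporal store at Alignment >= 16 but not at Alignment == 8, and a
  // 12-byte store fails the power-of-2 check regardless of alignment.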

  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  virtual bool isLegalBroadcastLoad(Type *ElementTy,
                                    ElementCount NumElements) const {
    return false;
  }

  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
                                          Align Alignment) const {
    return false;
  }

  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
                                           Align Alignment) const {
    return false;
  }

  virtual bool isLegalMaskedCompressStore(Type *DataType,
                                          Align Alignment) const {
    return false;
  }

  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                               unsigned Opcode1,
                               const SmallBitVector &OpcodeMask) const {
    return false;
  }

  virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
    return false;
  }

  virtual bool isLegalMaskedVectorHistogram(Type *AddrType,
                                            Type *DataType) const {
    return false;
  }

  virtual bool enableOrderedReductions() const { return false; }

  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const {
    return false;
  }

  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  virtual bool prefersVectorizedAddressing() const { return true; }

  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               StackOffset BaseOffset,
                                               bool HasBaseReg, int64_t Scale,
                                               unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return InstructionCost::getInvalid();
  }

  virtual bool LSRWithInstrQueries() const { return false; }

  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  virtual bool isProfitableToHoist(Instruction *I) const { return true; }

  virtual bool useAA() const { return false; }

  virtual bool isTypeLegal(Type *Ty) const { return false; }

  virtual unsigned getRegUsageForType(Type *Ty) const { return 1; }

  virtual bool shouldBuildLookupTables() const { return true; }

  virtual bool shouldBuildLookupTablesForConstant(Constant *C) const {
    return true;
  }

  virtual bool shouldBuildRelLookupTables() const { return false; }

  virtual bool useColdCCForColdCall(Function &F) const { return false; }

  virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return false;
  }

  virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                                  unsigned ScalarOpdIdx) const {
    return false;
  }

  virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                      int OpdIdx) const {
    return OpdIdx == -1;
  }

  virtual bool
  isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                   int RetIdx) const {
    return RetIdx == 0;
  }

  virtual InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const {
    return 0;
  }

  virtual InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  virtual bool supportsEfficientVectorElementLoadStore() const { return false; }

  virtual bool supportsTailCalls() const { return true; }

  virtual bool supportsTailCallFor(const CallBase *CB) const {
    llvm_unreachable("Not implemented");
  }

  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  virtual TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
    return {};
  }

  virtual bool enableSelectOptimize() const { return true; }

  virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const {
    // A select with two constant operands will usually be better left as a
    // select.
    using namespace llvm::PatternMatch;
    if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
      return false;
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
    return isa<SelectInst>(I) &&
           (match(I, m_LogicalAnd(m_Value(), m_Value())) ||
            match(I, m_LogicalOr(m_Value(), m_Value())));
  }
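  // For example, `select i1 %a, i1 %b, i1 false` (a logical-and) makes this
  // hook return true, while `select i1 %c, i32 0, i32 1`, whose two value
  // operands are constants, returns false and is left as a select.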

  virtual bool enableInterleavedAccessVectorization() const { return false; }

  virtual bool enableMaskedInterleavedAccessVectorization() const {
    return false;
  }

  virtual bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              Align Alignment,
                                              unsigned *Fast) const {
    return false;
  }

  virtual TTI::PopcntSupportKind
  getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  virtual bool haveFastSqrt(Type *Ty) const { return false; }

  virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const {
    return true;
  }

  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  virtual InstructionCost getFPOpCost(Type *Ty) const {
    return TTI::TCC_Basic;
  }

  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                                const APInt &Imm,
                                                Type *Ty) const {
    return 0;
  }

  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                        TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                            const APInt &Imm, Type *Ty,
                                            TTI::TargetCostKind CostKind,
                                            Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
                                             const Function &Fn) const {
    return false;
  }

  virtual unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
  virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const {
    return false;
  }

  virtual unsigned getRegisterClassForType(bool Vector,
                                           Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  virtual const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  virtual TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  virtual unsigned getMinVectorRegisterBitWidth() const { return 128; }

  virtual std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  virtual std::optional<unsigned> getVScaleForTuning() const {
    return std::nullopt;
  }
  virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  virtual bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
    return 0;
  }
  virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const {
    return VF;
  }

  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  virtual unsigned getCacheLineSize() const { return 0; }
  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  virtual std::optional<unsigned> getMinPageSize() const { return {}; }

  virtual unsigned getPrefetchDistance() const { return 0; }
  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return 1;
  }
  virtual unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  virtual bool enableWritePrefetching() const { return false; }
  virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  virtual InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  virtual unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  virtual InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values,
    // which seems a little odd given how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency &&
        Ty->getScalarType()->isFloatingPointTy())
      return 3;

    return 1;
  }
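  // For example, with this default: `udiv i32 %a, %b` costs TCC_Expensive,
  // an `or` of two @llvm.experimental.widenable.condition results is
  // TCC_Free, `fadd float` costed at TCK_Latency returns 3, and everything
  // else returns 1.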

  virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                          unsigned Opcode1,
                                          const SmallBitVector &OpcodeMask,
                                          TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  virtual InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const {
    return 1;
  }

  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
                                           Type *Src, TTI::CastContextHint CCH,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToAddr: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      assert(DstSize == DL.getAddressSizeInBits(Src));
      if (DL.isLegalInteger(DstSize))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }
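  // For example, on a target whose DataLayout declares native i32 and i64
  // integers, `trunc i64 %x to i32` is free (the destination is a legal
  // integer type), while `trunc i64 %x to i17` costs 1 because i17 is not.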

  virtual InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index, TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost getCFInstrCost(unsigned Opcode,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  virtual InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
      TTI::OperandValueInfo Op2Info, const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index, const Value *Op0,
                                             const Value *Op1) const {
    return 1;
  }

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization) and 'Idx' being the extract lane.
  virtual InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
    return 1;
  }

  virtual InstructionCost
  getVectorInstrCost(const Instruction &I, Type *Val,
                     TTI::TargetCostKind CostKind, unsigned Index) const {
    return 1;
  }

  virtual InstructionCost
  getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val,
                                   TTI::TargetCostKind CostKind,
                                   unsigned Index) const {
    return 1;
  }

  virtual InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getInsertExtractValueCost(unsigned Opcode,
                            TTI::TargetCostKind CostKind) const {
    // Note: The `insertvalue` cost here is chosen to match the default case of
    // getInstructionCost() -- as prior to adding this helper `insertvalue` was
    // not handled.
    if (Opcode == Instruction::InsertValue &&
        CostKind != TTI::TCK_RecipThroughput)
      return TTI::TCC_Basic;
    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo, const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                            Align Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return 1;
  }

  virtual InstructionCost getExpandCompressMemoryOpCost(
      unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
      TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
    return 1;
  }

  virtual InstructionCost
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  virtual InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  virtual InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_begin_custom_abi:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  virtual unsigned getNumberOfParts(Type *Tp) const { return 1; }

  virtual InstructionCost getAddressComputationCost(Type *Ty,
                                                    ScalarEvolution *,
                                                    const SCEV *,
                                                    TTI::TargetCostKind) const {
    return 0;
  }

  virtual InstructionCost
  getArithmeticReductionCost(unsigned, VectorType *,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind) const {
    return 1;
  }

  virtual InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind) const {
    return 1;
  }

  virtual InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) const {
    return false;
  }

  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }

  virtual Value *
  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
                                    bool CanCreate = true) const {
    return nullptr;
  }

  virtual Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            Align SrcAlign, Align DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  virtual void getMemcpyLoopResidualLoweringTypes(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
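  // For example, with RemainingBytes == 3 and no atomic element size this
  // emits three i8 operand types; with AtomicCpySize == 4 and
  // RemainingBytes == 8 it emits two i32 types.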

  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }
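  // For example, a caller with "target-cpu"="generic" and a callee with
  // "target-cpu"="skylake" are not inline-compatible under this default,
  // even if their "target-features" strings match.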

  virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                        unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  virtual bool areTypesABICompatible(const Function *Caller,
                                     const Function *Callee,
                                     const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
    return false;
  }

  virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
    return false;
  }

  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
    return 128;
  }

  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  virtual bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           Align Alignment,
                                           unsigned AddrSpace) const {
    return true;
  }

  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
    return true;
  }

  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                           ElementCount VF) const {
    return true;
  }

  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const {
    return true;
  }

  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const {
    return VF;
  }

  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const {
    return VF;
  }

  virtual bool preferFixedOverScalableIfEqualCost() const { return false; }

  virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const {
    return false;
  }
  virtual bool preferAlternateOpcodeVectorization() const { return true; }

  virtual bool preferPredicatedReductionSelect() const { return false; }

  virtual bool preferEpilogueVectorization() const { return true; }

  virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  virtual TTI::ReductionShuffle
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
    return TTI::ReductionShuffle::SplitHalf;
  }

  virtual unsigned getGISelRematGlobalCost() const { return 1; }

  virtual unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  virtual bool supportsScalableVectors() const { return false; }

  virtual bool enableScalableVectorization() const { return false; }

  virtual bool hasActiveVectorLength() const { return false; }

  virtual bool isProfitableToSinkOperands(Instruction *I,
                                          SmallVectorImpl<Use *> &Ops) const {
    return false;
  }

  virtual bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }

  virtual TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  virtual bool hasArmWideBranch(bool) const { return false; }

  virtual APInt getFeatureMask(const Function &F) const {
    return APInt::getZero(32);
  }

  virtual bool isMultiversionedFunction(const Function &F) const {
    return false;
  }

  virtual unsigned getMaxNumArgs() const { return UINT_MAX; }

  virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                               Type *ArrayType) const {
    return 0;
  }

  virtual void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}

  virtual bool allowVectorElementIndexingUsingGEP() const { return true; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector, we need to pick the max of the min required
      // sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
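  // For example, a ConstantInt of 100 (getSignificantBits() == 8) yields 7
  // with isSigned == false, -100 also yields 7 but sets isSigned == true,
  // and a zext from i8 yields 8 with isSigned == false.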

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take the absolute value for the negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
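  // For example, a pointer SCEV of the form {%base,+,16} checked against
  // MergeDistance == 64 returns true (16 < 64); note that, per the FIXME
  // above, a negative stride such as {%base,+,-16} also returns true today.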
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const override {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has only the base pointer
    // operand; TargetType is then a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with a constant index and the
      // cost of a vector GEP with a splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Make isLegalAddressingMode TypeSize aware.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    //    %p = getelementptr i32, ptr %base, i32 42
    //    %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
    if (static_cast<const T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }
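  // For example, `getelementptr i32, ptr %p, i64 3` folds to a base register
  // plus a constant offset of 12 bytes; it is TCC_Free whenever the target's
  // isLegalAddressingMode accepts [reg + 12] for the given access type.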

  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const override {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we take into account GEP instructions only (although
    // here can also come alloca instructions, values, constants and/or
    // constant expressions, PHIs, bitcasts ... whatever is allowed to be used
    // as a pointer). Typically, if Base is not a GEP instruction and all the
    // pointers are relative to the same base address, all the rest are
    // either GEP instructions, PHIs, bitcasts or constants. When we have the
    // same base, we just calculate the cost of each non-Base GEP as an ADD
    // operation if any of its indices is a non-constant.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as a sum of the costs of the GEP instructions.
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<const T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            {});
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<const T *>(this)->getGEPCost(
            GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
            AccessTy, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<const T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::ExtractValue:
    case Instruction::InsertValue:
      return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToAddr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      const auto Op1Info = TTI::getOperandInfo(Operands[1]);
      const auto Op2Info = TTI::getOperandInfo(Operands[2]);
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      const auto Op1Info = TTI::getOperandInfo(Operands[0]);
      const auto Op2Info = TTI::getOperandInfo(Operands[1]);
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      // Treat undef/poison mask as free (no matter the length).
      if (all_of(Mask, [](int M) { return M < 0; }))
        return TTI::TCC_Free;

      // TODO: move more of this inside improveShuffleKindFromMask.
      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return TTI::TCC_Free;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
                                           VecSrcTy, Mask, CostKind, SubIndex,
                                           VecTy, Operands, Shuffle);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
              SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands, Shuffle);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask)) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask);

        // Widening shuffle - widening the source(s) to the new length
        // (treated as free - see above), and then perform the adjusted
        // shuffle at that width.
        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;

          return TargetTTI->getShuffleCost(
              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
              VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
        }

        // Narrowing shuffle - perform shuffle at original wider width and
        // then extract the lower elements.
        // FIXME: This can assume widening, which is not true of all vector
        // architectures (and is not even the default).
        AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);

        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
            Shuffle);

        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      if (Shuffle->isIdentity())
        return TTI::TCC_Free;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
                                         Mask, CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
                                         Mask, CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         VecSrcTy, Mask, CostKind, 0, nullptr,
                                         Operands, Shuffle);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
            Shuffle);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
                                         CostKind, SubIndex, nullptr, Operands,
                                         Shuffle);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
                                       Mask, CostKind, 0, nullptr, Operands,
                                       Shuffle);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything remaining as 'basic'.
    return TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override {
    auto *TargetTTI = static_cast<const T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TTI::TCK_SizeAndLatency);
    return Cost >= TTI::TCC_Expensive;
  }

  bool supportsTailCallFor(const CallBase *CB) const override {
    return static_cast<const T *>(this)->supportsTailCalls();
  }
};
} // namespace llvm

#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint32_t Index
uint64_t Size
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1041
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
an instruction to allocate memory on the stack
Definition: Instructions.h:64
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:398
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:154
This is an important base class in LLVM.
Definition: Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getAddressSizeInBits(unsigned AS) const
The size in bits of an address in for the given AS.
Definition: DataLayout.h:407
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:220
LLVM_ABI const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:708
LLVM_ABI unsigned getPointerTypeSizeInBits(Type *) const
The pointer representation size in bits for this type.
Definition: DataLayout.cpp:742
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:674
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:468
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:318
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:22
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:803
The core instruction combiner logic.
Definition: InstCombiner.h:48
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:43
The optimization diagnostic interface.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:90
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:574
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:684
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
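The two SmallVector mutators above in one sketch (sizes and values are illustrative):

#include "llvm/ADT/SmallVector.h"
#include <iterator>
using namespace llvm;

static SmallVector<int, 8> collect() {
  SmallVector<int, 8> Elems;          // inline storage for 8 ints
  Elems.push_back(1);                 // add a single element
  int More[] = {2, 3, 4};
  Elems.append(std::begin(More), std::end(More)); // add a range
  return Elems;                       // {1, 2, 3, 4}
}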
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:34
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:43
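A sketch of how the two StackOffset factories compose; the byte counts are made up. Fixed and scalable components are tracked separately because the scalable part is only known at run time (it scales with vscale):

#include "llvm/Support/TypeSize.h"
using namespace llvm;

static StackOffset exampleOffset() {
  StackOffset FixedPart = StackOffset::getFixed(16);       // 16 bytes
  StackOffset ScalablePart = StackOffset::getScalable(32); // 32 * vscale
  return FixedPart + ScalablePart; // components remain separate
}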
An instruction for storing to memory.
Definition: Instructions.h:296
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:55
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:657
Class to represent struct types.
Definition: DerivedTypes.h:218
Multiway switch.
Provides information about what library functions are available for the current target.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const
virtual bool preferAlternateOpcodeVectorization() const
virtual bool isProfitableLSRChainElement(Instruction *I) const
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
virtual int getInliningLastCallToStaticBonus() const
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
virtual bool preferFixedOverScalableIfEqualCost() const
virtual const DataLayout & getDataLayout() const
virtual std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
virtual bool enableInterleavedAccessVectorization() const
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual InstructionCost getFPOpCost(Type *Ty) const
virtual unsigned getMaxInterleaveFactor(ElementCount VF) const
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
virtual TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isStridedAccess(const SCEV *Ptr) const
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace) const
virtual TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
virtual InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const
virtual unsigned adjustInliningThreshold(const CallBase *CB) const
virtual InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
virtual bool shouldDropLSRSolutionIfLessProfitable() const
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const
virtual bool isLegalICmpImmediate(int64_t Imm) const
virtual bool preferPredicatedReductionSelect() const
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
virtual bool haveFastSqrt(Type *Ty) const
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
virtual std::optional< unsigned > getVScaleForTuning() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
virtual unsigned getNumberOfParts(Type *Tp) const
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
virtual void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
virtual bool useColdCCForColdCall(Function &F) const
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual unsigned getPrefetchDistance() const
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
virtual bool isLegalAddScalableImmediate(int64_t Imm) const
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
virtual unsigned getMinVectorRegisterBitWidth() const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
virtual bool shouldBuildLookupTablesForConstant(Constant *C) const
virtual bool isFPVectorizationPotentiallyUnsafe() const
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const
virtual InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
virtual unsigned getEpilogueVectorizationMinVF() const
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
virtual TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
virtual TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
virtual unsigned getMaxPrefetchIterationsAhead() const
virtual bool allowVectorElementIndexingUsingGEP() const
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
virtual TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
virtual bool hasBranchDivergence(const Function *F=nullptr) const
virtual InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
virtual bool isProfitableToHoist(Instruction *I) const
virtual const char * getRegisterClassName(unsigned ClassID) const
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
virtual bool isLegalNTStore(Type *DataType, Align Alignment) const
virtual InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual APInt getFeatureMask(const Function &F) const
virtual std::optional< unsigned > getMinPageSize() const
virtual unsigned getRegUsageForType(Type *Ty) const
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const
virtual bool isLoweredToCall(const Function *F) const
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
virtual BranchProbability getPredictableBranchThreshold() const
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
virtual bool isLegalToVectorizeStore(StoreInst *SI) const
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const
virtual bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const
virtual bool isMultiversionedFunction(const Function &F) const
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
virtual bool isLegalAddImmediate(int64_t Imm) const
virtual InstructionCost getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) const
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const
virtual InstructionCost getBranchMispredictPenalty() const
virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const
virtual bool enableMaskedInterleavedAccessVectorization() const
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
virtual unsigned getInliningThresholdMultiplier() const
TargetTransformInfoImplBase(const DataLayout &DL)
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
virtual bool isAlwaysUniform(const Value *V) const
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
virtual bool shouldExpandReduction(const IntrinsicInst *II) const
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
virtual unsigned getGISelRematGlobalCost() const
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
virtual bool isTypeLegal(Type *Ty) const
virtual unsigned getAssumedAddrSpace(const Value *V) const
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx) const
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
virtual InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace) const
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
virtual bool supportsTailCallFor(const CallBase *CB) const
virtual std::optional< unsigned > getMaxVScale() const
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const
virtual unsigned getFlatAddressSpace() const
virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
virtual InstructionCost getMemcpyCost(const Instruction *I) const
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
virtual bool isSourceOfDivergence(const Value *V) const
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
virtual void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
virtual bool supportsEfficientVectorElementLoadStore() const
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
virtual unsigned getMinTripCountTailFoldingThreshold() const
virtual TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
virtual void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
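The members above are the conservative defaults a target inherits; a backend overrides only the hooks it can answer better. A minimal sketch (MyTargetTTIImpl and both constants are hypothetical; real targets usually derive from BasicTTIImplBase instead):

#include "llvm/Analysis/TargetTransformInfoImpl.h"
using namespace llvm;

class MyTargetTTIImpl : public TargetTransformInfoImplBase {
public:
  explicit MyTargetTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplBase(DL) {}

  // Pretend the ISA has a 12-bit signed add-immediate field.
  bool isLegalAddImmediate(int64_t Imm) const override {
    return Imm >= -2048 && Imm < 2048;
  }

  // Pretend there are 32 registers in every class.
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return 32;
  }
};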
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
bool supportsTailCallFor(const CallBase *CB) const override
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of the instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
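A hedged sketch of how the two enumerations interact: the same operation can land in different TCC_* buckets depending on the TargetCostKind being queried (exampleCost is illustrative, not an LLVM hook):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static InstructionCost exampleCost(unsigned Opcode,
                                   TTI::TargetCostKind CostKind) {
  switch (Opcode) {
  case Instruction::UDiv:
    // Expensive for throughput and latency, but a single instruction
    // as far as code size is concerned.
    return CostKind == TTI::TCK_CodeSize ? TTI::TCC_Basic
                                         : TTI::TCC_Expensive;
  default:
    return TTI::TCC_Basic; // a typical single instruction
  }
}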
MemIndexedMode
The type of load/store indexing.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
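Concrete masks help when reading these kinds; below is a sketch for 4-lane sources A (lanes 0-3) and B (lanes 4-7). The mask values are lane indices, and the examples are illustrative only:

// Illustrative shufflevector masks, one per kind:
int BroadcastMask[] = {0, 0, 0, 0};  // SK_Broadcast: splat A[0]
int ReverseMask[]   = {3, 2, 1, 0};  // SK_Reverse: single-source reversal
int SelectMask[]    = {0, 5, 2, 7};  // SK_Select: each lane from A or B,
                                     //   staying in its own lane
int TwoSrcMask[]    = {6, 1, 4, 3};  // SK_PermuteTwoSrc: arbitrary mix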
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
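A short sketch of the Type queries above, together with FixedVectorType::get from earlier in this index (byteVectorDemo is a hypothetical helper):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include <cassert>
using namespace llvm;

// Build <4 x i8> and peel it back apart.
static void byteVectorDemo(LLVMContext &Ctx) {
  Type *I8 = Type::getInt8Ty(Ctx);
  Type *Vec = FixedVectorType::get(I8, 4);   // <4 x i8>
  assert(Vec->isVectorTy());
  assert(Vec->getScalarType() == I8);        // element type
  assert(Vec->getScalarSizeInBits() == 8);   // bits per element
}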
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
Base class of all SIMD vector types.
Definition: DerivedTypes.h:430
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
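TypeSize and ElementCount (whose factory appears earlier in this index) share the fixed-vs-scalable split; a sketch of the guard the accessor requires (fixedBitsOrZero is hypothetical):

#include "llvm/Support/TypeSize.h"
using namespace llvm;

static uint64_t fixedBitsOrZero(TypeSize TS) {
  // getFixedValue() asserts !isScalable(), so callers must check first.
  return TS.isScalable() ? 0 : TS.getFixedValue();
}

// ElementCount::get(4, /*Scalable=*/true) describes <vscale x 4 x ...>;
// ElementCount::get(4, /*Scalable=*/false) describes a plain 4 lanes.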
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
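A sketch tying the matchers above together (isSelectOfLogic is hypothetical): match a select whose condition is either a logical-and or a logical-or.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static bool isSelectOfLogic(Value *V) {
  return match(V, m_Select(m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                       m_LogicalOr(m_Value(), m_Value())),
                           m_Value(), m_Value()));
}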
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
Definition: STLExtras.h:2491
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
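The range helpers and the bit test above compose naturally; a sketch (both helpers hypothetical):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// True if every width is a power of two.
static bool allPow2(ArrayRef<uint32_t> Widths) {
  return all_of(Widths, [](uint32_t W) { return isPowerOf2_32(W); });
}

// Index of the first zero width, or -1; enumerate() pairs each element
// with its 0-based index.
static int firstZero(ArrayRef<uint32_t> Widths) {
  for (auto [Idx, W] : enumerate(Widths))
    if (W == 0)
      return static_cast<int>(Idx);
  return -1;
}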
constexpr int PoisonMaskElem
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
gep_type_iterator gep_type_begin(const User *GEP)
InstructionCost Cost
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.