LLVM 21.0.0git — doxygen source listing of TargetTransformInfoImpl.h.
Note: each code line below is prefixed with its original line number from the
header; gaps in that numbering indicate lines dropped by the extraction.
1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
20#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/Operator.h"
25#include <optional>
26#include <utility>
27
28namespace llvm {
29
30class Function;
31
32/// Base class for use as a mix-in that aids implementing
33/// a TargetTransformInfo-compatible class.
35
36protected:
38
39 const DataLayout &DL;
40
42
43public:
44 // Provide value semantics. MSVC requires that we spell all of these out.
47
48 const DataLayout &getDataLayout() const { return DL; }
49
50 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
53 // In the basic model, we just assume that all-constant GEPs will be folded
54 // into their uses via addressing modes.
55 for (const Value *Operand : Operands)
56 if (!isa<Constant>(Operand))
57 return TTI::TCC_Basic;
58
59 return TTI::TCC_Free;
60 }
61
63 unsigned &JTSize,
65 BlockFrequencyInfo *BFI) const {
66 (void)PSI;
67 (void)BFI;
68 JTSize = 0;
69 return SI.getNumCases();
70 }
71
72 unsigned getInliningThresholdMultiplier() const { return 1; }
75 return 8;
76 }
78 // This is the value of InlineConstants::LastCallToStaticBonus before it was
79 // removed along with the introduction of this function.
80 return 15000;
81 }
82 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
83 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
84 return 0;
85 };
86
87 int getInlinerVectorBonusPercent() const { return 150; }
88
90 return TTI::TCC_Expensive;
91 }
92
94 return 64;
95 }
96
97 // Although this default value is arbitrary, it is not random. It is assumed
98 // that a condition that evaluates the same way by a higher percentage than
99 // this is best represented as control flow. Therefore, the default value N
100 // should be set such that the win from N% correct executions is greater than
101 // the loss from (100 - N)% mispredicted executions for the majority of
102 // intended targets.
104 return BranchProbability(99, 100);
105 }
106
108
109 bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
110
111 bool isSourceOfDivergence(const Value *V) const { return false; }
112
113 bool isAlwaysUniform(const Value *V) const { return false; }
114
115 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
116 return false;
117 }
118
119 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
120 return true;
121 }
122
123 unsigned getFlatAddressSpace() const { return -1; }
124
126 Intrinsic::ID IID) const {
127 return false;
128 }
129
130 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
132 return AS == 0;
133 };
134
135 unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
136
137 bool isSingleThreaded() const { return false; }
138
139 std::pair<const Value *, unsigned>
141 return std::make_pair(nullptr, -1);
142 }
143
145 Value *NewV) const {
146 return nullptr;
147 }
148
149 bool isLoweredToCall(const Function *F) const {
150 assert(F && "A concrete function must be provided to this routine.");
151
152 // FIXME: These should almost certainly not be handled here, and instead
153 // handled with the help of TLI or the target itself. This was largely
154 // ported from existing analysis heuristics here so that such refactorings
155 // can take place in the future.
156
157 if (F->isIntrinsic())
158 return false;
159
160 if (F->hasLocalLinkage() || !F->hasName())
161 return true;
162
163 StringRef Name = F->getName();
164
165 // These will all likely lower to a single selection DAG node.
166 // clang-format off
167 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
168 Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
169 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
170 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
171 Name == "sin" || Name == "sinf" || Name == "sinl" ||
172 Name == "cos" || Name == "cosf" || Name == "cosl" ||
173 Name == "tan" || Name == "tanf" || Name == "tanl" ||
174 Name == "asin" || Name == "asinf" || Name == "asinl" ||
175 Name == "acos" || Name == "acosf" || Name == "acosl" ||
176 Name == "atan" || Name == "atanf" || Name == "atanl" ||
177 Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
178 Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
179 Name == "cosh" || Name == "coshf" || Name == "coshl" ||
180 Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
181 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
182 Name == "exp10" || Name == "exp10l" || Name == "exp10f")
183 return false;
184 // clang-format on
185 // These are all likely to be optimized into something smaller.
186 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
187 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
188 Name == "floorf" || Name == "ceil" || Name == "round" ||
189 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
190 Name == "llabs")
191 return false;
192
193 return true;
194 }
195
198 HardwareLoopInfo &HWLoopInfo) const {
199 return false;
200 }
201
202 unsigned getEpilogueVectorizationMinVF() const { return 16; }
203
204 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }
205
207 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
209 }
210
211 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
212 IntrinsicInst &II) const {
213 return std::nullopt;
214 }
215
216 std::optional<Value *>
218 APInt DemandedMask, KnownBits &Known,
219 bool &KnownBitsComputed) const {
220 return std::nullopt;
221 }
222
224 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
225 APInt &UndefElts2, APInt &UndefElts3,
226 std::function<void(Instruction *, unsigned, APInt, APInt &)>
227 SimplifyAndSetOp) const {
228 return std::nullopt;
229 }
230
233 OptimizationRemarkEmitter *) const {}
234
236 TTI::PeelingPreferences &) const {}
237
238 bool isLegalAddImmediate(int64_t Imm) const { return false; }
239
240 bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
241
242 bool isLegalICmpImmediate(int64_t Imm) const { return false; }
243
244 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
245 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
246 Instruction *I = nullptr,
247 int64_t ScalableOffset = 0) const {
248 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
249 // taken from the implementation of LSR.
250 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
251 }
252
253 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
254 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
255 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
256 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
257 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
258 }
259
260 bool isNumRegsMajorCostOfLSR() const { return true; }
261
262 bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
263
264 bool isProfitableLSRChainElement(Instruction *I) const { return false; }
265
266 bool canMacroFuseCmp() const { return false; }
267
270 TargetLibraryInfo *LibInfo) const {
271 return false;
272 }
273
276 return TTI::AMK_None;
277 }
278
279 bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
280 return false;
281 }
282
283 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
284 return false;
285 }
286
287 bool isLegalNTStore(Type *DataType, Align Alignment) const {
288 // By default, assume nontemporal memory stores are available for stores
289 // that are aligned and have a size that is a power of 2.
290 unsigned DataSize = DL.getTypeStoreSize(DataType);
291 return Alignment >= DataSize && isPowerOf2_32(DataSize);
292 }
294 bool isLegalNTLoad(Type *DataType, Align Alignment) const {
295 // By default, assume nontemporal memory loads are available for loads that
296 // are aligned and have a size that is a power of 2.
297 unsigned DataSize = DL.getTypeStoreSize(DataType);
298 return Alignment >= DataSize && isPowerOf2_32(DataSize);
299 }
300
301 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
302 return false;
303 }
304
305 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
306 return false;
307 }
308
309 bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
310 return false;
311 }
312
313 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
314 return false;
315 }
316
318 Align Alignment) const {
319 return false;
320 }
321
322 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const {
323 return false;
324 }
325
326 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
327 const SmallBitVector &OpcodeMask) const {
328 return false;
329 }
330
331 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
332 return false;
333 }
334
335 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
336 return false;
337 }
338
339 bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
340 Align Alignment, unsigned AddrSpace) {
341 return false;
342 }
343
344 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const {
345 return false;
346 }
347
348 bool enableOrderedReductions() const { return false; }
349
350 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
351
352 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
353 return false;
354 }
355
356 bool prefersVectorizedAddressing() const { return true; }
357
359 StackOffset BaseOffset, bool HasBaseReg,
360 int64_t Scale,
361 unsigned AddrSpace) const {
362 // Guess that all legal addressing mode are free.
363 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
364 Scale, AddrSpace, /*I=*/nullptr,
365 BaseOffset.getScalable()))
366 return 0;
367 return -1;
368 }
369
370 bool LSRWithInstrQueries() const { return false; }
371
372 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
373
374 bool isProfitableToHoist(Instruction *I) const { return true; }
375
376 bool useAA() const { return false; }
377
378 bool isTypeLegal(Type *Ty) const { return false; }
379
380 unsigned getRegUsageForType(Type *Ty) const { return 1; }
381
382 bool shouldBuildLookupTables() const { return true; }
383
384 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
385
386 bool shouldBuildRelLookupTables() const { return false; }
387
388 bool useColdCCForColdCall(Function &F) const { return false; }
389
391 return false;
392 }
393
395 unsigned ScalarOpdIdx) const {
396 return false;
397 }
398
400 int OpdIdx) const {
401 return OpdIdx == -1;
402 }
403
405 int RetIdx) const {
406 return RetIdx == 0;
407 }
408
410 const APInt &DemandedElts,
411 bool Insert, bool Extract,
413 ArrayRef<Value *> VL = {}) const {
414 return 0;
415 }
416
417 InstructionCost
421 return 0;
422 }
423
424 bool supportsEfficientVectorElementLoadStore() const { return false; }
425
426 bool supportsTailCalls() const { return true; }
427
428 bool enableAggressiveInterleaving(bool LoopHasReductions) const {
429 return false;
430 }
431
433 bool IsZeroCmp) const {
434 return {};
435 }
436
437 bool enableSelectOptimize() const { return true; }
438
440 // A select with two constant operands will usually be better left as a
441 // select.
442 using namespace llvm::PatternMatch;
444 return false;
445 // If the select is a logical-and/logical-or then it is better treated as a
446 // and/or by the backend.
447 return isa<SelectInst>(I) &&
450 }
451
452 bool enableInterleavedAccessVectorization() const { return false; }
453
454 bool enableMaskedInterleavedAccessVectorization() const { return false; }
455
456 bool isFPVectorizationPotentiallyUnsafe() const { return false; }
457
459 unsigned AddressSpace, Align Alignment,
460 unsigned *Fast) const {
461 return false;
462 }
463
464 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
465 return TTI::PSK_Software;
466 }
467
468 bool haveFastSqrt(Type *Ty) const { return false; }
469
470 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
471
472 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
473
476 }
477
478 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
479 const APInt &Imm, Type *Ty) const {
480 return 0;
481 }
482
485 return TTI::TCC_Basic;
486 }
487
488 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
489 const APInt &Imm, Type *Ty,
491 Instruction *Inst = nullptr) const {
492 return TTI::TCC_Free;
493 }
494
496 const APInt &Imm, Type *Ty,
498 return TTI::TCC_Free;
499 }
500
502 const Function &Fn) const {
503 return false;
504 }
505
506 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
507 bool hasConditionalLoadStoreForType(Type *Ty) const { return false; }
508
509 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
510 return Vector ? 1 : 0;
511 };
512
513 const char *getRegisterClassName(unsigned ClassID) const {
514 switch (ClassID) {
515 default:
516 return "Generic::Unknown Register Class";
517 case 0:
518 return "Generic::ScalarRC";
519 case 1:
520 return "Generic::VectorRC";
521 }
522 }
523
525 return TypeSize::getFixed(32);
526 }
527
528 unsigned getMinVectorRegisterBitWidth() const { return 128; }
529
530 std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
531 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
532 bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
533
534 bool
536 return false;
537 }
538
539 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
540 return ElementCount::get(0, IsScalable);
541 }
542
543 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
544 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
545
547 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
548 AllowPromotionWithoutCommonHeader = false;
549 return false;
550 }
551
552 unsigned getCacheLineSize() const { return 0; }
553 std::optional<unsigned>
555 switch (Level) {
557 [[fallthrough]];
559 return std::nullopt;
560 }
561 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
562 }
563
564 std::optional<unsigned>
566 switch (Level) {
568 [[fallthrough]];
570 return std::nullopt;
571 }
572
573 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
574 }
575
576 std::optional<unsigned> getMinPageSize() const { return {}; }
577
578 unsigned getPrefetchDistance() const { return 0; }
579 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
580 unsigned NumStridedMemAccesses,
581 unsigned NumPrefetches, bool HasCall) const {
582 return 1;
583 }
584 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
585 bool enableWritePrefetching() const { return false; }
586 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
587
589 getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
590 Type *AccumType, ElementCount VF,
593 std::optional<unsigned> BinOp = std::nullopt) const {
595 }
596
597 unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
598
600 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
603 const Instruction *CxtI = nullptr) const {
604 // Widenable conditions will eventually lower into constants, so some
605 // operations with them will be trivially optimized away.
606 auto IsWidenableCondition = [](const Value *V) {
607 if (auto *II = dyn_cast<IntrinsicInst>(V))
608 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
609 return true;
610 return false;
611 };
612 // FIXME: A number of transformation tests seem to require these values
613 // which seems a little odd for how arbitary there are.
614 switch (Opcode) {
615 default:
616 break;
617 case Instruction::FDiv:
618 case Instruction::FRem:
619 case Instruction::SDiv:
620 case Instruction::SRem:
621 case Instruction::UDiv:
622 case Instruction::URem:
623 // FIXME: Unlikely to be true for CodeSize.
624 return TTI::TCC_Expensive;
625 case Instruction::And:
626 case Instruction::Or:
627 if (any_of(Args, IsWidenableCondition))
628 return TTI::TCC_Free;
629 break;
630 }
631
632 // Assume a 3cy latency for fp arithmetic ops.
634 if (Ty->getScalarType()->isFloatingPointTy())
635 return 3;
636
637 return 1;
638 }
639
641 unsigned Opcode1,
642 const SmallBitVector &OpcodeMask,
645 }
646
648 ArrayRef<int> Mask,
650 VectorType *SubTp,
651 ArrayRef<const Value *> Args = {},
652 const Instruction *CxtI = nullptr) const {
653 return 1;
654 }
655
656 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
659 const Instruction *I) const {
660 switch (Opcode) {
661 default:
662 break;
663 case Instruction::IntToPtr: {
664 unsigned SrcSize = Src->getScalarSizeInBits();
665 if (DL.isLegalInteger(SrcSize) &&
666 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
667 return 0;
668 break;
669 }
670 case Instruction::PtrToInt: {
671 unsigned DstSize = Dst->getScalarSizeInBits();
672 if (DL.isLegalInteger(DstSize) &&
673 DstSize >= DL.getPointerTypeSizeInBits(Src))
674 return 0;
675 break;
676 }
677 case Instruction::BitCast:
678 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
679 // Identity and pointer-to-pointer casts are free.
680 return 0;
681 break;
682 case Instruction::Trunc: {
683 // trunc to a native type is free (assuming the target has compare and
684 // shift-right of the same width).
685 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
686 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
687 return 0;
688 break;
689 }
690 }
691 return 1;
692 }
693
695 VectorType *VecTy,
696 unsigned Index) const {
697 return 1;
698 }
699
701 const Instruction *I = nullptr) const {
702 // A phi would be free, unless we're costing the throughput because it
703 // will require a register.
704 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
705 return 0;
706 return 1;
707 }
708
709 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
710 CmpInst::Predicate VecPred,
712 TTI::OperandValueInfo Op1Info,
713 TTI::OperandValueInfo Op2Info,
714 const Instruction *I) const {
715 return 1;
716 }
717
720 unsigned Index, Value *Op0,
721 Value *Op1) const {
722 return 1;
723 }
724
725 /// \param ScalarUserAndIdx encodes the information about extracts from a
726 /// vector with 'Scalar' being the value being extracted,'User' being the user
727 /// of the extract(nullptr if user is not known before vectorization) and
728 /// 'Idx' being the extract lane.
730 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
731 Value *Scalar,
732 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
733 return 1;
734 }
735
738 unsigned Index) const {
739 return 1;
740 }
741
742 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
743 const APInt &DemandedDstElts,
745 return 1;
746 }
747
748 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
749 unsigned AddressSpace,
752 const Instruction *I) const {
753 return 1;
754 }
755
756 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
757 unsigned AddressSpace,
759 const Instruction *I) const {
760 return 1;
761 }
762
764 Align Alignment, unsigned AddressSpace,
766 return 1;
767 }
768
770 const Value *Ptr, bool VariableMask,
771 Align Alignment,
773 const Instruction *I = nullptr) const {
774 return 1;
775 }
776
778 unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
779 TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
780 return 1;
781 }
782
784 const Value *Ptr, bool VariableMask,
785 Align Alignment,
787 const Instruction *I = nullptr) const {
789 }
790
792 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
793 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
794 bool UseMaskForCond, bool UseMaskForGaps) const {
795 return 1;
796 }
797
800 switch (ICA.getID()) {
801 default:
802 break;
803 case Intrinsic::experimental_vector_histogram_add:
804 // For now, we want explicit support from the target for histograms.
806 case Intrinsic::allow_runtime_check:
807 case Intrinsic::allow_ubsan_check:
808 case Intrinsic::annotation:
809 case Intrinsic::assume:
810 case Intrinsic::sideeffect:
811 case Intrinsic::pseudoprobe:
812 case Intrinsic::arithmetic_fence:
813 case Intrinsic::dbg_assign:
814 case Intrinsic::dbg_declare:
815 case Intrinsic::dbg_value:
816 case Intrinsic::dbg_label:
817 case Intrinsic::invariant_start:
818 case Intrinsic::invariant_end:
819 case Intrinsic::launder_invariant_group:
820 case Intrinsic::strip_invariant_group:
821 case Intrinsic::is_constant:
822 case Intrinsic::lifetime_start:
823 case Intrinsic::lifetime_end:
824 case Intrinsic::experimental_noalias_scope_decl:
825 case Intrinsic::objectsize:
826 case Intrinsic::ptr_annotation:
827 case Intrinsic::var_annotation:
828 case Intrinsic::experimental_gc_result:
829 case Intrinsic::experimental_gc_relocate:
830 case Intrinsic::coro_alloc:
831 case Intrinsic::coro_begin:
832 case Intrinsic::coro_begin_custom_abi:
833 case Intrinsic::coro_free:
834 case Intrinsic::coro_end:
835 case Intrinsic::coro_frame:
836 case Intrinsic::coro_size:
837 case Intrinsic::coro_align:
838 case Intrinsic::coro_suspend:
839 case Intrinsic::coro_subfn_addr:
840 case Intrinsic::threadlocal_address:
841 case Intrinsic::experimental_widenable_condition:
842 case Intrinsic::ssa_copy:
843 // These intrinsics don't actually represent code after lowering.
844 return 0;
845 }
846 return 1;
847 }
848
852 return 1;
853 }
854
855 // Assume that we have a register of the right size for the type.
856 unsigned getNumberOfParts(Type *Tp) const { return 1; }
857
859 const SCEV *) const {
860 return 0;
861 }
862
864 std::optional<FastMathFlags> FMF,
865 TTI::TargetCostKind) const {
866 return 1;
867 }
868
871 TTI::TargetCostKind) const {
872 return 1;
873 }
874
875 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
876 Type *ResTy, VectorType *Ty,
877 FastMathFlags FMF,
879 return 1;
880 }
881
883 VectorType *Ty,
885 return 1;
886 }
887
889 return 0;
890 }
891
893 return false;
894 }
895
897 // Note for overrides: You must ensure for all element unordered-atomic
898 // memory intrinsics that all power-of-2 element sizes up to, and
899 // including, the return value of this method have a corresponding
900 // runtime lib call. These runtime lib call definitions can be found
901 // in RuntimeLibcalls.h
902 return 0;
903 }
904
906 Type *ExpectedType) const {
907 return nullptr;
908 }
909
910 Type *
912 unsigned SrcAddrSpace, unsigned DestAddrSpace,
913 Align SrcAlign, Align DestAlign,
914 std::optional<uint32_t> AtomicElementSize) const {
915 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
916 : Type::getInt8Ty(Context);
917 }
918
920 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
921 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
922 Align SrcAlign, Align DestAlign,
923 std::optional<uint32_t> AtomicCpySize) const {
924 unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
925 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
926 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
927 OpsOut.push_back(OpType);
928 }
929
930 bool areInlineCompatible(const Function *Caller,
931 const Function *Callee) const {
932 return (Caller->getFnAttribute("target-cpu") ==
933 Callee->getFnAttribute("target-cpu")) &&
934 (Caller->getFnAttribute("target-features") ==
935 Callee->getFnAttribute("target-features"));
936 }
937
938 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
939 unsigned DefaultCallPenalty) const {
940 return DefaultCallPenalty;
941 }
942
943 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
944 const ArrayRef<Type *> &Types) const {
945 return (Caller->getFnAttribute("target-cpu") ==
946 Callee->getFnAttribute("target-cpu")) &&
947 (Caller->getFnAttribute("target-features") ==
948 Callee->getFnAttribute("target-features"));
949 }
950
952 const DataLayout &DL) const {
953 return false;
954 }
955
957 const DataLayout &DL) const {
958 return false;
959 }
960
961 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
962
963 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
964
965 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
966
967 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
968 unsigned AddrSpace) const {
969 return true;
970 }
971
972 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
973 unsigned AddrSpace) const {
974 return true;
975 }
976
978 ElementCount VF) const {
979 return true;
980 }
981
982 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
983
984 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
985 unsigned ChainSizeInBytes,
986 VectorType *VecTy) const {
987 return VF;
988 }
989
990 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
991 unsigned ChainSizeInBytes,
992 VectorType *VecTy) const {
993 return VF;
994 }
995
996 bool preferFixedOverScalableIfEqualCost() const { return false; }
997
998 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
999 TTI::ReductionFlags Flags) const {
1000 return false;
1001 }
1002
1003 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1004 TTI::ReductionFlags Flags) const {
1005 return false;
1006 }
1007
1009 return true;
1010 }
1011
1012 bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
1013
1017 }
1018
1019 unsigned getGISelRematGlobalCost() const { return 1; }
1020
1021 unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
1022
1023 bool supportsScalableVectors() const { return false; }
1024
1025 bool enableScalableVectorization() const { return false; }
1026
1027 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1028 Align Alignment) const {
1029 return false;
1030 }
1031
1033 SmallVectorImpl<Use *> &Ops) const {
1034 return false;
1035 }
1036
1037 bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }
1038
1042 /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
1043 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
1044 }
1045
1046 bool hasArmWideBranch(bool) const { return false; }
1047
1048 uint64_t getFeatureMask(const Function &F) const { return 0; }
1049
1050 bool isMultiversionedFunction(const Function &F) const { return false; }
1051
1052 unsigned getMaxNumArgs() const { return UINT_MAX; }
1053
1054 unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
1055 return 0;
1056 }
1057
1059 const Function &F,
1060 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
1061
1062protected:
1063 // Obtain the minimum required size to hold the value (without the sign)
1064 // In case of a vector it returns the min required size for one element.
1065 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
1066 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
1067 const auto *VectorValue = cast<Constant>(Val);
1068
1069 // In case of a vector need to pick the max between the min
1070 // required size for each element
1071 auto *VT = cast<FixedVectorType>(Val->getType());
1072
1073 // Assume unsigned elements
1074 isSigned = false;
1075
1076 // The max required size is the size of the vector element type
1077 unsigned MaxRequiredSize =
1078 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
1079
1080 unsigned MinRequiredSize = 0;
1081 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
1082 if (auto *IntElement =
1083 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
1084 bool signedElement = IntElement->getValue().isNegative();
1085 // Get the element min required size.
1086 unsigned ElementMinRequiredSize =
1087 IntElement->getValue().getSignificantBits() - 1;
1088 // In case one element is signed then all the vector is signed.
1089 isSigned |= signedElement;
1090 // Save the max required bit size between all the elements.
1091 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
1092 } else {
1093 // not an int constant element
1094 return MaxRequiredSize;
1095 }
1096 }
1097 return MinRequiredSize;
1098 }
1099
1100 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
1101 isSigned = CI->getValue().isNegative();
1102 return CI->getValue().getSignificantBits() - 1;
1103 }
1104
1105 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
1106 isSigned = true;
1107 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
1108 }
1109
1110 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
1111 isSigned = false;
1112 return Cast->getSrcTy()->getScalarSizeInBits();
1113 }
1114
1115 isSigned = false;
1116 return Val->getType()->getScalarSizeInBits();
1117 }
1118
1119 bool isStridedAccess(const SCEV *Ptr) const {
1120 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1121 }
1122
1124 const SCEV *Ptr) const {
1125 if (!isStridedAccess(Ptr))
1126 return nullptr;
1127 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1128 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1129 }
1130
1132 int64_t MergeDistance) const {
1133 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1134 if (!Step)
1135 return false;
1136 APInt StrideVal = Step->getAPInt();
1137 if (StrideVal.getBitWidth() > 64)
1138 return false;
1139 // FIXME: Need to take absolute value for negative stride case.
1140 return StrideVal.getSExtValue() < MergeDistance;
1141 }
1142};
1143
1144/// CRTP base class for use as a mix-in that aids implementing
1145/// a TargetTransformInfo-compatible class.
1146template <typename T>
1148private:
1150
1151protected:
1153
1154public:
1156
1160 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1161 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
1162 bool HasBaseReg = (BaseGV == nullptr);
1163
1164 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1165 APInt BaseOffset(PtrSizeBits, 0);
1166 int64_t Scale = 0;
1167
1168 auto GTI = gep_type_begin(PointeeType, Operands);
1169 Type *TargetType = nullptr;
1170
1171 // Handle the case where the GEP instruction has a single operand,
1172 // the basis, therefore TargetType is a nullptr.
1173 if (Operands.empty())
1174 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1175
1176 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1177 TargetType = GTI.getIndexedType();
1178 // We assume that the cost of Scalar GEP with constant index and the
1179 // cost of Vector GEP with splat constant index are the same.
1180 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
1181 if (!ConstIdx)
1182 if (auto Splat = getSplatValue(*I))
1183 ConstIdx = dyn_cast<ConstantInt>(Splat);
1184 if (StructType *STy = GTI.getStructTypeOrNull()) {
1185 // For structures the index is always splat or scalar constant
1186 assert(ConstIdx && "Unexpected GEP index");
1187 uint64_t Field = ConstIdx->getZExtValue();
1188 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
1189 } else {
1190 // If this operand is a scalable type, bail out early.
1191 // TODO: Make isLegalAddressingMode TypeSize aware.
1192 if (TargetType->isScalableTy())
1193 return TTI::TCC_Basic;
1194 int64_t ElementSize =
1195 GTI.getSequentialElementStride(DL).getFixedValue();
1196 if (ConstIdx) {
1197 BaseOffset +=
1198 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1199 } else {
1200 // Needs scale register.
1201 if (Scale != 0)
1202 // No addressing mode takes two scale registers.
1203 return TTI::TCC_Basic;
1204 Scale = ElementSize;
1205 }
1206 }
1207 }
1208
1209 // If we haven't been provided a hint, use the target type for now.
1210 //
1211 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1212 // as it's possible to have a GEP with a foldable target type but a memory
1213 // access that isn't foldable. For example, this load isn't foldable on
1214 // RISC-V:
1215 //
1216 // %p = getelementptr i32, ptr %base, i32 42
1217 // %x = load <2 x i32>, ptr %p
1218 if (!AccessType)
1219 AccessType = TargetType;
1220
1221 // If the final address of the GEP is a legal addressing mode for the given
1222 // access type, then we can fold it into its users.
1223 if (static_cast<T *>(this)->isLegalAddressingMode(
1224 AccessType, const_cast<GlobalValue *>(BaseGV),
1225 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1226 Ptr->getType()->getPointerAddressSpace()))
1227 return TTI::TCC_Free;
1228
1229 // TODO: Instead of returning TCC_Basic here, we should use
1230 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1231 // model it.
1232 return TTI::TCC_Basic;
1233 }
1234
1236 const Value *Base,
1238 Type *AccessTy,
1241 // In the basic model we take into account GEP instructions only
1242 // (although here can come alloca instruction, a value, constants and/or
1243 // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
1244 // pointer). Typically, if Base is a not a GEP-instruction and all the
1245 // pointers are relative to the same base address, all the rest are
1246 // either GEP instructions, PHIs, bitcasts or constants. When we have same
1247 // base, we just calculate cost of each non-Base GEP as an ADD operation if
1248 // any their index is a non-const.
1249 // If no known dependecies between the pointers cost is calculated as a sum
1250 // of costs of GEP instructions.
1251 for (const Value *V : Ptrs) {
1252 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1253 if (!GEP)
1254 continue;
1255 if (Info.isSameBase() && V != Base) {
1256 if (GEP->hasAllConstantIndices())
1257 continue;
1258 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1259 Instruction::Add, GEP->getType(), CostKind,
1261 {});
1262 } else {
1263 SmallVector<const Value *> Indices(GEP->indices());
1264 Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
1265 GEP->getPointerOperand(),
1266 Indices, AccessTy, CostKind);
1267 }
1268 }
1269 return Cost;
1270 }
1271
1275 using namespace llvm::PatternMatch;
1276
1277 auto *TargetTTI = static_cast<T *>(this);
1278 // Handle non-intrinsic calls, invokes, and callbr.
1279 // FIXME: Unlikely to be true for anything but CodeSize.
1280 auto *CB = dyn_cast<CallBase>(U);
1281 if (CB && !isa<IntrinsicInst>(U)) {
1282 if (const Function *F = CB->getCalledFunction()) {
1283 if (!TargetTTI->isLoweredToCall(F))
1284 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1285
1286 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1287 }
1288 // For indirect or other calls, scale cost by number of arguments.
1289 return TTI::TCC_Basic * (CB->arg_size() + 1);
1290 }
1291
1292 Type *Ty = U->getType();
1293 unsigned Opcode = Operator::getOpcode(U);
1294 auto *I = dyn_cast<Instruction>(U);
1295 switch (Opcode) {
1296 default:
1297 break;
1298 case Instruction::Call: {
1299 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1300 auto *Intrinsic = cast<IntrinsicInst>(U);
1301 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1302 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1303 }
1304 case Instruction::Br:
1305 case Instruction::Ret:
1306 case Instruction::PHI:
1307 case Instruction::Switch:
1308 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1309 case Instruction::ExtractValue:
1310 case Instruction::Freeze:
1311 return TTI::TCC_Free;
1312 case Instruction::Alloca:
1313 if (cast<AllocaInst>(U)->isStaticAlloca())
1314 return TTI::TCC_Free;
1315 break;
1316 case Instruction::GetElementPtr: {
1317 const auto *GEP = cast<GEPOperator>(U);
1318 Type *AccessType = nullptr;
1319 // For now, only provide the AccessType in the simple case where the GEP
1320 // only has one user.
1321 if (GEP->hasOneUser() && I)
1322 AccessType = I->user_back()->getAccessType();
1323
1324 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1325 Operands.front(), Operands.drop_front(),
1326 AccessType, CostKind);
1327 }
1328 case Instruction::Add:
1329 case Instruction::FAdd:
1330 case Instruction::Sub:
1331 case Instruction::FSub:
1332 case Instruction::Mul:
1333 case Instruction::FMul:
1334 case Instruction::UDiv:
1335 case Instruction::SDiv:
1336 case Instruction::FDiv:
1337 case Instruction::URem:
1338 case Instruction::SRem:
1339 case Instruction::FRem:
1340 case Instruction::Shl:
1341 case Instruction::LShr:
1342 case Instruction::AShr:
1343 case Instruction::And:
1344 case Instruction::Or:
1345 case Instruction::Xor:
1346 case Instruction::FNeg: {
1348 TTI::OperandValueInfo Op2Info;
1349 if (Opcode != Instruction::FNeg)
1350 Op2Info = TTI::getOperandInfo(Operands[1]);
1351 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1352 Op2Info, Operands, I);
1353 }
1354 case Instruction::IntToPtr:
1355 case Instruction::PtrToInt:
1356 case Instruction::SIToFP:
1357 case Instruction::UIToFP:
1358 case Instruction::FPToUI:
1359 case Instruction::FPToSI:
1360 case Instruction::Trunc:
1361 case Instruction::FPTrunc:
1362 case Instruction::BitCast:
1363 case Instruction::FPExt:
1364 case Instruction::SExt:
1365 case Instruction::ZExt:
1366 case Instruction::AddrSpaceCast: {
1367 Type *OpTy = Operands[0]->getType();
1368 return TargetTTI->getCastInstrCost(
1369 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1370 }
1371 case Instruction::Store: {
1372 auto *SI = cast<StoreInst>(U);
1373 Type *ValTy = Operands[0]->getType();
1375 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1376 SI->getPointerAddressSpace(), CostKind,
1377 OpInfo, I);
1378 }
1379 case Instruction::Load: {
1380 // FIXME: Arbitary cost which could come from the backend.
1382 return 4;
1383 auto *LI = cast<LoadInst>(U);
1384 Type *LoadType = U->getType();
1385 // If there is a non-register sized type, the cost estimation may expand
1386 // it to be several instructions to load into multiple registers on the
1387 // target. But, if the only use of the load is a trunc instruction to a
1388 // register sized type, the instruction selector can combine these
1389 // instructions to be a single load. So, in this case, we use the
1390 // destination type of the trunc instruction rather than the load to
1391 // accurately estimate the cost of this load instruction.
1392 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1393 !LoadType->isVectorTy()) {
1394 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1395 LoadType = TI->getDestTy();
1396 }
1397 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1399 {TTI::OK_AnyValue, TTI::OP_None}, I);
1400 }
1401 case Instruction::Select: {
1402 const Value *Op0, *Op1;
1403 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1404 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1405 // select x, y, false --> x & y
1406 // select x, true, y --> x | y
1407 const auto Op1Info = TTI::getOperandInfo(Op0);
1408 const auto Op2Info = TTI::getOperandInfo(Op1);
1409 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1410 Op1->getType()->getScalarSizeInBits() == 1);
1411
1413 return TargetTTI->getArithmeticInstrCost(
1414 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1415 CostKind, Op1Info, Op2Info, Operands, I);
1416 }
1417 const auto Op1Info = TTI::getOperandInfo(Operands[1]);
1418 const auto Op2Info = TTI::getOperandInfo(Operands[2]);
1419 Type *CondTy = Operands[0]->getType();
1420 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1422 CostKind, Op1Info, Op2Info, I);
1423 }
1424 case Instruction::ICmp:
1425 case Instruction::FCmp: {
1426 const auto Op1Info = TTI::getOperandInfo(Operands[0]);
1427 const auto Op2Info = TTI::getOperandInfo(Operands[1]);
1428 Type *ValTy = Operands[0]->getType();
1429 // TODO: Also handle ICmp/FCmp constant expressions.
1430 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1431 I ? cast<CmpInst>(I)->getPredicate()
1433 CostKind, Op1Info, Op2Info, I);
1434 }
1435 case Instruction::InsertElement: {
1436 auto *IE = dyn_cast<InsertElementInst>(U);
1437 if (!IE)
1438 return TTI::TCC_Basic; // FIXME
1439 unsigned Idx = -1;
1440 if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
1441 if (CI->getValue().getActiveBits() <= 32)
1442 Idx = CI->getZExtValue();
1443 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1444 }
1445 case Instruction::ShuffleVector: {
1446 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1447 if (!Shuffle)
1448 return TTI::TCC_Basic; // FIXME
1449
1450 auto *VecTy = cast<VectorType>(U->getType());
1451 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
1452 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1453 int NumSubElts, SubIndex;
1454
1455 // TODO: move more of this inside improveShuffleKindFromMask.
1456 if (Shuffle->changesLength()) {
1457 // Treat a 'subvector widening' as a free shuffle.
1458 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1459 return 0;
1460
1461 if (Shuffle->isExtractSubvectorMask(SubIndex))
1462 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1463 Mask, CostKind, SubIndex, VecTy,
1464 Operands, Shuffle);
1465
1466 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1467 return TargetTTI->getShuffleCost(
1468 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1469 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1470 Operands, Shuffle);
1471
1472 int ReplicationFactor, VF;
1473 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1474 APInt DemandedDstElts = APInt::getZero(Mask.size());
1475 for (auto I : enumerate(Mask)) {
1476 if (I.value() != PoisonMaskElem)
1477 DemandedDstElts.setBit(I.index());
1478 }
1479 return TargetTTI->getReplicationShuffleCost(
1480 VecSrcTy->getElementType(), ReplicationFactor, VF,
1481 DemandedDstElts, CostKind);
1482 }
1483
1484 bool IsUnary = isa<UndefValue>(Operands[1]);
1485 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1486 SmallVector<int, 16> AdjustMask(Mask);
1487
1488 // Widening shuffle - widening the source(s) to the new length
1489 // (treated as free - see above), and then perform the adjusted
1490 // shuffle at that width.
1491 if (Shuffle->increasesLength()) {
1492 for (int &M : AdjustMask)
1493 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1494
1495 return TargetTTI->getShuffleCost(
1497 AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1498 }
1499
1500 // Narrowing shuffle - perform shuffle at original wider width and
1501 // then extract the lower elements.
1502 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1503
1504 InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
1506 VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1507
1508 SmallVector<int, 16> ExtractMask(Mask.size());
1509 std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1510 return ShuffleCost + TargetTTI->getShuffleCost(
1511 TTI::SK_ExtractSubvector, VecSrcTy,
1512 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
1513 }
1514
1515 if (Shuffle->isIdentity())
1516 return 0;
1517
1518 if (Shuffle->isReverse())
1519 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1520 0, nullptr, Operands, Shuffle);
1521
1522 if (Shuffle->isSelect())
1523 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1524 0, nullptr, Operands, Shuffle);
1525
1526 if (Shuffle->isTranspose())
1527 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1528 CostKind, 0, nullptr, Operands,
1529 Shuffle);
1530
1531 if (Shuffle->isZeroEltSplat())
1532 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1533 CostKind, 0, nullptr, Operands,
1534 Shuffle);
1535
1536 if (Shuffle->isSingleSource())
1537 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1538 CostKind, 0, nullptr, Operands,
1539 Shuffle);
1540
1541 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1542 return TargetTTI->getShuffleCost(
1543 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1544 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1545 Shuffle);
1546
1547 if (Shuffle->isSplice(SubIndex))
1548 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1549 SubIndex, nullptr, Operands, Shuffle);
1550
1551 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1552 CostKind, 0, nullptr, Operands, Shuffle);
1553 }
1554 case Instruction::ExtractElement: {
1555 auto *EEI = dyn_cast<ExtractElementInst>(U);
1556 if (!EEI)
1557 return TTI::TCC_Basic; // FIXME
1558 unsigned Idx = -1;
1559 if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
1560 if (CI->getValue().getActiveBits() <= 32)
1561 Idx = CI->getZExtValue();
1562 Type *DstTy = Operands[0]->getType();
1563 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1564 }
1565 }
1566
1567 // By default, just classify everything as 'basic' or -1 to represent that
1568 // don't know the throughput cost.
1570 }
1571
1573 auto *TargetTTI = static_cast<T *>(this);
1574 SmallVector<const Value *, 4> Ops(I->operand_values());
1575 InstructionCost Cost = TargetTTI->getInstructionCost(
1578 }
1579
1580 bool supportsTailCallFor(const CallBase *CB) const {
1581 return static_cast<const T *>(this)->supportsTailCalls();
1582 }
1583};
1584} // namespace llvm
1585
1586#endif
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint32_t Index
uint64_t Size
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1015
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
an instruction to allocate memory on the stack
Definition: Instructions.h:63
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:395
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:219
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:709
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:743
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:617
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:421
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
The core instruction combiner logic.
Definition: InstCombiner.h:48
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
The optimization diagnostic interface.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
Multiway switch.
Provides information about what library functions are available for the current target.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
const DataLayout & getDataLayout() const
bool isLegalToVectorizeStore(StoreInst *SI) const
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
bool shouldTreatInstructionLikeSelect(const Instruction *I)
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
std::optional< unsigned > getVScaleForTuning() const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
bool isLegalICmpImmediate(int64_t Imm) const
bool hasConditionalLoadStoreForType(Type *Ty) const
unsigned getRegUsageForType(Type *Ty) const
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
bool isAlwaysUniform(const Value *V) const
bool isProfitableToHoist(Instruction *I) const
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
bool isTruncateFree(Type *Ty1, Type *Ty2) const
bool isStridedAccess(const SCEV *Ptr) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getBranchMispredictPenalty() const
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
InstructionCost getFPOpCost(Type *Ty) const
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
std::optional< unsigned > getMaxVScale() const
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool isProfitableLSRChainElement(Instruction *I) const
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
bool isNoopAddrSpaceCast(unsigned, unsigned) const
unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace)
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
unsigned getAssumedAddrSpace(const Value *V) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
bool isLegalNTStore(Type *DataType, Align Alignment) const
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
unsigned adjustInliningThreshold(const CallBase *CB) const
BranchProbability getPredictableBranchThreshold() const
std::optional< unsigned > getMinPageSize() const
uint64_t getFeatureMask(const Function &F) const
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
bool isSourceOfDivergence(const Value *V) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
unsigned getMaxInterleaveFactor(ElementCount VF) const
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const
bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool hasDivRemOp(Type *DataType, bool IsSigned) const
InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const
bool isLoweredToCall(const Function *F) const
bool hasBranchDivergence(const Function *F=nullptr) const
TargetTransformInfoImplBase(const DataLayout &DL)
bool isMultiversionedFunction(const Function &F) const
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
const char * getRegisterClassName(unsigned ClassID) const
bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
bool useColdCCForColdCall(Function &F) const
bool shouldExpandReduction(const IntrinsicInst *II) const
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={}) const
bool isLegalAddScalableImmediate(int64_t Imm) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
bool supportsTailCallFor(const CallBase *CB) const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
MemIndexedMode
The type of load/store indexing.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
constexpr int PoisonMaskElem
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
gep_type_iterator gep_type_begin(const User *GEP)
InstructionCost Cost
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is.
Describe known properties for a set of pointers.
Flags describing the kind of vector reduction.
Parameters that control the generic loop unrolling transformation.