LLVM 21.0.0git
BasicTTIImpl.h
Go to the documentation of this file.
1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/BitVector.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
41#include "llvm/IR/Intrinsics.h"
42#include "llvm/IR/Operator.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/Value.h"
52#include <algorithm>
53#include <cassert>
54#include <cstdint>
55#include <limits>
56#include <optional>
57#include <utility>
58
59namespace llvm {
60
61class Function;
62class GlobalValue;
63class LLVMContext;
64class ScalarEvolution;
65class SCEV;
66class TargetMachine;
67
68extern cl::opt<unsigned> PartialUnrollingThreshold;
69
70/// Base class which can be used to help build a TTI implementation.
71///
72/// This class provides as much implementation of the TTI interface as is
73/// possible using the target independent parts of the code generator.
74///
75/// In order to subclass it, your class must implement a getST() method to
76/// return the subtarget, and a getTLI() method to return the target lowering.
77/// We need these methods implemented in the derived class so that this class
78/// doesn't have to duplicate storage for them.
79template <typename T>
81private:
84
85 /// Helper function to access this as a T.
86 T *thisT() { return static_cast<T *>(this); }
87
88 /// Estimate a cost of Broadcast as an extract and sequence of insert
89 /// operations.
// NOTE(review): this scraped listing is missing original lines 91-92, which
// appear to carry the trailing TTI::TargetCostKind CostKind parameter and the
// local `InstructionCost Cost` declaration used below — confirm against the
// upstream file before editing.
90 InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy,
93 // Broadcast cost is equal to the cost of extracting the zero'th element
94 // plus the cost of inserting it into every element of the result vector.
// One extract of lane 0 from the source...
95 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
96 CostKind, 0, nullptr, nullptr);
97 
// ...then one insert per destination lane of the result vector.
98 for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
99 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
100 CostKind, i, nullptr, nullptr);
101 }
102 return Cost;
103 }
104
105 /// Estimate a cost of shuffle as a sequence of extract and insert
106 /// operations.
107 InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy,
110 // Shuffle cost is equal to the cost of extracting element from its argument
111 // plus the cost of inserting them onto the result vector.
112
113 // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
114 // index 0 of first vector, index 1 of second vector,index 2 of first
115 // vector and finally index 3 of second vector and insert them at index
116 // <0,1,2,3> of result vector.
117 for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
118 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
119 CostKind, i, nullptr, nullptr);
120 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
121 CostKind, i, nullptr, nullptr);
122 }
123 return Cost;
124 }
125
126 /// Estimate a cost of subvector extraction as a sequence of extract and
127 /// insert operations.
128 InstructionCost getExtractSubvectorOverhead(VectorType *VTy,
130 int Index,
131 FixedVectorType *SubVTy) {
132 assert(VTy && SubVTy &&
133 "Can only extract subvectors from vectors");
134 int NumSubElts = SubVTy->getNumElements();
135 assert((!isa<FixedVectorType>(VTy) ||
136 (Index + NumSubElts) <=
137 (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
138 "SK_ExtractSubvector index out of range");
139
141 // Subvector extraction cost is equal to the cost of extracting element from
142 // the source type plus the cost of inserting them into the result vector
143 // type.
144 for (int i = 0; i != NumSubElts; ++i) {
145 Cost +=
146 thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
147 CostKind, i + Index, nullptr, nullptr);
148 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
149 CostKind, i, nullptr, nullptr);
150 }
151 return Cost;
152 }
153
154 /// Estimate a cost of subvector insertion as a sequence of extract and
155 /// insert operations.
156 InstructionCost getInsertSubvectorOverhead(VectorType *VTy,
158 int Index,
159 FixedVectorType *SubVTy) {
160 assert(VTy && SubVTy &&
161 "Can only insert subvectors into vectors");
162 int NumSubElts = SubVTy->getNumElements();
163 assert((!isa<FixedVectorType>(VTy) ||
164 (Index + NumSubElts) <=
165 (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
166 "SK_InsertSubvector index out of range");
167
169 // Subvector insertion cost is equal to the cost of extracting element from
170 // the source type plus the cost of inserting them into the result vector
171 // type.
172 for (int i = 0; i != NumSubElts; ++i) {
173 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
174 CostKind, i, nullptr, nullptr);
175 Cost +=
176 thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind,
177 i + Index, nullptr, nullptr);
178 }
179 return Cost;
180 }
181
182 /// Local query method delegates up to T which *must* implement this!
183 const TargetSubtargetInfo *getST() const {
184 return static_cast<const T *>(this)->getST();
185 }
186
187 /// Local query method delegates up to T which *must* implement this!
188 const TargetLoweringBase *getTLI() const {
189 return static_cast<const T *>(this)->getTLI();
190 }
191
// Map a TTI memory-indexing mode onto the corresponding ISD mode.
192 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
193 switch (M) {
// NOTE(review): the scrape dropped original line 194, which appears to be the
// `case TTI::MIM_Unindexed:` label guarding this return — confirm upstream.
195 return ISD::UNINDEXED;
196 case TTI::MIM_PreInc:
197 return ISD::PRE_INC;
198 case TTI::MIM_PreDec:
199 return ISD::PRE_DEC;
200 case TTI::MIM_PostInc:
201 return ISD::POST_INC;
202 case TTI::MIM_PostDec:
203 return ISD::POST_DEC;
204 }
// All enumerators handled above; anything else is a programming error.
205 llvm_unreachable("Unexpected MemIndexedMode");
206 }
207
208 InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
209 Align Alignment,
210 bool VariableMask,
211 bool IsGatherScatter,
213 unsigned AddressSpace = 0) {
214 // We cannot scalarize scalable vectors, so return Invalid.
215 if (isa<ScalableVectorType>(DataTy))
217
218 auto *VT = cast<FixedVectorType>(DataTy);
219 unsigned VF = VT->getNumElements();
220
221 // Assume the target does not have support for gather/scatter operations
222 // and provide a rough estimate.
223 //
224 // First, compute the cost of the individual memory operations.
225 InstructionCost AddrExtractCost =
226 IsGatherScatter ? getScalarizationOverhead(
228 PointerType::get(VT->getContext(), 0), VF),
229 /*Insert=*/false, /*Extract=*/true, CostKind)
230 : 0;
231
232 // The cost of the scalar loads/stores.
233 InstructionCost MemoryOpCost =
234 VF * thisT()->getMemoryOpCost(Opcode, VT->getElementType(), Alignment,
236
237 // Next, compute the cost of packing the result in a vector.
238 InstructionCost PackingCost =
239 getScalarizationOverhead(VT, Opcode != Instruction::Store,
240 Opcode == Instruction::Store, CostKind);
241
242 InstructionCost ConditionalCost = 0;
243 if (VariableMask) {
244 // Compute the cost of conditionally executing the memory operations with
245 // variable masks. This includes extracting the individual conditions, a
246 // branches and PHIs to combine the results.
247 // NOTE: Estimating the cost of conditionally executing the memory
248 // operations accurately is quite difficult and the current solution
249 // provides a very rough estimate only.
250 ConditionalCost =
253 /*Insert=*/false, /*Extract=*/true, CostKind) +
254 VF * (thisT()->getCFInstrCost(Instruction::Br, CostKind) +
255 thisT()->getCFInstrCost(Instruction::PHI, CostKind));
256 }
257
258 return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
259 }
260
261 /// Checks if the provided mask \p is a splat mask, i.e. it contains only -1
262 /// or same non -1 index value and this index value contained at least twice.
263 /// So, mask <0, -1,-1, -1> is not considered splat (it is just identity),
264 /// same for <-1, 0, -1, -1> (just a slide), while <2, -1, 2, -1> is a splat
265 /// with \p Index=2.
266 static bool isSplatMask(ArrayRef<int> Mask, unsigned NumSrcElts, int &Index) {
267 // Check that the broadcast index meets at least twice.
268 bool IsCompared = false;
269 if (int SplatIdx = PoisonMaskElem;
270 all_of(enumerate(Mask), [&](const auto &P) {
271 if (P.value() == PoisonMaskElem)
272 return P.index() != Mask.size() - 1 || IsCompared;
273 if (static_cast<unsigned>(P.value()) >= NumSrcElts * 2)
274 return false;
275 if (SplatIdx == PoisonMaskElem) {
276 SplatIdx = P.value();
277 return P.index() != Mask.size() - 1;
278 }
279 IsCompared = true;
280 return SplatIdx == P.value();
281 })) {
282 Index = SplatIdx;
283 return true;
284 }
285 return false;
286 }
287
288protected:
/// Construct with the module's DataLayout; \p TM is accepted here but not
/// stored (the TargetMachine is reached through getTLI() where needed).
289 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
290 : BaseT(DL) {}
/// Defaulted virtual destructor for this polymorphic base.
291 virtual ~BasicTTIImplBase() = default;
292
294
295public:
296 /// \name Scalar TTI Implementations
297 /// @{
299 unsigned AddressSpace, Align Alignment,
300 unsigned *Fast) const {
301 EVT E = EVT::getIntegerVT(Context, BitWidth);
302 return getTLI()->allowsMisalignedMemoryAccesses(
304 }
305
306 bool areInlineCompatible(const Function *Caller,
307 const Function *Callee) const {
308 const TargetMachine &TM = getTLI()->getTargetMachine();
309
310 const FeatureBitset &CallerBits =
311 TM.getSubtargetImpl(*Caller)->getFeatureBits();
312 const FeatureBitset &CalleeBits =
313 TM.getSubtargetImpl(*Callee)->getFeatureBits();
314
315 // Inline a callee if its target-features are a subset of the callers
316 // target-features.
317 return (CallerBits & CalleeBits) == CalleeBits;
318 }
319
320 bool hasBranchDivergence(const Function *F = nullptr) { return false; }
321
322 bool isSourceOfDivergence(const Value *V) { return false; }
323
324 bool isAlwaysUniform(const Value *V) { return false; }
325
326 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
327 return false;
328 }
329
330 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
331 return true;
332 }
333
335 // Return an invalid address space.
336 return -1;
337 }
338
340 Intrinsic::ID IID) const {
341 return false;
342 }
343
344 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
345 return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
346 }
347
348 unsigned getAssumedAddrSpace(const Value *V) const {
349 return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
350 }
351
352 bool isSingleThreaded() const {
353 return getTLI()->getTargetMachine().Options.ThreadModel ==
355 }
356
357 std::pair<const Value *, unsigned>
359 return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
360 }
361
363 Value *NewV) const {
364 return nullptr;
365 }
366
367 bool isLegalAddImmediate(int64_t imm) {
368 return getTLI()->isLegalAddImmediate(imm);
369 }
370
371 bool isLegalAddScalableImmediate(int64_t Imm) {
372 return getTLI()->isLegalAddScalableImmediate(Imm);
373 }
374
375 bool isLegalICmpImmediate(int64_t imm) {
376 return getTLI()->isLegalICmpImmediate(imm);
377 }
378
379 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
380 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
381 Instruction *I = nullptr,
382 int64_t ScalableOffset = 0) {
384 AM.BaseGV = BaseGV;
385 AM.BaseOffs = BaseOffset;
386 AM.HasBaseReg = HasBaseReg;
387 AM.Scale = Scale;
388 AM.ScalableOffset = ScalableOffset;
389 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
390 }
391
392 int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) {
393 return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
394 }
395
396 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
397 Type *ScalarValTy) const {
398 auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
399 auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
400 EVT VT = getTLI()->getValueType(DL, SrcTy);
401 if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
402 getTLI()->isOperationCustom(ISD::STORE, VT))
403 return true;
404
405 EVT ValVT =
406 getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));
407 EVT LegalizedVT =
408 getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);
409 return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT);
410 };
411 while (VF > 2 && IsSupportedByTarget(VF))
412 VF /= 2;
413 return VF;
414 }
415
417 const DataLayout &DL) const {
418 EVT VT = getTLI()->getValueType(DL, Ty);
419 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
420 }
421
423 const DataLayout &DL) const {
424 EVT VT = getTLI()->getValueType(DL, Ty);
425 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
426 }
427
430 }
431
434 }
435
438 }
439
442 }
443
445 StackOffset BaseOffset, bool HasBaseReg,
446 int64_t Scale, unsigned AddrSpace) {
448 AM.BaseGV = BaseGV;
449 AM.BaseOffs = BaseOffset.getFixed();
450 AM.HasBaseReg = HasBaseReg;
451 AM.Scale = Scale;
452 AM.ScalableOffset = BaseOffset.getScalable();
453 if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
454 return 0;
455 return -1;
456 }
457
458 bool isTruncateFree(Type *Ty1, Type *Ty2) {
459 return getTLI()->isTruncateFree(Ty1, Ty2);
460 }
461
463 return getTLI()->isProfitableToHoist(I);
464 }
465
466 bool useAA() const { return getST()->useAA(); }
467
468 bool isTypeLegal(Type *Ty) {
469 EVT VT = getTLI()->getValueType(DL, Ty, /*AllowUnknown=*/true);
470 return getTLI()->isTypeLegal(VT);
471 }
472
473 unsigned getRegUsageForType(Type *Ty) {
474 EVT ETy = getTLI()->getValueType(DL, Ty);
475 return getTLI()->getNumRegisters(Ty->getContext(), ETy);
476 }
477
481 return BaseT::getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
482 }
483
485 unsigned &JumpTableSize,
487 BlockFrequencyInfo *BFI) {
488 /// Try to find the estimated number of clusters. Note that the number of
489 /// clusters identified in this function could be different from the actual
490 /// numbers found in lowering. This function ignore switches that are
491 /// lowered with a mix of jump table / bit test / BTree. This function was
492 /// initially intended to be used when estimating the cost of switch in
493 /// inline cost heuristic, but it's a generic cost model to be used in other
494 /// places (e.g., in loop unrolling).
495 unsigned N = SI.getNumCases();
496 const TargetLoweringBase *TLI = getTLI();
497 const DataLayout &DL = this->getDataLayout();
498
499 JumpTableSize = 0;
500 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
501
502 // Early exit if both a jump table and bit test are not allowed.
503 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
504 return N;
505
506 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
507 APInt MinCaseVal = MaxCaseVal;
508 for (auto CI : SI.cases()) {
509 const APInt &CaseVal = CI.getCaseValue()->getValue();
510 if (CaseVal.sgt(MaxCaseVal))
511 MaxCaseVal = CaseVal;
512 if (CaseVal.slt(MinCaseVal))
513 MinCaseVal = CaseVal;
514 }
515
516 // Check if suitable for a bit test
517 if (N <= DL.getIndexSizeInBits(0u)) {
519 for (auto I : SI.cases())
520 Dests.insert(I.getCaseSuccessor());
521
522 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
523 DL))
524 return 1;
525 }
526
527 // Check if suitable for a jump table.
528 if (IsJTAllowed) {
529 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
530 return N;
532 (MaxCaseVal - MinCaseVal)
533 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
534 // Check whether a range of clusters is dense enough for a jump table
535 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
536 JumpTableSize = Range;
537 return 1;
538 }
539 }
540 return N;
541 }
542
544 const TargetLoweringBase *TLI = getTLI();
545 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
546 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
547 }
548
550 const TargetMachine &TM = getTLI()->getTargetMachine();
551 // If non-PIC mode, do not generate a relative lookup table.
552 if (!TM.isPositionIndependent())
553 return false;
554
555 /// Relative lookup table entries consist of 32-bit offsets.
556 /// Do not generate relative lookup tables for large code models
557 /// in 64-bit achitectures where 32-bit offsets might not be enough.
558 if (TM.getCodeModel() == CodeModel::Medium ||
559 TM.getCodeModel() == CodeModel::Large)
560 return false;
561
562 const Triple &TargetTriple = TM.getTargetTriple();
563 if (!TargetTriple.isArch64Bit())
564 return false;
565
566 // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it
567 // there.
568 if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())
569 return false;
570
571 return true;
572 }
573
574 bool haveFastSqrt(Type *Ty) {
575 const TargetLoweringBase *TLI = getTLI();
576 EVT VT = TLI->getValueType(DL, Ty);
577 return TLI->isTypeLegal(VT) &&
579 }
580
582 return true;
583 }
584
586 // Check whether FADD is available, as a proxy for floating-point in
587 // general.
588 const TargetLoweringBase *TLI = getTLI();
589 EVT VT = TLI->getValueType(DL, Ty);
593 }
594
596 const Function &Fn) const {
597 switch (Inst.getOpcode()) {
598 default:
599 break;
600 case Instruction::SDiv:
601 case Instruction::SRem:
602 case Instruction::UDiv:
603 case Instruction::URem: {
604 if (!isa<ConstantInt>(Inst.getOperand(1)))
605 return false;
606 EVT VT = getTLI()->getValueType(DL, Inst.getType());
607 return !getTLI()->isIntDivCheap(VT, Fn.getAttributes());
608 }
609 };
610
611 return false;
612 }
613
614 unsigned getInliningThresholdMultiplier() const { return 1; }
615 unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }
616 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
617 return 0;
618 }
619
620 int getInlinerVectorBonusPercent() const { return 150; }
621
625 // This unrolling functionality is target independent, but to provide some
626 // motivation for its intended use, for x86:
627
628 // According to the Intel 64 and IA-32 Architectures Optimization Reference
629 // Manual, Intel Core models and later have a loop stream detector (and
630 // associated uop queue) that can benefit from partial unrolling.
631 // The relevant requirements are:
632 // - The loop must have no more than 4 (8 for Nehalem and later) branches
633 // taken, and none of them may be calls.
634 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
635
636 // According to the Software Optimization Guide for AMD Family 15h
637 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
638 // and loop buffer which can benefit from partial unrolling.
639 // The relevant requirements are:
640 // - The loop must have fewer than 16 branches
641 // - The loop must have less than 40 uops in all executed loop branches
642
643 // The number of taken branches in a loop is hard to estimate here, and
644 // benchmarking has revealed that it is better not to be conservative when
645 // estimating the branch count. As a result, we'll ignore the branch limits
646 // until someone finds a case where it matters in practice.
647
648 unsigned MaxOps;
649 const TargetSubtargetInfo *ST = getST();
650 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
652 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
653 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
654 else
655 return;
656
657 // Scan the loop: don't unroll loops with calls.
658 for (BasicBlock *BB : L->blocks()) {
659 for (Instruction &I : *BB) {
660 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
661 if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
662 if (!thisT()->isLoweredToCall(F))
663 continue;
664 }
665
666 if (ORE) {
667 ORE->emit([&]() {
668 return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
669 L->getHeader())
670 << "advising against unrolling the loop because it "
671 "contains a "
672 << ore::NV("Call", &I);
673 });
674 }
675 return;
676 }
677 }
678 }
679
680 // Enable runtime and partial unrolling up to the specified size.
681 // Enable using trip count upper bound to unroll loops.
682 UP.Partial = UP.Runtime = UP.UpperBound = true;
683 UP.PartialThreshold = MaxOps;
684
685 // Avoid unrolling when optimizing for size.
686 UP.OptSizeThreshold = 0;
688
689 // Set number of instructions optimized when "back edge"
690 // becomes "fall through" to default value of 2.
691 UP.BEInsns = 2;
692 }
693
696 PP.PeelCount = 0;
697 PP.AllowPeeling = true;
698 PP.AllowLoopNestsPeeling = false;
699 PP.PeelProfiledIterations = true;
700 }
701
703 AssumptionCache &AC,
704 TargetLibraryInfo *LibInfo,
705 HardwareLoopInfo &HWLoopInfo) {
706 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
707 }
708
711 }
712
715 }
716
718 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {
719 return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
720 }
721
722 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
723 IntrinsicInst &II) {
725 }
726
727 std::optional<Value *>
729 APInt DemandedMask, KnownBits &Known,
730 bool &KnownBitsComputed) {
731 return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
732 KnownBitsComputed);
733 }
734
736 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
737 APInt &UndefElts2, APInt &UndefElts3,
738 std::function<void(Instruction *, unsigned, APInt, APInt &)>
739 SimplifyAndSetOp) {
741 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
742 SimplifyAndSetOp);
743 }
744
745 virtual std::optional<unsigned>
747 return std::optional<unsigned>(
748 getST()->getCacheSize(static_cast<unsigned>(Level)));
749 }
750
751 virtual std::optional<unsigned>
753 std::optional<unsigned> TargetResult =
754 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
755
756 if (TargetResult)
757 return TargetResult;
758
759 return BaseT::getCacheAssociativity(Level);
760 }
761
762 virtual unsigned getCacheLineSize() const {
763 return getST()->getCacheLineSize();
764 }
765
766 virtual unsigned getPrefetchDistance() const {
767 return getST()->getPrefetchDistance();
768 }
769
770 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
771 unsigned NumStridedMemAccesses,
772 unsigned NumPrefetches,
773 bool HasCall) const {
774 return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
775 NumPrefetches, HasCall);
776 }
777
778 virtual unsigned getMaxPrefetchIterationsAhead() const {
779 return getST()->getMaxPrefetchIterationsAhead();
780 }
781
782 virtual bool enableWritePrefetching() const {
783 return getST()->enableWritePrefetching();
784 }
785
786 virtual bool shouldPrefetchAddressSpace(unsigned AS) const {
787 return getST()->shouldPrefetchAddressSpace(AS);
788 }
789
790 /// @}
791
792 /// \name Vector TTI Implementations
793 /// @{
794
796 return TypeSize::getFixed(32);
797 }
798
799 std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
800 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
801 bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
802
803 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
804 /// are set if the demanded result elements need to be inserted and/or
805 /// extracted from vectors.
807 const APInt &DemandedElts,
808 bool Insert, bool Extract,
810 ArrayRef<Value *> VL = {}) {
811 /// FIXME: a bitfield is not a reasonable abstraction for talking about
812 /// which elements are needed from a scalable vector
813 if (isa<ScalableVectorType>(InTy))
815 auto *Ty = cast<FixedVectorType>(InTy);
816
817 assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
818 (VL.empty() || VL.size() == Ty->getNumElements()) &&
819 "Vector size mismatch");
820
822
823 for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
824 if (!DemandedElts[i])
825 continue;
826 if (Insert) {
827 Value *InsertedVal = VL.empty() ? nullptr : VL[i];
828 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
829 CostKind, i, nullptr, InsertedVal);
830 }
831 if (Extract)
832 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
833 CostKind, i, nullptr, nullptr);
834 }
835
836 return Cost;
837 }
838
840 return false;
841 }
842
844 unsigned ScalarOpdIdx) const {
845 return false;
846 }
847
849 int OpdIdx) const {
850 return OpdIdx == -1;
851 }
852
854 int RetIdx) const {
855 return RetIdx == 0;
856 }
857
858 /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
860 bool Extract,
862 if (isa<ScalableVectorType>(InTy))
864 auto *Ty = cast<FixedVectorType>(InTy);
865
866 APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
867 return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
868 CostKind);
869 }
870
871 /// Estimate the overhead of scalarizing an instructions unique
872 /// non-constant operands. The (potentially vector) types to use for each of
873 /// argument are passes via Tys.
878 assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
879
881 SmallPtrSet<const Value*, 4> UniqueOperands;
882 for (int I = 0, E = Args.size(); I != E; I++) {
883 // Disregard things like metadata arguments.
884 const Value *A = Args[I];
885 Type *Ty = Tys[I];
886 if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
887 !Ty->isPtrOrPtrVectorTy())
888 continue;
889
890 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
891 if (auto *VecTy = dyn_cast<VectorType>(Ty))
892 Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
893 /*Extract*/ true, CostKind);
894 }
895 }
896
897 return Cost;
898 }
899
900 /// Estimate the overhead of scalarizing the inputs and outputs of an
901 /// instruction, with return type RetTy and arguments Args of type Tys. If
902 /// Args are unknown (empty), then the cost associated with one argument is
903 /// added as a heuristic.
909 RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);
910 if (!Args.empty())
912 else
913 // When no information on arguments is provided, we add the cost
914 // associated with one argument as a heuristic.
915 Cost += getScalarizationOverhead(RetTy, /*Insert*/ false,
916 /*Extract*/ true, CostKind);
917
918 return Cost;
919 }
920
921 /// Estimate the cost of type-legalization and the legalized type.
922 std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
923 LLVMContext &C = Ty->getContext();
924 EVT MTy = getTLI()->getValueType(DL, Ty);
925
927 // We keep legalizing the type until we find a legal kind. We assume that
928 // the only operation that costs anything is the split. After splitting
929 // we need to handle two types.
930 while (true) {
932
934 // Ensure we return a sensible simple VT here, since many callers of
935 // this function require it.
936 MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
937 return std::make_pair(InstructionCost::getInvalid(), VT);
938 }
939
940 if (LK.first == TargetLoweringBase::TypeLegal)
941 return std::make_pair(Cost, MTy.getSimpleVT());
942
943 if (LK.first == TargetLoweringBase::TypeSplitVector ||
945 Cost *= 2;
946
947 // Do not loop with f128 type.
948 if (MTy == LK.second)
949 return std::make_pair(Cost, MTy.getSimpleVT());
950
951 // Keep legalizing the type.
952 MTy = LK.second;
953 }
954 }
955
956 unsigned getMaxInterleaveFactor(ElementCount VF) { return 1; }
957
959 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
962 ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr) {
963 // Check if any of the operands are vector operands.
964 const TargetLoweringBase *TLI = getTLI();
965 int ISD = TLI->InstructionOpcodeToISD(Opcode);
966 assert(ISD && "Invalid opcode");
967
968 // TODO: Handle more cost kinds.
970 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
971 Opd1Info, Opd2Info,
972 Args, CxtI);
973
974 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
975
976 bool IsFloat = Ty->isFPOrFPVectorTy();
977 // Assume that floating point arithmetic operations cost twice as much as
978 // integer operations.
979 InstructionCost OpCost = (IsFloat ? 2 : 1);
980
981 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
982 // The operation is legal. Assume it costs 1.
983 // TODO: Once we have extract/insert subvector cost we need to use them.
984 return LT.first * OpCost;
985 }
986
987 if (!TLI->isOperationExpand(ISD, LT.second)) {
988 // If the operation is custom lowered, then assume that the code is twice
989 // as expensive.
990 return LT.first * 2 * OpCost;
991 }
992
993 // An 'Expand' of URem and SRem is special because it may default
994 // to expanding the operation into a sequence of sub-operations
995 // i.e. X % Y -> X-(X/Y)*Y.
996 if (ISD == ISD::UREM || ISD == ISD::SREM) {
997 bool IsSigned = ISD == ISD::SREM;
998 if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
999 LT.second) ||
1000 TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
1001 LT.second)) {
1002 unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
1003 InstructionCost DivCost = thisT()->getArithmeticInstrCost(
1004 DivOpc, Ty, CostKind, Opd1Info, Opd2Info);
1005 InstructionCost MulCost =
1006 thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
1007 InstructionCost SubCost =
1008 thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
1009 return DivCost + MulCost + SubCost;
1010 }
1011 }
1012
1013 // We cannot scalarize scalable vectors, so return Invalid.
1014 if (isa<ScalableVectorType>(Ty))
1016
1017 // Else, assume that we need to scalarize this op.
1018 // TODO: If one of the types get legalized by splitting, handle this
1019 // similarly to what getCastInstrCost() does.
1020 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
1021 InstructionCost Cost = thisT()->getArithmeticInstrCost(
1022 Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
1023 Args, CxtI);
1024 // Return the cost of multiple scalar invocation plus the cost of
1025 // inserting and extracting the values.
1026 SmallVector<Type *> Tys(Args.size(), Ty);
1027 return getScalarizationOverhead(VTy, Args, Tys, CostKind) +
1028 VTy->getNumElements() * Cost;
1029 }
1030
1031 // We don't know anything about this scalar instruction.
1032 return OpCost;
1033 }
1034
// Refine a generic shuffle Kind into a more specific one by pattern-matching
// the constant Mask (reverse, broadcast/splat, select, transpose, splice,
// extract/insert-subvector). For the subvector refinements, Index and SubTy
// are written as out-parameters; otherwise the incoming Kind is returned.
// NOTE(review): doc line 1035 (the opening of the signature, presumably
// "static TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,")
// is missing from this copy -- confirm against the upstream file.
1036 ArrayRef<int> Mask,
1037 VectorType *Ty, int &Index,
1038 VectorType *&SubTy) const {
// An empty mask gives us nothing to refine on.
1039 if (Mask.empty())
1040 return Kind;
1041 int NumSrcElts = Ty->getElementCount().getKnownMinValue();
1042 switch (Kind) {
// NOTE(review): doc line 1043 (likely "case TTI::SK_PermuteSingleSrc: {")
// is missing here -- the checks below are single-source refinements.
1044 if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
1045 return TTI::SK_Reverse;
1046 if (ShuffleVectorInst::isZeroEltSplatMask(Mask, NumSrcElts))
1047 return TTI::SK_Broadcast;
1048 if (isSplatMask(Mask, NumSrcElts, Index))
1049 return TTI::SK_Broadcast;
// Extract-subvector only applies if the mask stays within the source width.
1050 if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
1051 (Index + Mask.size()) <= (size_t)NumSrcElts) {
1052 SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
// NOTE(review): doc line 1053 (likely "return TTI::SK_ExtractSubvector;")
// is missing from this copy.
1054 }
1055 break;
1056 }
1057 case TTI::SK_PermuteTwoSrc: {
1058 int NumSubElts;
1059 if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
1060 Mask, NumSrcElts, NumSubElts, Index)) {
// Reject inserts that would run off the end of the source vector.
1061 if (Index + NumSubElts > NumSrcElts)
1062 return Kind;
1063 SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts);
// NOTE(review): doc line 1064 (likely "return TTI::SK_InsertSubvector;")
// is missing from this copy.
1065 }
1066 if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts))
1067 return TTI::SK_Select;
1068 if (ShuffleVectorInst::isTransposeMask(Mask, NumSrcElts))
1069 return TTI::SK_Transpose;
1070 if (ShuffleVectorInst::isSpliceMask(Mask, NumSrcElts, Index))
1071 return TTI::SK_Splice;
1072 break;
1073 }
// Already-specific kinds are returned unchanged.
// NOTE(review): doc lines 1078-1079 (likely the SK_InsertSubvector and
// SK_ExtractSubvector case labels) are missing from this copy.
1074 case TTI::SK_Select:
1075 case TTI::SK_Reverse:
1076 case TTI::SK_Broadcast:
1077 case TTI::SK_Transpose:
1080 case TTI::SK_Splice:
1081 break;
1082 }
1083 return Kind;
1084 }
1085
// Generic shuffle cost: refine Kind from the Mask via
// improveShuffleKindFromMask(), then price the shuffle through the per-kind
// insert/extract overhead helpers. Only FixedVectorType operands reach the
// broadcast/permute helpers (guarded by the dyn_casts below).
// NOTE(review): several doc lines are missing from this copy: 1086/1088
// (the signature and the CostKind/Index parameters), 1096 (likely
// "[[fallthrough]];"), 1101-1102 (likely the SK_PermuteSingleSrc /
// SK_PermuteTwoSrc case labels), 1105-1106 and 1109 (likely the
// SK_ExtractSubvector and SK_InsertSubvector case labels). Confirm upstream.
1087 ArrayRef<int> Mask,
1089 VectorType *SubTp,
1090 ArrayRef<const Value *> Args = {},
1091 const Instruction *CxtI = nullptr) {
1092 switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
1093 case TTI::SK_Broadcast:
1094 if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1095 return getBroadcastShuffleOverhead(FVT, CostKind);
1097 case TTI::SK_Select:
1098 case TTI::SK_Splice:
1099 case TTI::SK_Reverse:
1100 case TTI::SK_Transpose:
1103 if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1104 return getPermuteShuffleOverhead(FVT, CostKind);
1107 return getExtractSubvectorOverhead(Tp, CostKind, Index,
1108 cast<FixedVectorType>(SubTp));
1110 return getInsertSubvectorOverhead(Tp, CostKind, Index,
1111 cast<FixedVectorType>(SubTp));
1112 }
1113 llvm_unreachable("Unknown TTI::ShuffleKind");
1114 }
1115
// Cost model for cast instructions (trunc/ext/bitcast/fp casts/addrspacecast).
// Strategy: first recognize free casts, then legal/promoted ones, then handle
// scalar casts, vector<->vector casts (including split legalization and full
// scalarization), and finally vector<->scalar bitcasts via stack spill/fill.
// NOTE(review): doc lines 1117-1118 (the CastContextHint CCH and CostKind
// parameters of the signature) are missing from this copy -- confirm upstream.
1116 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1119 const Instruction *I = nullptr) {
// If the target-independent base implementation already proves the cast
// free, trust it.
1120 if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
1121 return 0;
1122
1123 const TargetLoweringBase *TLI = getTLI();
1124 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1125 assert(ISD && "Invalid opcode");
1126 std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
1127 std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
1128
1129 TypeSize SrcSize = SrcLT.second.getSizeInBits();
1130 TypeSize DstSize = DstLT.second.getSizeInBits();
1131 bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
1132 bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();
1133
// Opcode-specific "free cast" checks.
1134 switch (Opcode) {
1135 default:
1136 break;
1137 case Instruction::Trunc:
1138 // Check for NOOP conversions.
1139 if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
1140 return 0;
1141 [[fallthrough]];
1142 case Instruction::BitCast:
1143 // Bitcast between types that are legalized to the same type are free and
1144 // assume int to/from ptr of the same size is also free.
1145 if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
1146 SrcSize == DstSize)
1147 return 0;
1148 break;
1149 case Instruction::FPExt:
1150 if (I && getTLI()->isExtFree(I))
1151 return 0;
1152 break;
1153 case Instruction::ZExt:
1154 if (TLI->isZExtFree(SrcLT.second, DstLT.second))
1155 return 0;
1156 [[fallthrough]];
1157 case Instruction::SExt:
1158 if (I && getTLI()->isExtFree(I))
1159 return 0;
1160
1161 // If this is a zext/sext of a load, return 0 if the corresponding
1162 // extending load exists on target and the result type is legal.
1163 if (CCH == TTI::CastContextHint::Normal) {
1164 EVT ExtVT = EVT::getEVT(Dst);
1165 EVT LoadVT = EVT::getEVT(Src);
1166 unsigned LType =
1167 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
1168 if (DstLT.first == SrcLT.first &&
1169 TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
1170 return 0;
1171 }
1172 break;
1173 case Instruction::AddrSpaceCast:
1174 if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
1175 Dst->getPointerAddressSpace()))
1176 return 0;
1177 break;
1178 }
1179
1180 auto *SrcVTy = dyn_cast<VectorType>(Src);
1181 auto *DstVTy = dyn_cast<VectorType>(Dst);
1182
1183 // If the cast is marked as legal (or promote) then assume low cost.
1184 if (SrcLT.first == DstLT.first &&
1185 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
1186 return SrcLT.first;
1187
1188 // Handle scalar conversions.
1189 if (!SrcVTy && !DstVTy) {
1190 // Just check the op cost. If the operation is legal then assume it costs
1191 // 1.
1192 if (!TLI->isOperationExpand(ISD, DstLT.second))
1193 return 1;
1194
1195 // Assume that illegal scalar instruction are expensive.
1196 return 4;
1197 }
1198
1199 // Check vector-to-vector casts.
1200 if (DstVTy && SrcVTy) {
1201 // If the cast is between same-sized registers, then the check is simple.
1202 if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
1203
1204 // Assume that Zext is done using AND.
1205 if (Opcode == Instruction::ZExt)
1206 return SrcLT.first;
1207
1208 // Assume that sext is done using SHL and SRA.
1209 if (Opcode == Instruction::SExt)
1210 return SrcLT.first * 2;
1211
1212 // Just check the op cost. If the operation is legal then assume it
1213 // costs
1214 // 1 and multiply by the type-legalization overhead.
1215 if (!TLI->isOperationExpand(ISD, DstLT.second))
1216 return SrcLT.first * 1;
1217 }
1218
1219 // If we are legalizing by splitting, query the concrete TTI for the cost
1220 // of casting the original vector twice. We also need to factor in the
1221 // cost of the split itself. Count that as 1, to be consistent with
1222 // getTypeLegalizationCost().
// NOTE(review): doc lines 1225 and 1228 (the right-hand sides of these
// comparisons, likely "TargetLowering::TypeSplitVector;") are missing from
// this copy -- confirm upstream.
1223 bool SplitSrc =
1224 TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
1226 bool SplitDst =
1227 TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
1229 if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
1230 DstVTy->getElementCount().isVector()) {
1231 Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
1232 Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
1233 T *TTI = static_cast<T *>(this);
1234 // If both types need to be split then the split is free.
1235 InstructionCost SplitCost =
1236 (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
1237 return SplitCost +
1238 (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
1239 CostKind, I));
1240 }
1241
1242 // Scalarization cost is Invalid, can't assume any num elements.
1243 if (isa<ScalableVectorType>(DstVTy))
// NOTE(review): doc line 1244 (likely
// "return InstructionCost::getInvalid();") is missing from this copy.
1245
1246 // In other cases where the source or destination are illegal, assume
1247 // the operation will get scalarized.
1248 unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
1249 InstructionCost Cost = thisT()->getCastInstrCost(
1250 Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);
1251
1252 // Return the cost of multiple scalar invocation plus the cost of
1253 // inserting and extracting the values.
1254 return getScalarizationOverhead(DstVTy, /*Insert*/ true, /*Extract*/ true,
1255 CostKind) +
1256 Num * Cost;
1257 }
1258
1259 // We already handled vector-to-vector and scalar-to-scalar conversions.
1260 // This
1261 // is where we handle bitcast between vectors and scalars. We need to assume
1262 // that the conversion is scalarized in one way or another.
1263 if (Opcode == Instruction::BitCast) {
1264 // Illegal bitcasts are done by storing and loading from a stack slot.
1265 return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert*/ false,
1266 /*Extract*/ true, CostKind)
1267 : 0) +
1268 (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert*/ true,
1269 /*Extract*/ false, CostKind)
1270 : 0);
1271 }
1272
1273 llvm_unreachable("Unhandled cast");
1274 }
1275
// Cost of extracting a vector element and extending it: modeled as the sum of
// an extractelement cost and the scalar cast cost.
// NOTE(review): doc lines 1276 (the opening of the signature) and 1278/1282
// (trailing arguments, likely the CostKind parameter and the cast-context /
// CostKind arguments to getCastInstrCost) are missing from this copy.
1277 VectorType *VecTy, unsigned Index) {
1279 return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
1280 CostKind, Index, nullptr, nullptr) +
1281 thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
1283 }
1284
// Control-flow instruction cost: simply defers to the base implementation.
// NOTE(review): doc line 1285 (the opening of the signature, including the
// Opcode and CostKind parameters) is missing from this copy.
1286 const Instruction *I = nullptr) {
1287 return BaseT::getCFInstrCost(Opcode, CostKind, I);
1288 }
1289
// Cost model for compare and select instructions. Legal (non-expanded)
// operations cost 1 per legalized register; otherwise vector operands are
// priced as full scalarization (per-element op + insert overhead).
// NOTE(review): doc lines missing from this copy: 1290 (the opening of the
// signature), 1292-1294 (likely the Op1Info/Op2Info operand-info and
// CostKind parameters), 1301 (the condition guarding the early base-class
// return, likely a CostKind check), and 1325 (likely
// "return InstructionCost::getInvalid();" for the scalable-vector case).
// Confirm against the upstream file.
1291 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
1295 const Instruction *I = nullptr) {
1296 const TargetLoweringBase *TLI = getTLI();
1297 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1298 assert(ISD && "Invalid opcode");
1299
1300 // TODO: Handle other cost kinds.
1302 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1303 Op1Info, Op2Info, I);
1304
1305 // Selects on vectors are actually vector selects.
1306 if (ISD == ISD::SELECT) {
1307 assert(CondTy && "CondTy must exist");
1308 if (CondTy->isVectorTy())
1309 ISD = ISD::VSELECT;
1310 }
1311 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
1312
// Not-expanded ops on types that legalize without losing vector-ness are
// cheap: one op per legalized part.
1313 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
1314 !TLI->isOperationExpand(ISD, LT.second)) {
1315 // The operation is legal. Assume it costs 1. Multiply
1316 // by the type-legalization overhead.
1317 return LT.first * 1;
1318 }
1319
1320 // Otherwise, assume that the cast is scalarized.
1321 // TODO: If one of the types get legalized by splitting, handle this
1322 // similarly to what getCastInstrCost() does.
1323 if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
1324 if (isa<ScalableVectorType>(ValTy))
1326
1327 unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
// The per-element compare/select operates on scalar condition values.
1328 if (CondTy)
1329 CondTy = CondTy->getScalarType();
1330 InstructionCost Cost =
1331 thisT()->getCmpSelInstrCost(Opcode, ValVTy->getScalarType(), CondTy,
1332 VecPred, CostKind, Op1Info, Op2Info, I);
1333
1334 // Return the cost of multiple scalar invocation plus the cost of
1335 // inserting and extracting the values.
1336 return getScalarizationOverhead(ValVTy, /*Insert*/ true,
1337 /*Extract*/ false, CostKind) +
1338 Num * Cost;
1339 }
1340
1341 // Unknown scalar opcode.
1342 return 1;
1343 }
1344
// Per-element vector insert/extract cost.
// NOTE(review): doc lines 1345-1346 (the opening of the signature: the
// opcode, value type and cost-kind parameters) are missing from this copy.
// The visible body prices the operation via the register usage of the
// scalar element type -- confirm against upstream before relying on it.
1347 unsigned Index, Value *Op0, Value *Op1) {
1348 return getRegUsageForType(Val->getScalarType());
1349 }
1350
1351 /// \param ScalarUserAndIdx encodes the information about extracts from a
1352 /// vector with 'Scalar' being the value being extracted,'User' being the user
1353 /// of the extract(nullptr if user is not known before vectorization) and
1354 /// 'Idx' being the extract lane.
// Default implementation: the scalar-user context is ignored and the cost is
// taken from the plain per-element getVectorInstrCost overload.
// NOTE(review): doc line 1355 (the opening of the signature) is missing
// from this copy.
1356 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
1357 Value *Scalar,
1358 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
1359 return thisT()->getVectorInstrCost(Opcode, Val, CostKind, Index, nullptr,
1360 nullptr);
1361 }
1362
// Instruction-based overload: for an insertelement instruction, forward its
// two operands so the per-operand overload can refine the cost; for other
// instructions the operands are passed as unknown (nullptr).
// NOTE(review): doc lines 1363-1364 (the opening of the signature, likely
// taking "const Instruction &I, Type *Val, TTI::TargetCostKind CostKind")
// are missing from this copy.
1365 unsigned Index) {
1366 Value *Op0 = nullptr;
1367 Value *Op1 = nullptr;
1368 if (auto *IE = dyn_cast<InsertElementInst>(&I)) {
1369 Op0 = IE->getOperand(0);
1370 Op1 = IE->getOperand(1);
1371 }
1372 return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0,
1373 Op1);
1374 }
1375
// Cost of a replication shuffle: each demanded source element of a VF-wide
// vector is duplicated ReplicationFactor times into a VF*Factor-wide result.
// Modeled as extracting the demanded source elements plus inserting the
// demanded destination elements.
// NOTE(review): doc lines 1379 (likely the trailing CostKind parameter and
// opening brace) and 1383 (likely "InstructionCost Cost;"/cost
// initialization) are missing from this copy -- confirm upstream.
1376 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1377 int VF,
1378 const APInt &DemandedDstElts,
1380 assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
1381 "Unexpected size of DemandedDstElts.");
1382
1384
1385 auto *SrcVT = FixedVectorType::get(EltTy, VF);
1386 auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);
1387
1388 // The Mask shuffling cost is extract all the elements of the Mask
1389 // and insert each of them Factor times into the wide vector:
1390 //
1391 // E.g. an interleaved group with factor 3:
1392 // %mask = icmp ult <8 x i32> %vec1, %vec2
1393 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1394 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1395 // The cost is estimated as extract all mask elements from the <8xi1> mask
1396 // vector and insert them factor times into the <24xi1> shuffled mask
1397 // vector.
// A destination element is demanded iff its source element is demanded;
// ScaleBitMask folds the replicated demand back onto the source lanes.
1398 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
1399 Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
1400 /*Insert*/ false,
1401 /*Extract*/ true, CostKind);
1402 Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
1403 /*Insert*/ true,
1404 /*Extract*/ false, CostKind);
1405
1406 return Cost;
1407 }
1408
// Cost model for plain load/store. Legal types cost one unit per legalized
// register; vector types that legalize to a wider type need a legal
// extending-load / truncating-store, otherwise the build/decompose of the
// vector is added as scalarization overhead.
// NOTE(review): doc lines missing from this copy: 1411-1412 (likely the
// AddressSpace/CostKind/operand-info parameters of the signature), 1422
// (likely an early-return guard on CostKind), 1430 (the left-hand side of
// the size comparison, likely "DL.getTypeStoreSizeInBits(Src) !="), 1435
// (likely the declaration of the LegalizeAction LA), and 1445 (likely
// "Cost += getScalarizationOverhead("). Confirm against upstream.
1410 getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
1413 const Instruction *I = nullptr) {
1414 assert(!Src->isVoidTy() && "Invalid type");
1415 // Assume types, such as structs, are expensive.
1416 if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
1417 return 4;
1418 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
1419
1420 // Assuming that all loads of legal types cost 1.
1421 InstructionCost Cost = LT.first;
1423 return Cost;
1424
1425 const DataLayout &DL = this->getDataLayout();
1426 if (Src->isVectorTy() &&
1427 // In practice it's not currently possible to have a change in lane
1428 // length for extending loads or truncating stores so both types should
1429 // have the same scalable property.
1431 LT.second.getSizeInBits())) {
1432 // This is a vector load that legalizes to a larger type than the vector
1433 // itself. Unless the corresponding extending load or truncating store is
1434 // legal, then this will scalarize.
1436 EVT MemVT = getTLI()->getValueType(DL, Src);
1437 if (Opcode == Instruction::Store)
1438 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
1439 else
1440 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
1441
1442 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
1443 // This is a vector load/store for some illegal type that is scalarized.
1444 // We must account for the cost of building or decomposing the vector.
// Loads insert the scalars into a vector; stores extract them.
1446 cast<VectorType>(Src), Opcode != Instruction::Store,
1447 Opcode == Instruction::Store, CostKind);
1448 }
1449 }
1450
1451 return Cost;
1452 }
1453
// Masked load/store cost: delegates to the common masked-memory helper with
// VariableMask=true and IsGatherScatter=false.
// NOTE(review): doc lines 1454 (the opening of the signature, including the
// opcode and data type parameters) and 1456 (likely the CostKind parameter)
// are missing from this copy.
1455 Align Alignment, unsigned AddressSpace,
1457 // TODO: Pass on AddressSpace when we have test coverage.
1458 return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
1459 CostKind);
1460 }
1461
// Gather/scatter cost: delegates to the common masked-memory helper with
// IsGatherScatter=true.
// NOTE(review): doc lines 1462 (the opening of the signature) and 1465
// (likely the CostKind parameter) are missing from this copy.
1463 const Value *Ptr, bool VariableMask,
1464 Align Alignment,
1466 const Instruction *I = nullptr) {
1467 return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
1468 true, CostKind);
1469 }
1470
// Expand-load / compress-store cost: conservatively modeled as a
// gather/scatter via the common masked-memory helper.
// NOTE(review): doc line 1471 (the opening of the signature, likely
// "InstructionCost getExpandCompressMemoryOpCost(") is missing from this
// copy.
1472 unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1473 TTI::TargetCostKind CostKind, const Instruction *I = nullptr) {
1474 // Treat expand load/compress store as gather/scatter operation.
1475 // TODO: implement more precise cost estimation for these intrinsics.
1476 return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
1477 /*IsGatherScatter*/ true, CostKind);
1478 }
1479
// Strided load/store cost: assume the operation lowers to a gather/scatter
// (which may itself be scalarized) and defer to that cost.
// NOTE(review): doc lines 1480 (the opening of the signature) and 1483
// (likely the CostKind parameter) are missing from this copy.
1481 const Value *Ptr, bool VariableMask,
1482 Align Alignment,
1484 const Instruction *I) {
1485 // For a target without strided memory operations (or for an illegal
1486 // operation type on one which does), assume we lower to a gather/scatter
1487 // operation. (Which may in turn be scalarized.)
1488 return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1489 Alignment, CostKind, I);
1490 }
1491
// Cost model for an interleaved memory operation of the given Factor:
// (1) the wide (possibly masked) load/store, scaled down by the fraction of
// legalized instructions actually used; (2) the shuffle work, modeled as
// insert/extract scalarization between the wide vector and the per-member
// subvectors; (3) if masked, the cost of replicating the member mask and,
// with gaps, And-ing in the gaps mask.
// NOTE(review): doc lines missing from this copy: 1492 (the opening of the
// signature), 1499 (likely "return InstructionCost::getInvalid();"), 1510
// (likely "InstructionCost Cost;"), and 1513 (likely the trailing
// AddressSpace/CostKind arguments to getMaskedMemoryOpCost). Confirm
// against the upstream file.
1493 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1494 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1495 bool UseMaskForCond = false, bool UseMaskForGaps = false) {
1496
1497 // We cannot scalarize scalable vectors, so return Invalid.
1498 if (isa<ScalableVectorType>(VecTy))
1500
1501 auto *VT = cast<FixedVectorType>(VecTy);
1502
1503 unsigned NumElts = VT->getNumElements();
1504 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
1505
1506 unsigned NumSubElts = NumElts / Factor;
1507 auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);
1508
1509 // Firstly, the cost of load/store operation.
1511 if (UseMaskForCond || UseMaskForGaps)
1512 Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
1514 else
1515 Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
1516 CostKind);
1517
1518 // Legalize the vector type, and get the legalized and unlegalized type
1519 // sizes.
1520 MVT VecTyLT = getTypeLegalizationCost(VecTy).second;
1521 unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
1522 unsigned VecTyLTSize = VecTyLT.getStoreSize();
1523
1524 // Scale the cost of the memory operation by the fraction of legalized
1525 // instructions that will actually be used. We shouldn't account for the
1526 // cost of dead instructions since they will be removed.
1527 //
1528 // E.g., An interleaved load of factor 8:
1529 // %vec = load <16 x i64>, <16 x i64>* %ptr
1530 // %v0 = shufflevector %vec, undef, <0, 8>
1531 //
1532 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
1533 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
1534 // type). The other loads are unused.
1535 //
1536 // TODO: Note that legalization can turn masked loads/stores into unmasked
1537 // (legalized) loads/stores. This can be reflected in the cost.
1538 if (Cost.isValid() && VecTySize > VecTyLTSize) {
1539 // The number of loads of a legal type it will take to represent a load
1540 // of the unlegalized vector type.
1541 unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
1542
1543 // The number of elements of the unlegalized type that correspond to a
1544 // single legal instruction.
1545 unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);
1546
1547 // Determine which legal instructions will be used.
1548 BitVector UsedInsts(NumLegalInsts, false);
1549 for (unsigned Index : Indices)
1550 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
1551 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
1552
1553 // Scale the cost of the load by the fraction of legal instructions that
1554 // will be used.
1555 Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts);
1556 }
1557
1558 // Then plus the cost of interleave operation.
1559 assert(Indices.size() <= Factor &&
1560 "Interleaved memory op has too many members");
1561
1562 const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
1563 const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);
1564
// Mark exactly the wide-vector lanes touched by the requested members.
1565 APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
1566 for (unsigned Index : Indices) {
1567 assert(Index < Factor && "Invalid index for interleaved memory op");
1568 for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
1569 DemandedLoadStoreElts.setBit(Index + Elm * Factor);
1570 }
1571
1572 if (Opcode == Instruction::Load) {
1573 // The interleave cost is similar to extract sub vectors' elements
1574 // from the wide vector, and insert them into sub vectors.
1575 //
1576 // E.g. An interleaved load of factor 2 (with one member of index 0):
1577 // %vec = load <8 x i32>, <8 x i32>* %ptr
1578 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
1579 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
1580 // <8 x i32> vector and insert them into a <4 x i32> vector.
1581 InstructionCost InsSubCost = thisT()->getScalarizationOverhead(
1582 SubVT, DemandedAllSubElts,
1583 /*Insert*/ true, /*Extract*/ false, CostKind);
1584 Cost += Indices.size() * InsSubCost;
1585 Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
1586 /*Insert*/ false,
1587 /*Extract*/ true, CostKind);
1588 } else {
1589 // The interleave cost is extract elements from sub vectors, and
1590 // insert them into the wide vector.
1591 //
1592 // E.g. An interleaved store of factor 3 with 2 members at indices 0,1:
1593 // (using VF=4):
1594 // %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
1595 // %gaps.mask = <true, true, false, true, true, false,
1596 // true, true, false, true, true, false>
1597 // call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
1598 // i32 Align, <12 x i1> %gaps.mask
1599 // The cost is estimated as extract all elements (of actual members,
1600 // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
1601 // i32> vector.
1602 InstructionCost ExtSubCost = thisT()->getScalarizationOverhead(
1603 SubVT, DemandedAllSubElts,
1604 /*Insert*/ false, /*Extract*/ true, CostKind);
1605 Cost += ExtSubCost * Indices.size();
1606 Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
1607 /*Insert*/ true,
1608 /*Extract*/ false, CostKind);
1609 }
1610
1611 if (!UseMaskForCond)
1612 return Cost;
1613
// The condition mask is an <NumSubElts x i1> value replicated Factor times;
// an i8 element type is used here as a stand-in for pricing the replication.
1614 Type *I8Type = Type::getInt8Ty(VT->getContext());
1615
1616 Cost += thisT()->getReplicationShuffleCost(
1617 I8Type, Factor, NumSubElts,
1618 UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
1619 CostKind);
1620
1621 // The Gaps mask is invariant and created outside the loop, therefore the
1622 // cost of creating it is not accounted for here. However if we have both
1623 // a MaskForGaps and some other mask that guards the execution of the
1624 // memory access, we need to account for the cost of And-ing the two masks
1625 // inside the loop.
1626 if (UseMaskForGaps) {
1627 auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
1628 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
1629 CostKind);
1630 }
1631
1632 return Cost;
1633 }
1634
1635 /// Get intrinsic cost based on arguments.
1638 // Check for generically free intrinsics.
1640 return 0;
1641
1642 // Assume that target intrinsics are cheap.
1643 Intrinsic::ID IID = ICA.getID();
1646
1647 // VP Intrinsics should have the same cost as their non-vp counterpart.
1648 // TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1649 // counterpart when the vector length argument is smaller than the maximum
1650 // vector length.
1651 // TODO: Support other kinds of VPIntrinsics
1652 if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
1653 std::optional<unsigned> FOp =
1655 if (FOp) {
1656 if (ICA.getID() == Intrinsic::vp_load) {
1657 Align Alignment;
1658 if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1659 Alignment = VPI->getPointerAlignment().valueOrOne();
1660 unsigned AS = 0;
1661 if (ICA.getArgTypes().size() > 1)
1662 if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0]))
1663 AS = PtrTy->getAddressSpace();
1664 return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
1665 AS, CostKind);
1666 }
1667 if (ICA.getID() == Intrinsic::vp_store) {
1668 Align Alignment;
1669 if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1670 Alignment = VPI->getPointerAlignment().valueOrOne();
1671 unsigned AS = 0;
1672 if (ICA.getArgTypes().size() >= 2)
1673 if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1]))
1674 AS = PtrTy->getAddressSpace();
1675 return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment,
1676 AS, CostKind);
1677 }
1679 return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
1680 CostKind);
1681 }
1682 if (VPCastIntrinsic::isVPCast(ICA.getID())) {
1683 return thisT()->getCastInstrCost(
1684 *FOp, ICA.getReturnType(), ICA.getArgTypes()[0],
1686 }
1687 if (VPCmpIntrinsic::isVPCmp(ICA.getID())) {
1688 // We can only handle vp_cmp intrinsics with underlying instructions.
1689 if (ICA.getInst()) {
1690 assert(FOp);
1691 auto *UI = cast<VPCmpIntrinsic>(ICA.getInst());
1692 return thisT()->getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0],
1693 ICA.getReturnType(),
1694 UI->getPredicate(), CostKind);
1695 }
1696 }
1697 }
1698
1699 std::optional<Intrinsic::ID> FID =
1701 if (FID) {
1702 // Non-vp version will have same arg types except mask and vector
1703 // length.
1704 assert(ICA.getArgTypes().size() >= 2 &&
1705 "Expected VPIntrinsic to have Mask and Vector Length args and "
1706 "types");
1708
1709 // VPReduction intrinsics have a start value argument that their non-vp
1710 // counterparts do not have, except for the fadd and fmul non-vp
1711 // counterpart.
1713 *FID != Intrinsic::vector_reduce_fadd &&
1714 *FID != Intrinsic::vector_reduce_fmul)
1715 NewTys = NewTys.drop_front();
1716
1717 IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewTys,
1718 ICA.getFlags());
1719 return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
1720 }
1721 }
1722
1723 if (ICA.isTypeBasedOnly())
1725
1726 Type *RetTy = ICA.getReturnType();
1727
1728 ElementCount RetVF =
1729 (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
1731 const IntrinsicInst *I = ICA.getInst();
1732 const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
1733 FastMathFlags FMF = ICA.getFlags();
1734 switch (IID) {
1735 default:
1736 break;
1737
1738 case Intrinsic::powi:
1739 if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
1740 bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
1741 if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
1742 ShouldOptForSize)) {
1743 // The cost is modeled on the expansion performed by ExpandPowI in
1744 // SelectionDAGBuilder.
1745 APInt Exponent = RHSC->getValue().abs();
1746 unsigned ActiveBits = Exponent.getActiveBits();
1747 unsigned PopCount = Exponent.popcount();
1748 InstructionCost Cost = (ActiveBits + PopCount - 2) *
1749 thisT()->getArithmeticInstrCost(
1750 Instruction::FMul, RetTy, CostKind);
1751 if (RHSC->isNegative())
1752 Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
1753 CostKind);
1754 return Cost;
1755 }
1756 }
1757 break;
1758 case Intrinsic::cttz:
1759 // FIXME: If necessary, this should go in target-specific overrides.
1760 if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy))
1762 break;
1763
1764 case Intrinsic::ctlz:
1765 // FIXME: If necessary, this should go in target-specific overrides.
1766 if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy))
1768 break;
1769
1770 case Intrinsic::memcpy:
1771 return thisT()->getMemcpyCost(ICA.getInst());
1772
1773 case Intrinsic::masked_scatter: {
1774 const Value *Mask = Args[3];
1775 bool VarMask = !isa<Constant>(Mask);
1776 Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
1777 return thisT()->getGatherScatterOpCost(Instruction::Store,
1778 ICA.getArgTypes()[0], Args[1],
1779 VarMask, Alignment, CostKind, I);
1780 }
1781 case Intrinsic::masked_gather: {
1782 const Value *Mask = Args[2];
1783 bool VarMask = !isa<Constant>(Mask);
1784 Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
1785 return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
1786 VarMask, Alignment, CostKind, I);
1787 }
1788 case Intrinsic::masked_compressstore: {
1789 const Value *Data = Args[0];
1790 const Value *Mask = Args[2];
1791 Align Alignment = I->getParamAlign(1).valueOrOne();
1792 return thisT()->getExpandCompressMemoryOpCost(
1793 Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
1794 CostKind, I);
1795 }
1796 case Intrinsic::masked_expandload: {
1797 const Value *Mask = Args[1];
1798 Align Alignment = I->getParamAlign(0).valueOrOne();
1799 return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
1800 !isa<Constant>(Mask),
1801 Alignment, CostKind, I);
1802 }
1803 case Intrinsic::experimental_vp_strided_store: {
1804 const Value *Data = Args[0];
1805 const Value *Ptr = Args[1];
1806 const Value *Mask = Args[3];
1807 const Value *EVL = Args[4];
1808 bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
1809 Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
1810 Align Alignment =
1811 I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
1812 return thisT()->getStridedMemoryOpCost(Instruction::Store,
1813 Data->getType(), Ptr, VarMask,
1814 Alignment, CostKind, I);
1815 }
1816 case Intrinsic::experimental_vp_strided_load: {
1817 const Value *Ptr = Args[0];
1818 const Value *Mask = Args[2];
1819 const Value *EVL = Args[3];
1820 bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
1821 Type *EltTy = cast<VectorType>(RetTy)->getElementType();
1822 Align Alignment =
1823 I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
1824 return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
1825 VarMask, Alignment, CostKind, I);
1826 }
1827 case Intrinsic::stepvector: {
1828 if (isa<ScalableVectorType>(RetTy))
1830 // The cost of materialising a constant integer vector.
1832 }
1833 case Intrinsic::vector_extract: {
1834 // FIXME: Handle case where a scalable vector is extracted from a scalable
1835 // vector
1836 if (isa<ScalableVectorType>(RetTy))
1838 unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
1839 return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1840 cast<VectorType>(Args[0]->getType()), {},
1841 CostKind, Index, cast<VectorType>(RetTy));
1842 }
1843 case Intrinsic::vector_insert: {
1844 // FIXME: Handle case where a scalable vector is inserted into a scalable
1845 // vector
1846 if (isa<ScalableVectorType>(Args[1]->getType()))
1848 unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1849 return thisT()->getShuffleCost(
1850 TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), {},
1851 CostKind, Index, cast<VectorType>(Args[1]->getType()));
1852 }
1853 case Intrinsic::vector_reverse: {
1854 return thisT()->getShuffleCost(TTI::SK_Reverse,
1855 cast<VectorType>(Args[0]->getType()), {},
1856 CostKind, 0, cast<VectorType>(RetTy));
1857 }
1858 case Intrinsic::vector_splice: {
1859 unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1860 return thisT()->getShuffleCost(TTI::SK_Splice,
1861 cast<VectorType>(Args[0]->getType()), {},
1862 CostKind, Index, cast<VectorType>(RetTy));
1863 }
1864 case Intrinsic::vector_reduce_add:
1865 case Intrinsic::vector_reduce_mul:
1866 case Intrinsic::vector_reduce_and:
1867 case Intrinsic::vector_reduce_or:
1868 case Intrinsic::vector_reduce_xor:
1869 case Intrinsic::vector_reduce_smax:
1870 case Intrinsic::vector_reduce_smin:
1871 case Intrinsic::vector_reduce_fmax:
1872 case Intrinsic::vector_reduce_fmin:
1873 case Intrinsic::vector_reduce_fmaximum:
1874 case Intrinsic::vector_reduce_fminimum:
1875 case Intrinsic::vector_reduce_umax:
1876 case Intrinsic::vector_reduce_umin: {
1877 IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
1879 }
1880 case Intrinsic::vector_reduce_fadd:
1881 case Intrinsic::vector_reduce_fmul: {
1883 IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
1885 }
1886 case Intrinsic::fshl:
1887 case Intrinsic::fshr: {
1888 const Value *X = Args[0];
1889 const Value *Y = Args[1];
1890 const Value *Z = Args[2];
1893 const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
1894 const TTI::OperandValueInfo OpInfoBW =
1896 isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1897 : TTI::OP_None};
1898
1899 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1900 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1902 Cost +=
1903 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1904 Cost +=
1905 thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
1906 Cost += thisT()->getArithmeticInstrCost(
1907 BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
1908 {OpInfoZ.Kind, TTI::OP_None});
1909 Cost += thisT()->getArithmeticInstrCost(
1910 BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
1911 {OpInfoZ.Kind, TTI::OP_None});
1912 // Non-constant shift amounts requires a modulo.
1913 if (!OpInfoZ.isConstant())
1914 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1915 CostKind, OpInfoZ, OpInfoBW);
1916 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1917 if (X != Y) {
1918 Type *CondTy = RetTy->getWithNewBitWidth(1);
1919 Cost +=
1920 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1922 Cost +=
1923 thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1925 }
1926 return Cost;
1927 }
1928 case Intrinsic::get_active_lane_mask: {
1929 EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
1930 EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
1931
1932 // If we're not expanding the intrinsic then we assume this is cheap
1933 // to implement.
1934 if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
1935 return getTypeLegalizationCost(RetTy).first;
1936 }
1937
1938 // Create the expanded types that will be used to calculate the uadd_sat
1939 // operation.
1940 Type *ExpRetTy = VectorType::get(
1941 ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
1942 IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
1944 thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
1945 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
1947 return Cost;
1948 }
1949 case Intrinsic::experimental_cttz_elts: {
1950 EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
1951
1952 // If we're not expanding the intrinsic then we assume this is cheap
1953 // to implement.
1954 if (!getTLI()->shouldExpandCttzElements(ArgType))
1955 return getTypeLegalizationCost(RetTy).first;
1956
1957 // TODO: The costs below reflect the expansion code in
1958 // SelectionDAGBuilder, but we may want to sacrifice some accuracy in
1959 // favour of compile time.
1960
1961 // Find the smallest "sensible" element type to use for the expansion.
1962 bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero();
1963 ConstantRange VScaleRange(APInt(64, 1), APInt::getZero(64));
1964 if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller())
1965 VScaleRange = getVScaleRange(I->getCaller(), 64);
1966
1967 unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
1968 RetTy, ArgType.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
1969 Type *NewEltTy = IntegerType::getIntNTy(RetTy->getContext(), EltWidth);
1970
1971 // Create the new vector type & get the vector length
1972 Type *NewVecTy = VectorType::get(
1973 NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount());
1974
1975 IntrinsicCostAttributes StepVecAttrs(Intrinsic::stepvector, NewVecTy, {},
1976 FMF);
1978 thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);
1979
1980 Cost +=
1981 thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
1982 Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy,
1983 Args[0]->getType(),
1985 Cost +=
1986 thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);
1987
1988 IntrinsicCostAttributes ReducAttrs(Intrinsic::vector_reduce_umax,
1989 NewEltTy, NewVecTy, FMF, I, 1);
1990 Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
1991 Cost +=
1992 thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);
1993
1994 return Cost;
1995 }
1996 case Intrinsic::experimental_vector_match:
1997 return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
1998 }
1999
2000 // Assume that we need to scalarize this intrinsic.
2001 // Compute the scalarization overhead based on Args for a vector
2002 // intrinsic.
2003 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
2004 if (RetVF.isVector() && !RetVF.isScalable()) {
2005 ScalarizationCost = 0;
2006 if (!RetTy->isVoidTy())
2007 ScalarizationCost += getScalarizationOverhead(
2008 cast<VectorType>(RetTy),
2009 /*Insert*/ true, /*Extract*/ false, CostKind);
2010 ScalarizationCost +=
2012 }
2013
2014 IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
2015 ScalarizationCost);
2016 return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
2017 }
2018
2019 /// Get intrinsic cost based on argument types.
2020 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
2021 /// cost of scalarizing the arguments and the return value will be computed
2022 /// based on types.
2026 Intrinsic::ID IID = ICA.getID();
2027 Type *RetTy = ICA.getReturnType();
2028 const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
2029 FastMathFlags FMF = ICA.getFlags();
2030 InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();
2031 bool SkipScalarizationCost = ICA.skipScalarizationCost();
2032
2033 VectorType *VecOpTy = nullptr;
2034 if (!Tys.empty()) {
2035 // The vector reduction operand is operand 0 except for fadd/fmul.
2036 // Their operand 0 is a scalar start value, so the vector op is operand 1.
2037 unsigned VecTyIndex = 0;
2038 if (IID == Intrinsic::vector_reduce_fadd ||
2039 IID == Intrinsic::vector_reduce_fmul)
2040 VecTyIndex = 1;
2041 assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
2042 VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
2043 }
2044
2045 // Library call cost - other than size, make it expensive.
2046 unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
2047 unsigned ISD = 0;
2048 switch (IID) {
2049 default: {
2050 // Scalable vectors cannot be scalarized, so return Invalid.
2051 if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2052 return isa<ScalableVectorType>(Ty);
2053 }))
2055
2056 // Assume that we need to scalarize this intrinsic.
2057 InstructionCost ScalarizationCost =
2058 SkipScalarizationCost ? ScalarizationCostPassed : 0;
2059 unsigned ScalarCalls = 1;
2060 Type *ScalarRetTy = RetTy;
2061 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2062 if (!SkipScalarizationCost)
2063 ScalarizationCost = getScalarizationOverhead(
2064 RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind);
2065 ScalarCalls = std::max(ScalarCalls,
2066 cast<FixedVectorType>(RetVTy)->getNumElements());
2067 ScalarRetTy = RetTy->getScalarType();
2068 }
2069 SmallVector<Type *, 4> ScalarTys;
2070 for (Type *Ty : Tys) {
2071 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2072 if (!SkipScalarizationCost)
2073 ScalarizationCost += getScalarizationOverhead(
2074 VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
2075 ScalarCalls = std::max(ScalarCalls,
2076 cast<FixedVectorType>(VTy)->getNumElements());
2077 Ty = Ty->getScalarType();
2078 }
2079 ScalarTys.push_back(Ty);
2080 }
2081 if (ScalarCalls == 1)
2082 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
2083
2084 IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
2085 InstructionCost ScalarCost =
2086 thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
2087
2088 return ScalarCalls * ScalarCost + ScalarizationCost;
2089 }
2090 // Look for intrinsics that can be lowered directly or turned into a scalar
2091 // intrinsic call.
2092 case Intrinsic::sqrt:
2093 ISD = ISD::FSQRT;
2094 break;
2095 case Intrinsic::sin:
2096 ISD = ISD::FSIN;
2097 break;
2098 case Intrinsic::cos:
2099 ISD = ISD::FCOS;
2100 break;
2101 case Intrinsic::sincos:
2102 ISD = ISD::FSINCOS;
2103 break;
2104 case Intrinsic::sincospi:
2105 ISD = ISD::FSINCOSPI;
2106 break;
2107 case Intrinsic::modf:
2108 ISD = ISD::FMODF;
2109 break;
2110 case Intrinsic::tan:
2111 ISD = ISD::FTAN;
2112 break;
2113 case Intrinsic::asin:
2114 ISD = ISD::FASIN;
2115 break;
2116 case Intrinsic::acos:
2117 ISD = ISD::FACOS;
2118 break;
2119 case Intrinsic::atan:
2120 ISD = ISD::FATAN;
2121 break;
2122 case Intrinsic::atan2:
2123 ISD = ISD::FATAN2;
2124 break;
2125 case Intrinsic::sinh:
2126 ISD = ISD::FSINH;
2127 break;
2128 case Intrinsic::cosh:
2129 ISD = ISD::FCOSH;
2130 break;
2131 case Intrinsic::tanh:
2132 ISD = ISD::FTANH;
2133 break;
2134 case Intrinsic::exp:
2135 ISD = ISD::FEXP;
2136 break;
2137 case Intrinsic::exp2:
2138 ISD = ISD::FEXP2;
2139 break;
2140 case Intrinsic::exp10:
2141 ISD = ISD::FEXP10;
2142 break;
2143 case Intrinsic::log:
2144 ISD = ISD::FLOG;
2145 break;
2146 case Intrinsic::log10:
2147 ISD = ISD::FLOG10;
2148 break;
2149 case Intrinsic::log2:
2150 ISD = ISD::FLOG2;
2151 break;
2152 case Intrinsic::fabs:
2153 ISD = ISD::FABS;
2154 break;
2155 case Intrinsic::canonicalize:
2156 ISD = ISD::FCANONICALIZE;
2157 break;
2158 case Intrinsic::minnum:
2159 ISD = ISD::FMINNUM;
2160 break;
2161 case Intrinsic::maxnum:
2162 ISD = ISD::FMAXNUM;
2163 break;
2164 case Intrinsic::minimum:
2165 ISD = ISD::FMINIMUM;
2166 break;
2167 case Intrinsic::maximum:
2168 ISD = ISD::FMAXIMUM;
2169 break;
2170 case Intrinsic::minimumnum:
2171 ISD = ISD::FMINIMUMNUM;
2172 break;
2173 case Intrinsic::maximumnum:
2174 ISD = ISD::FMAXIMUMNUM;
2175 break;
2176 case Intrinsic::copysign:
2177 ISD = ISD::FCOPYSIGN;
2178 break;
2179 case Intrinsic::floor:
2180 ISD = ISD::FFLOOR;
2181 break;
2182 case Intrinsic::ceil:
2183 ISD = ISD::FCEIL;
2184 break;
2185 case Intrinsic::trunc:
2186 ISD = ISD::FTRUNC;
2187 break;
2188 case Intrinsic::nearbyint:
2189 ISD = ISD::FNEARBYINT;
2190 break;
2191 case Intrinsic::rint:
2192 ISD = ISD::FRINT;
2193 break;
2194 case Intrinsic::lrint:
2195 ISD = ISD::LRINT;
2196 break;
2197 case Intrinsic::llrint:
2198 ISD = ISD::LLRINT;
2199 break;
2200 case Intrinsic::round:
2201 ISD = ISD::FROUND;
2202 break;
2203 case Intrinsic::roundeven:
2204 ISD = ISD::FROUNDEVEN;
2205 break;
2206 case Intrinsic::pow:
2207 ISD = ISD::FPOW;
2208 break;
2209 case Intrinsic::fma:
2210 ISD = ISD::FMA;
2211 break;
2212 case Intrinsic::fmuladd:
2213 ISD = ISD::FMA;
2214 break;
2215 case Intrinsic::experimental_constrained_fmuladd:
2216 ISD = ISD::STRICT_FMA;
2217 break;
2218 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
2219 case Intrinsic::lifetime_start:
2220 case Intrinsic::lifetime_end:
2221 case Intrinsic::sideeffect:
2222 case Intrinsic::pseudoprobe:
2223 case Intrinsic::arithmetic_fence:
2224 return 0;
2225 case Intrinsic::masked_store: {
2226 Type *Ty = Tys[0];
2227 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
2228 return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
2229 CostKind);
2230 }
2231 case Intrinsic::masked_load: {
2232 Type *Ty = RetTy;
2233 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
2234 return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
2235 CostKind);
2236 }
2237 case Intrinsic::experimental_vp_strided_store: {
2238 auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
2239 Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
2240 return thisT()->getStridedMemoryOpCost(
2241 Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
2242 Alignment, CostKind, ICA.getInst());
2243 }
2244 case Intrinsic::experimental_vp_strided_load: {
2245 auto *Ty = cast<VectorType>(ICA.getReturnType());
2246 Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
2247 return thisT()->getStridedMemoryOpCost(
2248 Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
2249 Alignment, CostKind, ICA.getInst());
2250 }
2251 case Intrinsic::vector_reduce_add:
2252 case Intrinsic::vector_reduce_mul:
2253 case Intrinsic::vector_reduce_and:
2254 case Intrinsic::vector_reduce_or:
2255 case Intrinsic::vector_reduce_xor:
2256 return thisT()->getArithmeticReductionCost(
2257 getArithmeticReductionInstruction(IID), VecOpTy, std::nullopt,
2258 CostKind);
2259 case Intrinsic::vector_reduce_fadd:
2260 case Intrinsic::vector_reduce_fmul:
2261 return thisT()->getArithmeticReductionCost(
2262 getArithmeticReductionInstruction(IID), VecOpTy, FMF, CostKind);
2263 case Intrinsic::vector_reduce_smax:
2264 case Intrinsic::vector_reduce_smin:
2265 case Intrinsic::vector_reduce_umax:
2266 case Intrinsic::vector_reduce_umin:
2267 case Intrinsic::vector_reduce_fmax:
2268 case Intrinsic::vector_reduce_fmin:
2269 case Intrinsic::vector_reduce_fmaximum:
2270 case Intrinsic::vector_reduce_fminimum:
2271 return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
2272 VecOpTy, ICA.getFlags(), CostKind);
2273 case Intrinsic::experimental_vector_match: {
2274 auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);
2275 auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
2276 unsigned SearchSize = NeedleTy->getNumElements();
2277
2278 // If we're not expanding the intrinsic then we assume this is cheap to
2279 // implement.
2280 EVT SearchVT = getTLI()->getValueType(DL, SearchTy);
2281 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
2282 return getTypeLegalizationCost(RetTy).first;
2283
2284 // Approximate the cost based on the expansion code in
2285 // SelectionDAGBuilder.
2287 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,
2288 CostKind, 1, nullptr, nullptr);
2289 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
2290 CostKind, 0, nullptr, nullptr);
2291 Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
2292 CostKind, 0, nullptr);
2293 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
2295 Cost +=
2296 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
2297 Cost *= SearchSize;
2298 Cost +=
2299 thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
2300 return Cost;
2301 }
2302 case Intrinsic::abs:
2303 ISD = ISD::ABS;
2304 break;
2305 case Intrinsic::fshl:
2306 ISD = ISD::FSHL;
2307 break;
2308 case Intrinsic::fshr:
2309 ISD = ISD::FSHR;
2310 break;
2311 case Intrinsic::smax:
2312 ISD = ISD::SMAX;
2313 break;
2314 case Intrinsic::smin:
2315 ISD = ISD::SMIN;
2316 break;
2317 case Intrinsic::umax:
2318 ISD = ISD::UMAX;
2319 break;
2320 case Intrinsic::umin:
2321 ISD = ISD::UMIN;
2322 break;
2323 case Intrinsic::sadd_sat:
2324 ISD = ISD::SADDSAT;
2325 break;
2326 case Intrinsic::ssub_sat:
2327 ISD = ISD::SSUBSAT;
2328 break;
2329 case Intrinsic::uadd_sat:
2330 ISD = ISD::UADDSAT;
2331 break;
2332 case Intrinsic::usub_sat:
2333 ISD = ISD::USUBSAT;
2334 break;
2335 case Intrinsic::smul_fix:
2336 ISD = ISD::SMULFIX;
2337 break;
2338 case Intrinsic::umul_fix:
2339 ISD = ISD::UMULFIX;
2340 break;
2341 case Intrinsic::sadd_with_overflow:
2342 ISD = ISD::SADDO;
2343 break;
2344 case Intrinsic::ssub_with_overflow:
2345 ISD = ISD::SSUBO;
2346 break;
2347 case Intrinsic::uadd_with_overflow:
2348 ISD = ISD::UADDO;
2349 break;
2350 case Intrinsic::usub_with_overflow:
2351 ISD = ISD::USUBO;
2352 break;
2353 case Intrinsic::smul_with_overflow:
2354 ISD = ISD::SMULO;
2355 break;
2356 case Intrinsic::umul_with_overflow:
2357 ISD = ISD::UMULO;
2358 break;
2359 case Intrinsic::fptosi_sat:
2360 ISD = ISD::FP_TO_SINT_SAT;
2361 break;
2362 case Intrinsic::fptoui_sat:
2363 ISD = ISD::FP_TO_UINT_SAT;
2364 break;
2365 case Intrinsic::ctpop:
2366 ISD = ISD::CTPOP;
2367 // In case of legalization use TCC_Expensive. This is cheaper than a
2368 // library call but still not a cheap instruction.
2369 SingleCallCost = TargetTransformInfo::TCC_Expensive;
2370 break;
2371 case Intrinsic::ctlz:
2372 ISD = ISD::CTLZ;
2373 break;
2374 case Intrinsic::cttz:
2375 ISD = ISD::CTTZ;
2376 break;
2377 case Intrinsic::bswap:
2378 ISD = ISD::BSWAP;
2379 break;
2380 case Intrinsic::bitreverse:
2381 ISD = ISD::BITREVERSE;
2382 break;
2383 case Intrinsic::ucmp:
2384 ISD = ISD::UCMP;
2385 break;
2386 case Intrinsic::scmp:
2387 ISD = ISD::SCMP;
2388 break;
2389 }
2390
2391 auto *ST = dyn_cast<StructType>(RetTy);
2392 Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
2393 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);
2394
2395 const TargetLoweringBase *TLI = getTLI();
2396
2397 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
2398 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
2399 TLI->isFAbsFree(LT.second)) {
2400 return 0;
2401 }
2402
2403 // The operation is legal. Assume it costs 1.
2404 // If the type is split to multiple registers, assume that there is some
2405 // overhead to this.
2406 // TODO: Once we have extract/insert subvector cost we need to use them.
2407 if (LT.first > 1)
2408 return (LT.first * 2);
2409 else
2410 return (LT.first * 1);
2411 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
2412 // If the operation is custom lowered then assume
2413 // that the code is twice as expensive.
2414 return (LT.first * 2);
2415 }
2416
2417 switch (IID) {
2418 case Intrinsic::fmuladd: {
2419 // If we can't lower fmuladd into an FMA estimate the cost as a floating
2420 // point mul followed by an add.
2421
2422 return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
2423 CostKind) +
2424 thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
2425 CostKind);
2426 }
2427 case Intrinsic::experimental_constrained_fmuladd: {
2428 IntrinsicCostAttributes FMulAttrs(
2429 Intrinsic::experimental_constrained_fmul, RetTy, Tys);
2430 IntrinsicCostAttributes FAddAttrs(
2431 Intrinsic::experimental_constrained_fadd, RetTy, Tys);
2432 return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
2433 thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
2434 }
2435 case Intrinsic::smin:
2436 case Intrinsic::smax:
2437 case Intrinsic::umin:
2438 case Intrinsic::umax: {
2439 // minmax(X,Y) = select(icmp(X,Y),X,Y)
2440 Type *CondTy = RetTy->getWithNewBitWidth(1);
2441 bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
2442 CmpInst::Predicate Pred =
2443 IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
2445 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2446 Pred, CostKind);
2447 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2448 Pred, CostKind);
2449 return Cost;
2450 }
2451 case Intrinsic::sadd_with_overflow:
2452 case Intrinsic::ssub_with_overflow: {
2453 Type *SumTy = RetTy->getContainedType(0);
2454 Type *OverflowTy = RetTy->getContainedType(1);
2455 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
2456 ? BinaryOperator::Add
2457 : BinaryOperator::Sub;
2458
2459 // Add:
2460 // Overflow -> (Result < LHS) ^ (RHS < 0)
2461 // Sub:
2462 // Overflow -> (Result < LHS) ^ (RHS > 0)
2464 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2465 Cost +=
2466 2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
2468 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2469 CostKind);
2470 return Cost;
2471 }
2472 case Intrinsic::uadd_with_overflow:
2473 case Intrinsic::usub_with_overflow: {
2474 Type *SumTy = RetTy->getContainedType(0);
2475 Type *OverflowTy = RetTy->getContainedType(1);
2476 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2477 ? BinaryOperator::Add
2478 : BinaryOperator::Sub;
2479 CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2482
2484 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2485 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
2486 OverflowTy, Pred, CostKind);
2487 return Cost;
2488 }
2489 case Intrinsic::smul_with_overflow:
2490 case Intrinsic::umul_with_overflow: {
2491 Type *MulTy = RetTy->getContainedType(0);
2492 Type *OverflowTy = RetTy->getContainedType(1);
2493 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
2494 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
2495 bool IsSigned = IID == Intrinsic::smul_with_overflow;
2496
2497 unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
2499
2501 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
2502 Cost +=
2503 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2504 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
2505 CCH, CostKind);
2506 Cost += thisT()->getArithmeticInstrCost(
2507 Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2509
2510 if (IsSigned)
2511 Cost += thisT()->getArithmeticInstrCost(
2512 Instruction::AShr, MulTy, CostKind,
2515
2516 Cost += thisT()->getCmpSelInstrCost(
2517 BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
2518 return Cost;
2519 }
2520 case Intrinsic::sadd_sat:
2521 case Intrinsic::ssub_sat: {
2522 // Assume a default expansion.
2523 Type *CondTy = RetTy->getWithNewBitWidth(1);
2524
2525 Type *OpTy = StructType::create({RetTy, CondTy});
2526 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
2527 ? Intrinsic::sadd_with_overflow
2528 : Intrinsic::ssub_with_overflow;
2530
2531 // SatMax -> Overflow && SumDiff < 0
2532 // SatMin -> Overflow && SumDiff >= 0
2534 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
2535 nullptr, ScalarizationCostPassed);
2536 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2537 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2538 Pred, CostKind);
2539 Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
2540 CondTy, Pred, CostKind);
2541 return Cost;
2542 }
2543 case Intrinsic::uadd_sat:
2544 case Intrinsic::usub_sat: {
2545 Type *CondTy = RetTy->getWithNewBitWidth(1);
2546
2547 Type *OpTy = StructType::create({RetTy, CondTy});
2548 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
2549 ? Intrinsic::uadd_with_overflow
2550 : Intrinsic::usub_with_overflow;
2551
2553 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
2554 nullptr, ScalarizationCostPassed);
2555 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2556 Cost +=
2557 thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2559 return Cost;
2560 }
2561 case Intrinsic::smul_fix:
2562 case Intrinsic::umul_fix: {
2563 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
2564 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
2565
2566 unsigned ExtOp =
2567 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
2569
2571 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
2572 Cost +=
2573 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2574 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
2575 CCH, CostKind);
2576 Cost += thisT()->getArithmeticInstrCost(
2577 Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2579 Cost += thisT()->getArithmeticInstrCost(
2580 Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2582 Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
2583 return Cost;
2584 }
2585 case Intrinsic::abs: {
2586 // abs(X) = select(icmp(X,0),X,sub(0,X))
2587 Type *CondTy = RetTy->getWithNewBitWidth(1);
2590 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2591 Pred, CostKind);
2592 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2593 Pred, CostKind);
2594 // TODO: Should we add an OperandValueProperties::OP_Zero property?
2595 Cost += thisT()->getArithmeticInstrCost(
2596 BinaryOperator::Sub, RetTy, CostKind,
2598 return Cost;
2599 }
2600 case Intrinsic::fshl:
2601 case Intrinsic::fshr: {
2602 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
2603 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
2604 Type *CondTy = RetTy->getWithNewBitWidth(1);
2606 Cost +=
2607 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
2608 Cost +=
2609 thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
2610 Cost +=
2611 thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind);
2612 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
2613 CostKind);
2614 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
2615 CostKind);
2616 // Shift-by-zero handling.
2617 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2619 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2621 return Cost;
2622 }
2623 case Intrinsic::fptosi_sat:
2624 case Intrinsic::fptoui_sat: {
2625 if (Tys.empty())
2626 break;
2627 Type *FromTy = Tys[0];
2628 bool IsSigned = IID == Intrinsic::fptosi_sat;
2629
2631 IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
2632 {FromTy, FromTy});
2633 Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
2634 IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
2635 {FromTy, FromTy});
2636 Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
2637 Cost += thisT()->getCastInstrCost(
2638 IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
2640 if (IsSigned) {
2641 Type *CondTy = RetTy->getWithNewBitWidth(1);
2642 Cost += thisT()->getCmpSelInstrCost(
2643 BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2644 Cost += thisT()->getCmpSelInstrCost(
2645 BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2646 }
2647 return Cost;
2648 }
2649 case Intrinsic::ucmp:
2650 case Intrinsic::scmp: {
2651 Type *CmpTy = Tys[0];
2652 Type *CondTy = RetTy->getWithNewBitWidth(1);
2654 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
2656 CostKind) +
2657 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
2659 CostKind);
2660
2661 EVT VT = TLI->getValueType(DL, CmpTy, true);
2662 if (TLI->shouldExpandCmpUsingSelects(VT)) {
2663 // x < y ? -1 : (x > y ? 1 : 0)
2664 Cost += 2 * thisT()->getCmpSelInstrCost(
2665 BinaryOperator::Select, RetTy, CondTy,
2667 } else {
2668 // zext(x > y) - zext(x < y)
2669 Cost +=
2670 2 * thisT()->getCastInstrCost(CastInst::ZExt, RetTy, CondTy,
2672 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
2673 CostKind);
2674 }
2675 return Cost;
2676 }
2677 default:
2678 break;
2679 }
2680
2681 // Else, assume that we need to scalarize this intrinsic. For math builtins
2682 // this will emit a costly libcall, adding call overhead and spills. Make it
2683 // very expensive.
2684 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2685 // Scalable vectors cannot be scalarized, so return Invalid.
2686 if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2687 return isa<ScalableVectorType>(Ty);
2688 }))
2690
2691 InstructionCost ScalarizationCost =
2692 SkipScalarizationCost
2693 ? ScalarizationCostPassed
2694 : getScalarizationOverhead(RetVTy, /*Insert*/ true,
2695 /*Extract*/ false, CostKind);
2696
2697 unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2698 SmallVector<Type *, 4> ScalarTys;
2699 for (Type *Ty : Tys) {
2700 if (Ty->isVectorTy())
2701 Ty = Ty->getScalarType();
2702 ScalarTys.push_back(Ty);
2703 }
2704 IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2705 InstructionCost ScalarCost =
2706 thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2707 for (Type *Ty : Tys) {
2708 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2709 if (!ICA.skipScalarizationCost())
2710 ScalarizationCost += getScalarizationOverhead(
2711 VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
2712 ScalarCalls = std::max(ScalarCalls,
2713 cast<FixedVectorType>(VTy)->getNumElements());
2714 }
2715 }
2716 return ScalarCalls * ScalarCost + ScalarizationCost;
2717 }
2718
2719 // This is going to be turned into a library call, make it expensive.
2720 return SingleCallCost;
2721 }
2722
2723 /// Compute a cost of the given call instruction.
2724 ///
2725 /// Compute the cost of calling function F with return type RetTy and
2726 /// argument types Tys. F might be nullptr, in this case the cost of an
2727 /// arbitrary call with the specified signature will be returned.
2728 /// This is used, for instance, when we estimate call of a vector
2729 /// counterpart of the given function.
2730 /// \param F Called function, might be nullptr.
2731 /// \param RetTy Return value types.
2732 /// \param Tys Argument types.
2733 /// \returns The cost of Call instruction.
2735 ArrayRef<Type *> Tys,
2737 return 10;
2738 }
2739
2740 unsigned getNumberOfParts(Type *Tp) {
2741 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
2742 if (!LT.first.isValid())
2743 return 0;
2744 // Try to find actual number of parts for non-power-of-2 elements as
2745 // ceil(num-of-elements/num-of-subtype-elements).
2746 if (auto *FTp = dyn_cast<FixedVectorType>(Tp);
2747 Tp && LT.second.isFixedLengthVector() &&
2748 !has_single_bit(FTp->getNumElements())) {
2749 if (auto *SubTp = dyn_cast_if_present<FixedVectorType>(
2750 EVT(LT.second).getTypeForEVT(Tp->getContext()));
2751 SubTp && SubTp->getElementType() == FTp->getElementType())
2752 return divideCeil(FTp->getNumElements(), SubTp->getNumElements());
2753 }
2754 return *LT.first.getValue();
2755 }
2756
2758 const SCEV *) {
2759 return 0;
2760 }
2761
2762 /// Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
2763 /// We're assuming that the reduction operation is performed in the following way:
2764 ///
2765 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
2766 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
2767 /// \----------------v-------------/ \----------v------------/
2768 /// n/2 elements n/2 elements
2769 /// %red1 = op <n x t> %val, <n x t> val1
2770 /// After this operation we have a vector %red1 where only the first n/2
2771 /// elements are meaningful, the second n/2 elements are undefined and can be
2772 /// dropped. All other operations are actually working with the vector of
2773 /// length n/2, not n, though the real vector length is still n.
2774 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
2775 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
2776 /// \----------------v-------------/ \----------v------------/
2777 /// n/4 elements 3*n/4 elements
2778 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
2779 /// length n/2, the resulting vector has length n/4 etc.
2780 ///
2781 /// The cost model should take into account that the actual length of the
2782 /// vector is reduced on each iteration.
2785 // Targets must implement a default value for the scalable case, since
2786 // we don't know how many lanes the vector has.
2787 if (isa<ScalableVectorType>(Ty))
2789
2790 Type *ScalarTy = Ty->getElementType();
2791 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2792 if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
2793 ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
2794 NumVecElts >= 2) {
2795 // Or reduction for i1 is represented as:
2796 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2797 // %res = cmp ne iReduxWidth %val, 0
2798 // And reduction for i1 is represented as:
2799 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2800 // %res = cmp eq iReduxWidth %val, 11111
2801 Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
2802 return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
2804 thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
2807 }
2808 unsigned NumReduxLevels = Log2_32(NumVecElts);
2809 InstructionCost ArithCost = 0;
2810 InstructionCost ShuffleCost = 0;
2811 std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
2812 unsigned LongVectorCount = 0;
2813 unsigned MVTLen =
2814 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2815 while (NumVecElts > MVTLen) {
2816 NumVecElts /= 2;
2817 VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2818 ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
2819 CostKind, NumVecElts, SubTy);
2820 ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
2821 Ty = SubTy;
2822 ++LongVectorCount;
2823 }
2824
2825 NumReduxLevels -= LongVectorCount;
2826
2827 // The minimal length of the vector is limited by the real length of vector
2828 // operations performed on the current platform. That's why several final
2829 // reduction operations are performed on the vectors with the same
2830 // architecture-dependent length.
2831
2832 // By default reductions need one shuffle per reduction level.
2833 ShuffleCost +=
2834 NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2835 {}, CostKind, 0, Ty);
2836 ArithCost +=
2837 NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
2838 return ShuffleCost + ArithCost +
2839 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
2840 CostKind, 0, nullptr, nullptr);
2841 }
2842
2843 /// Try to calculate the cost of performing strict (in-order) reductions,
2844 /// which involves doing a sequence of floating point additions in lane
2845 /// order, starting with an initial value. For example, consider a scalar
2846 /// initial value 'InitVal' of type float and a vector of type <4 x float>:
2847 ///
2848 /// Vector = <float %v0, float %v1, float %v2, float %v3>
2849 ///
2850 /// %add1 = %InitVal + %v0
2851 /// %add2 = %add1 + %v1
2852 /// %add3 = %add2 + %v2
2853 /// %add4 = %add3 + %v3
2854 ///
2855 /// As a simple estimate we can say the cost of such a reduction is 4 times
2856 /// the cost of a scalar FP addition. We can only estimate the costs for
2857 /// fixed-width vectors here because for scalable vectors we do not know the
2858 /// runtime number of operations.
2861 // Targets must implement a default value for the scalable case, since
2862 // we don't know how many lanes the vector has.
2863 if (isa<ScalableVectorType>(Ty))
2865
2866 auto *VTy = cast<FixedVectorType>(Ty);
2868 VTy, /*Insert=*/false, /*Extract=*/true, CostKind);
2869 InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
2870 Opcode, VTy->getElementType(), CostKind);
2871 ArithCost *= VTy->getNumElements();
2872
2873 return ExtractCost + ArithCost;
2874 }
2875
2877 std::optional<FastMathFlags> FMF,
2879 assert(Ty && "Unknown reduction vector type");
2881 return getOrderedReductionCost(Opcode, Ty, CostKind);
2882 return getTreeReductionCost(Opcode, Ty, CostKind);
2883 }
2884
2885 /// Try to calculate op costs for min/max reduction operations.
2886 /// \param CondTy Conditional type for the Select instruction.
2888 FastMathFlags FMF,
2890 // Targets must implement a default value for the scalable case, since
2891 // we don't know how many lanes the vector has.
2892 if (isa<ScalableVectorType>(Ty))
2894
2895 Type *ScalarTy = Ty->getElementType();
2896 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2897 unsigned NumReduxLevels = Log2_32(NumVecElts);
2898 InstructionCost MinMaxCost = 0;
2899 InstructionCost ShuffleCost = 0;
2900 std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
2901 unsigned LongVectorCount = 0;
2902 unsigned MVTLen =
2903 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2904 while (NumVecElts > MVTLen) {
2905 NumVecElts /= 2;
2906 auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2907
2908 ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
2909 CostKind, NumVecElts, SubTy);
2910
2911 IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
2912 MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
2913 Ty = SubTy;
2914 ++LongVectorCount;
2915 }
2916
2917 NumReduxLevels -= LongVectorCount;
2918
2919 // The minimal length of the vector is limited by the real length of vector
2920 // operations performed on the current platform. That's why several final
2921 // reduction opertions are perfomed on the vectors with the same
2922 // architecture-dependent length.
2923 ShuffleCost +=
2924 NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2925 {}, CostKind, 0, Ty);
2926 IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
2927 MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
2928 // The last min/max should be in vector registers and we counted it above.
2929 // So just need a single extractelement.
2930 return ShuffleCost + MinMaxCost +
2931 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
2932 CostKind, 0, nullptr, nullptr);
2933 }
2934
2935 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
2936 Type *ResTy, VectorType *Ty,
2937 FastMathFlags FMF,
2939 if (auto *FTy = dyn_cast<FixedVectorType>(Ty);
2940 FTy && IsUnsigned && Opcode == Instruction::Add &&
2941 FTy->getElementType() == IntegerType::getInt1Ty(Ty->getContext())) {
2942 // Represent vector_reduce_add(ZExt(<n x i1>)) as
2943 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
2944 auto *IntTy =
2945 IntegerType::get(ResTy->getContext(), FTy->getNumElements());
2946 IntrinsicCostAttributes ICA(Intrinsic::ctpop, IntTy, {IntTy}, FMF);
2947 return thisT()->getCastInstrCost(Instruction::BitCast, IntTy, FTy,
2949 thisT()->getIntrinsicInstrCost(ICA, CostKind);
2950 }
2951 // Without any native support, this is equivalent to the cost of
2952 // vecreduce.opcode(ext(Ty A)).
2953 VectorType *ExtTy = VectorType::get(ResTy, Ty);
2954 InstructionCost RedCost =
2955 thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind);
2956 InstructionCost ExtCost = thisT()->getCastInstrCost(
2957 IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2959
2960 return RedCost + ExtCost;
2961 }
2962
2964 VectorType *Ty,
2966 // Without any native support, this is equivalent to the cost of
2967 // vecreduce.add(mul(ext(Ty A), ext(Ty B))) or
2968 // vecreduce.add(mul(A, B)).
2969 VectorType *ExtTy = VectorType::get(ResTy, Ty);
2970 InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2971 Instruction::Add, ExtTy, std::nullopt, CostKind);
2972 InstructionCost ExtCost = thisT()->getCastInstrCost(
2973 IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2975
2976 InstructionCost MulCost =
2977 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2978
2979 return RedCost + MulCost + 2 * ExtCost;
2980 }
2981
2983
2984 /// @}
2985};
2986
2987/// Concrete BasicTTIImpl that can be used if no further customization
2988/// is needed.
2989class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2991
2992 friend class BasicTTIImplBase<BasicTTIImpl>;
2993
2994 const TargetSubtargetInfo *ST;
2995 const TargetLoweringBase *TLI;
2996
2997 const TargetSubtargetInfo *getST() const { return ST; }
2998 const TargetLoweringBase *getTLI() const { return TLI; }
2999
3000public:
3001 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
3002};
3003
3004} // end namespace llvm
3005
3006#endif // LLVM_CODEGEN_BASICTTIIMPL_H
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
uint32_t Index
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
if(PassOpts->AAPipeline)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
This file provides helpers for the implementation of a TargetTransformInfo-conforming class.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1201
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1130
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:200
an instruction to allocate memory on the stack
Definition: Instructions.h:63
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:207
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
Definition: ArrayRef.h:213
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:80
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx)
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:468
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:326
virtual unsigned getPrefetchDistance() const
Definition: BasicTTIImpl.h:766
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:622
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
Definition: BasicTTIImpl.h:595
unsigned getMaxInterleaveFactor(ElementCount VF)
Definition: BasicTTIImpl.h:956
unsigned getNumberOfParts(Type *Tp)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: BasicTTIImpl.h:795
std::optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:800
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence...
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:458
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: BasicTTIImpl.h:702
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
Definition: BasicTTIImpl.h:713
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: BasicTTIImpl.h:786
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:375
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:462
virtual unsigned getMaxPrefetchIterationsAhead() const
Definition: BasicTTIImpl.h:778
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)
std::optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:799
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
unsigned getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:473
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:549
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
Definition: BasicTTIImpl.h:616
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Definition: BasicTTIImpl.h:484
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:416
bool shouldDropLSRSolutionIfLessProfitable() const
Definition: BasicTTIImpl.h:436
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:428
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
Definition: BasicTTIImpl.h:728
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Definition: BasicTTIImpl.h:770
bool hasBranchDivergence(const Function *F=nullptr)
Definition: BasicTTIImpl.h:320
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:422
unsigned getAssumedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:348
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: BasicTTIImpl.h:875
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getEpilogueVectorizationMinVF()
Definition: BasicTTIImpl.h:709
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset)
Definition: BasicTTIImpl.h:392
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:478
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:581
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
Definition: BasicTTIImpl.h:839
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:746
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:444
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
Definition: BasicTTIImpl.h:853
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:324
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)
Definition: BasicTTIImpl.h:718
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
Definition: BasicTTIImpl.h:298
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: BasicTTIImpl.h:396
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
Definition: BasicTTIImpl.h:306
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition: BasicTTIImpl.h:859
virtual std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:752
virtual bool enableWritePrefetching() const
Definition: BasicTTIImpl.h:782
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: BasicTTIImpl.h:362
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:694
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: BasicTTIImpl.h:339
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:585
InstructionCost getVectorSplitCost()
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:922
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:574
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:358
unsigned getInliningThresholdMultiplier() const
Definition: BasicTTIImpl.h:614
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
virtual ~BasicTTIImplBase()=default
bool isLegalAddScalableImmediate(int64_t Imm)
Definition: BasicTTIImpl.h:371
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition: BasicTTIImpl.h:904
bool isVScaleKnownToBeAPowerOfTwo() const
Definition: BasicTTIImpl.h:801
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
Definition: BasicTTIImpl.h:722
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Definition: BasicTTIImpl.h:330
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:367
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:334
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:806
virtual unsigned getCacheLineSize() const
Definition: BasicTTIImpl.h:762
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:344
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:322
bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
Definition: BasicTTIImpl.h:843
int getInlinerVectorBonusPercent() const
Definition: BasicTTIImpl.h:620
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:958
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: BasicTTIImpl.h:735
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Definition: BasicTTIImpl.h:379
bool isSingleThreaded() const
Definition: BasicTTIImpl.h:352
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:289
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
Definition: BasicTTIImpl.h:848
unsigned adjustInliningThreshold(const CallBase *CB)
Definition: BasicTTIImpl.h:615
bool isProfitableLSRChainElement(Instruction *I)
Definition: BasicTTIImpl.h:440
Concrete BasicTTIImpl that can be used if no further customization is needed.
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:162
BitVector & set()
Definition: BitVector.h:351
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:980
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
CmpInst::Predicate getLTPredicate() const
CmpInst::Predicate getGTPredicate() const
This class represents a range of values.
Definition: ConstantRange.h:47
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:434
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:843
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:369
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:572
unsigned getNumElements() const
Definition: DerivedTypes.h:615
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:359
The core instruction combiner logic.
Definition: InstCombiner.h:48
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
virtual bool enableWritePrefetching() const
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
virtual std::optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associativity for the given level of cache.
virtual std::optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
Machine Value Type.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for applied optimization remarks.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis providing profile information.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isZeroEltSplatMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses all elements with the same value as the first element of exa...
static bool isSpliceMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is a splice mask, concatenating the two inputs together and then ext...
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isTransposeMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask is a transpose mask.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:612
Multiway switch.
Provides information about what library functions are available for the current target.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isProfitableToHoist(Instruction *I) const
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool isLegalAddScalableImmediate(int64_t) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:81
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
If the specified predicate checks whether a generic pointer falls within a specified address space,...
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
TargetOptions Options
ThreadModel::Model ThreadModel
ThreadModel - This flag specifies the type of threading model to assume for things like atomics.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
const DataLayout & getDataLayout() const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isProfitableLSRChainElement(Instruction *I) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
bool isLoweredToCall(const Function *F) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
MemIndexedMode
The type of load/store indexing.
@ MIM_PostInc
Post-incrementing.
@ MIM_PostDec
Post-decrementing.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
CacheLevel
The possible cache levels.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:395
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1734
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:588
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:243
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:267
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
Value * getOperand(unsigned i) const
Definition: User.h:228
static bool isVPBinOp(Intrinsic::ID ID)
static bool isVPCast(Intrinsic::ID ID)
static bool isVPCmp(Intrinsic::ID ID)
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
static std::optional< Intrinsic::ID > getFunctionalIntrinsicIDForVP(Intrinsic::ID ID)
static bool isVPIntrinsic(Intrinsic::ID)
static bool isVPReduction(Intrinsic::ID ID)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition: DerivedTypes.h:531
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:674
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
Definition: DerivedTypes.h:460
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:218
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A.
Definition: APInt.cpp:2982
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
Definition: ISDOpcodes.h:1067
@ FATAN2
FATAN2 - atan2, inspired by libm.
Definition: ISDOpcodes.h:999
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
Definition: ISDOpcodes.h:1063
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1131
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1135
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:515
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:907
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition: ISDOpcodes.h:705
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:906
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1055
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1567
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
Definition: Intrinsics.cpp:642
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
Definition: LoopUtils.cpp:960
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:341
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:404
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
InstructionCost Cost
cl::opt< unsigned > PartialUnrollingThreshold
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
Attributes of a target dependent hardware loop.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).