LLVM 21.0.0git
TargetTransformInfo.h
Go to the documentation of this file.
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/IR/FMF.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/PassManager.h"
29#include "llvm/Pass.h"
33#include <functional>
34#include <optional>
35#include <utility>
36
37namespace llvm {
38
39namespace Intrinsic {
40typedef unsigned ID;
41}
42
43class AllocaInst;
44class AssumptionCache;
45class BlockFrequencyInfo;
46class DominatorTree;
47class BranchInst;
48class Function;
49class GlobalValue;
50class InstCombiner;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
53class IntrinsicInst;
54class LoadInst;
55class Loop;
56class LoopInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
60class SCEV;
61class ScalarEvolution;
62class SmallBitVector;
63class StoreInst;
64class SwitchInst;
65class TargetLibraryInfo;
66class Type;
67class VPIntrinsic;
68struct KnownBits;
69
70/// Information about a load/store intrinsic defined by the target.
72 /// This is the pointer that the intrinsic is loading from or storing to.
73 /// If this is non-null, then analysis/optimization passes can assume that
74 /// this intrinsic is functionally equivalent to a load/store from this
75 /// pointer.
76 Value *PtrVal = nullptr;
77
78 // Ordering for atomic operations.
80
81 // Same Id is set by the target for corresponding load/store intrinsics.
82 unsigned short MatchingId = 0;
83
84 bool ReadMem = false;
85 bool WriteMem = false;
86 bool IsVolatile = false;
87
88 bool isUnordered() const {
92 }
93};
94
95/// Attributes of a target dependent hardware loop.
97 HardwareLoopInfo() = delete;
99 Loop *L = nullptr;
102 const SCEV *ExitCount = nullptr;
104 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
105 // value in every iteration.
106 bool IsNestingLegal = false; // Can a hardware loop be a parent to
107 // another hardware loop?
108 bool CounterInReg = false; // Should loop counter be updated in
109 // the loop via a phi?
110 bool PerformEntryTest = false; // Generate the intrinsic which also performs
111 // icmp ne zero on the loop counter value and
112 // produces an i1 to guard the loop entry.
114 DominatorTree &DT, bool ForceNestedLoop = false,
115 bool ForceHardwareLoopPHI = false);
116 bool canAnalyze(LoopInfo &LI);
117};
118
120 const IntrinsicInst *II = nullptr;
121 Type *RetTy = nullptr;
122 Intrinsic::ID IID;
123 SmallVector<Type *, 4> ParamTys;
125 FastMathFlags FMF;
126 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
127 // arguments and the return value will be computed based on types.
128 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129
130public:
132 Intrinsic::ID Id, const CallBase &CI,
134 bool TypeBasedOnly = false);
135
137 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
138 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140
143
147 const IntrinsicInst *I = nullptr,
149
150 Intrinsic::ID getID() const { return IID; }
151 const IntrinsicInst *getInst() const { return II; }
152 Type *getReturnType() const { return RetTy; }
153 FastMathFlags getFlags() const { return FMF; }
154 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
157
158 bool isTypeBasedOnly() const {
159 return Arguments.empty();
160 }
161
162 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
163};
164
166 /// Don't use tail folding
167 None,
168 /// Use predicate only to mask operations on data in the loop.
169 /// When the VL is not known to be a power-of-2, this method requires a
170 /// runtime overflow check for the i + VL in the loop because it compares the
171 /// scalar induction variable against the tripcount rounded up by VL which may
172 /// overflow. When the VL is a power-of-2, both the increment and uprounded
173 /// tripcount will overflow to 0, which does not require a runtime check
174 /// since the loop is exited when the loop induction variable equals the
175 /// uprounded trip-count, which are both 0.
176 Data,
177 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
178 /// calculate the mask and instead implements this with a
179 /// splat/stepvector/cmp.
180 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
181 /// active.lane.mask intrinsic when it is not natively supported?
183 /// Use predicate to control both data and control flow.
184 /// This method always requires a runtime overflow check for the i + VL
185 /// increment inside the loop, because it uses the result directly in the
186 /// active.lane.mask to calculate the mask for the next iteration. If the
187 /// increment overflows, the mask is no longer correct.
189 /// Use predicate to control both data and control flow, but modify
190 /// the trip count so that a runtime overflow check can be avoided
191 /// and such that the scalar epilogue loop can always be removed.
193 /// Use predicated EVL instructions for tail-folding.
194 /// Indicates that VP intrinsics should be used.
196};
197
204 : TLI(TLI), LVL(LVL), IAI(IAI) {}
205};
206
207class TargetTransformInfo;
209
210/// This pass provides access to the codegen interfaces that are needed
211/// for IR-level transformations.
213public:
215
216 /// Get the kind of extension that an instruction represents.
219
220 /// Construct a TTI object using a type implementing the \c Concept
221 /// API below.
222 ///
223 /// This is used by targets to construct a TTI wrapping their target-specific
224 /// implementation that encodes appropriate costs for their target.
225 template <typename T> TargetTransformInfo(T Impl);
226
227 /// Construct a baseline TTI object using a minimal implementation of
228 /// the \c Concept API below.
229 ///
230 /// The TTI implementation will reflect the information in the DataLayout
231 /// provided if non-null.
232 explicit TargetTransformInfo(const DataLayout &DL);
233
234 // Provide move semantics.
237
238 // We need to define the destructor out-of-line to define our sub-classes
239 // out-of-line.
241
242 /// Handle the invalidation of this information.
243 ///
244 /// When used as a result of \c TargetIRAnalysis this method will be called
245 /// when the function this was computed for changes. When it returns false,
246 /// the information is preserved across those changes.
249 // FIXME: We should probably in some way ensure that the subtarget
250 // information for a function hasn't changed.
251 return false;
252 }
253
254 /// \name Generic Target Information
255 /// @{
256
257 /// The kind of cost model.
258 ///
259 /// There are several different cost models that can be customized by the
260 /// target. The normalization of each cost model may be target specific.
261 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
262 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
264 TCK_RecipThroughput, ///< Reciprocal throughput.
265 TCK_Latency, ///< The latency of instruction.
266 TCK_CodeSize, ///< Instruction code size.
267 TCK_SizeAndLatency ///< The weighted sum of size and latency.
268 };
269
270 /// Underlying constants for 'cost' values in this interface.
271 ///
272 /// Many APIs in this interface return a cost. This enum defines the
273 /// fundamental values that should be used to interpret (and produce) those
274 /// costs. The costs are returned as an int rather than a member of this
275 /// enumeration because it is expected that the cost of one IR instruction
276 /// may have a multiplicative factor to it or otherwise won't fit directly
277 /// into the enum. Moreover, it is common to sum or average costs which works
278 /// better as simple integral values. Thus this enum only provides constants.
279 /// Also note that the returned costs are signed integers to make it natural
280 /// to add, subtract, and test with zero (a common boundary condition). It is
281 /// not expected that 2^32 is a realistic cost to be modeling at any point.
282 ///
283 /// Note that these costs should usually reflect the intersection of code-size
284 /// cost and execution cost. A free instruction is typically one that folds
285 /// into another instruction. For example, reg-to-reg moves can often be
286 /// skipped by renaming the registers in the CPU, but they still are encoded
287 /// and thus wouldn't be considered 'free' here.
289 TCC_Free = 0, ///< Expected to fold away in lowering.
290 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
291 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
292 };
293
294 /// Estimate the cost of a GEP operation when lowered.
295 ///
296 /// \p PointeeType is the source element type of the GEP.
297 /// \p Ptr is the base pointer operand.
298 /// \p Operands is the list of indices following the base pointer.
299 ///
300 /// \p AccessType is a hint as to what type of memory might be accessed by
301 /// users of the GEP. getGEPCost will use it to determine if the GEP can be
302 /// folded into the addressing mode of a load/store. If AccessType is null,
303 /// then the resulting target type based off of PointeeType will be used as an
304 /// approximation.
306 getGEPCost(Type *PointeeType, const Value *Ptr,
307 ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
309
310 /// Describe known properties for a set of pointers.
312 /// All the GEPs in a set have same base address.
313 unsigned IsSameBaseAddress : 1;
314 /// These properties only valid if SameBaseAddress is set.
315 /// True if all pointers are separated by a unit stride.
316 unsigned IsUnitStride : 1;
318 /// True if distance between any two neighbouring pointers is a known value.
318 unsigned IsKnownStride : 1;
319 unsigned Reserved : 29;
320
321 bool isSameBase() const { return IsSameBaseAddress; }
322 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
324
326 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
327 /*IsKnownStride=*/1, 0};
328 }
330 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
331 /*IsKnownStride=*/1, 0};
332 }
334 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
335 /*IsKnownStride=*/0, 0};
336 }
337 };
338 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
339
340 /// Estimate the cost of a chain of pointers (typically pointer operands of a
341 /// chain of loads or stores within same block) operations set when lowered.
342 /// \p AccessTy is the type of the loads/stores that will ultimately use the
343 /// \p Ptrs.
346 const PointersChainInfo &Info, Type *AccessTy,
348
349 /// \returns A value by which our inlining threshold should be multiplied.
350 /// This is primarily used to bump up the inlining threshold wholesale on
351 /// targets where calls are unusually expensive.
352 ///
353 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
354 /// individual classes of instructions would be better.
355 unsigned getInliningThresholdMultiplier() const;
356
359
360 /// \returns The bonus of inlining the last call to a static function.
362
363 /// \returns A value to be added to the inlining threshold.
364 unsigned adjustInliningThreshold(const CallBase *CB) const;
365
366 /// \returns The cost of having an Alloca in the caller if not inlined, to be
367 /// added to the threshold
368 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
369
370 /// \returns Vector bonus in percent.
371 ///
372 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
373 /// and apply this bonus based on the percentage of vector instructions. A
374 /// bonus is applied if the vector instructions exceed 50% and half that
375 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
376 /// arbitrary and evolved over time by accident as much as because they are
377 /// principled bonuses.
378 /// FIXME: It would be nice to base the bonus values on something more
379 /// scientific. A target may have no bonus on vector instructions.
381
382 /// \return the expected cost of a memcpy, which could e.g. depend on the
383 /// source/destination type and alignment and the number of bytes copied.
385
386 /// Returns the maximum memset / memcpy size in bytes that still makes it
387 /// profitable to inline the call.
389
390 /// \return The estimated number of case clusters when lowering \p 'SI'.
391 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
392 /// table.
394 unsigned &JTSize,
396 BlockFrequencyInfo *BFI) const;
397
398 /// Estimate the cost of a given IR user when lowered.
399 ///
400 /// This can estimate the cost of either a ConstantExpr or Instruction when
401 /// lowered.
402 ///
403 /// \p Operands is a list of operands which can be a result of transformations
404 /// of the current operands. The number of the operands on the list must equal
405 /// to the number of the current operands the IR user has. Their order on the
406 /// list must be the same as the order of the current operands the IR user
407 /// has.
408 ///
409 /// The returned cost is defined in terms of \c TargetCostConstants, see its
410 /// comments for a detailed explanation of the cost values.
414
415 /// This is a helper function which calls the three-argument
416 /// getInstructionCost with \p Operands which are the current operands U has.
418 TargetCostKind CostKind) const {
419 SmallVector<const Value *, 4> Operands(U->operand_values());
421 }
422
423 /// If a branch or a select condition is skewed in one direction by more than
424 /// this factor, it is very likely to be predicted correctly.
426
427 /// Returns estimated penalty of a branch misprediction in latency. Indicates
428 /// how aggressive the target wants for eliminating unpredictable branches. A
429 /// zero return value means extra optimization applied to them should be
430 /// minimal.
432
433 /// Return true if branch divergence exists.
434 ///
435 /// Branch divergence has a significantly negative impact on GPU performance
436 /// when threads in the same wavefront take different paths due to conditional
437 /// branches.
438 ///
439 /// If \p F is passed, provides a context function. If \p F is known to only
440 /// execute in a single threaded environment, the target may choose to skip
441 /// uniformity analysis and assume all values are uniform.
442 bool hasBranchDivergence(const Function *F = nullptr) const;
443
444 /// Returns whether V is a source of divergence.
445 ///
446 /// This function provides the target-dependent information for
447 /// the target-independent UniformityAnalysis.
448 bool isSourceOfDivergence(const Value *V) const;
449
450 // Returns true for the target specific
451 // set of operations which produce uniform result
452 // even taking non-uniform arguments
453 bool isAlwaysUniform(const Value *V) const;
454
455 /// Query the target whether the specified address space cast from FromAS to
456 /// ToAS is valid.
457 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
458
459 /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
460 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
461
462 /// Returns the address space ID for a target's 'flat' address space. Note
463 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
464 /// refers to as the generic address space. The flat address space is a
465 /// generic address space that can be used to access multiple segments of memory
466 /// with different address spaces. Access of a memory location through a
467 /// pointer with this address space is expected to be legal but slower
468 /// compared to the same memory location accessed through a pointer with a
469 /// different address space.
470 //
471 /// This is for targets with different pointer representations which can
472 /// be converted with the addrspacecast instruction. If a pointer is converted
473 /// to this address space, optimizations should attempt to replace the access
474 /// with the source address space.
475 ///
476 /// \returns ~0u if the target does not have such a flat address space to
477 /// optimize away.
478 unsigned getFlatAddressSpace() const;
479
480 /// Return any intrinsic address operand indexes which may be rewritten if
481 /// they use a flat address space pointer.
482 ///
483 /// \returns true if the intrinsic was handled.
485 Intrinsic::ID IID) const;
486
487 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
488
489 /// Return true if globals in this address space can have initializers other
490 /// than `undef`.
492
493 unsigned getAssumedAddrSpace(const Value *V) const;
494
495 bool isSingleThreaded() const;
496
497 std::pair<const Value *, unsigned>
498 getPredicatedAddrSpace(const Value *V) const;
499
500 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
501 /// NewV, which has a different address space. This should happen for every
502 /// operand index that collectFlatAddressOperands returned for the intrinsic.
503 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
504 /// new value (which may be the original \p II with modified operands).
506 Value *NewV) const;
507
508 /// Test whether calls to a function lower to actual program function
509 /// calls.
510 ///
511 /// The idea is to test whether the program is likely to require a 'call'
512 /// instruction or equivalent in order to call the given function.
513 ///
514 /// FIXME: It's not clear that this is a good or useful query API. Client's
515 /// should probably move to simpler cost metrics using the above.
516 /// Alternatively, we could split the cost interface into distinct code-size
517 /// and execution-speed costs. This would allow modelling the core of this
518 /// query more accurately as a call is a single small instruction, but
519 /// incurs significant execution cost.
520 bool isLoweredToCall(const Function *F) const;
521
522 struct LSRCost {
523 /// TODO: Some of these could be merged. Also, a lexical ordering
524 /// isn't always optimal.
525 unsigned Insns;
526 unsigned NumRegs;
527 unsigned AddRecCost;
528 unsigned NumIVMuls;
529 unsigned NumBaseAdds;
530 unsigned ImmCost;
531 unsigned SetupCost;
532 unsigned ScaleCost;
533 };
534
535 /// Parameters that control the generic loop unrolling transformation.
537 /// The cost threshold for the unrolled loop. Should be relative to the
538 /// getInstructionCost values returned by this API, and the expectation is
539 /// that the unrolled loop's instructions when run through that interface
540 /// should not exceed this cost. However, this is only an estimate. Also,
541 /// specific loops may be unrolled even with a cost above this threshold if
542 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
543 /// restriction.
544 unsigned Threshold;
545 /// If complete unrolling will reduce the cost of the loop, we will boost
546 /// the Threshold by a certain percent to allow more aggressive complete
547 /// unrolling. This value provides the maximum boost percentage that we
548 /// can apply to Threshold (The value should be no less than 100).
549 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
550 /// MaxPercentThresholdBoost / 100)
551 /// E.g. if complete unrolling reduces the loop execution time by 50%
552 /// then we boost the threshold by the factor of 2x. If unrolling is not
553 /// expected to reduce the running time, then we do not increase the
554 /// threshold.
556 /// The cost threshold for the unrolled loop when optimizing for size (set
557 /// to UINT_MAX to disable).
559 /// The cost threshold for the unrolled loop, like Threshold, but used
560 /// for partial/runtime unrolling (set to UINT_MAX to disable).
562 /// The cost threshold for the unrolled loop when optimizing for size, like
563 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
564 /// UINT_MAX to disable).
566 /// A forced unrolling factor (the number of concatenated bodies of the
567 /// original loop in the unrolled loop body). When set to 0, the unrolling
568 /// transformation will select an unrolling factor based on the current cost
569 /// threshold and other factors.
570 unsigned Count;
571 /// Default unroll count for loops with run-time trip count.
573 // Set the maximum unrolling factor. The unrolling factor may be selected
574 // using the appropriate cost threshold, but may not exceed this number
575 // (set to UINT_MAX to disable). This does not apply in cases where the
576 // loop is being fully unrolled.
577 unsigned MaxCount;
578 /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound
579 /// to be overridden by a target gives more flexibility in certain cases.
580 /// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
582 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
583 /// applies even if full unrolling is selected. This allows a target to fall
584 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
586 // Represents number of instructions optimized when "back edge"
587 // becomes "fall through" in unrolled loop.
588 // For now we count a conditional branch on a backedge and a comparison
589 // feeding it.
590 unsigned BEInsns;
591 /// Allow partial unrolling (unrolling of loops to expand the size of the
592 /// loop body, not only to eliminate small constant-trip-count loops).
594 /// Allow runtime unrolling (unrolling of loops to expand the size of the
595 /// loop body even when the number of loop iterations is not known at
596 /// compile time).
598 /// Allow generation of a loop remainder (extra iterations after unroll).
600 /// Allow emitting expensive instructions (such as divisions) when computing
601 /// the trip count of a loop for runtime unrolling.
603 /// Apply loop unroll on any kind of loop
604 /// (mainly to loops that fail runtime unrolling).
605 bool Force;
606 /// Allow using trip count upper bound to unroll loops.
608 /// Allow unrolling of all the iterations of the runtime loop remainder.
610 /// Allow unroll and jam. Used to enable unroll and jam for the target.
612 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
613 /// value above is used during unroll and jam for the outer loop size.
614 /// This value is used in the same manner to limit the size of the inner
615 /// loop.
617 /// Don't allow loop unrolling to simulate more than this number of
618 /// iterations when checking full unroll profitability
620 /// Don't disable runtime unroll for the loops which were vectorized.
622 /// Don't allow runtime unrolling if expanding the trip count takes more
623 /// than SCEVExpansionBudget.
625 /// Allow runtime unrolling multi-exit loops. Should only be set if the
626 /// target determined that multi-exit unrolling is profitable for the loop.
627 /// Fall back to the generic logic to determine whether multi-exit unrolling
628 /// is profitable if set to false.
630 };
631
632 /// Get target-customized preferences for the generic loop unrolling
633 /// transformation. The caller will initialize UP with the current
634 /// target-independent defaults.
637 OptimizationRemarkEmitter *ORE) const;
638
639 /// Query the target whether it would be profitable to convert the given loop
640 /// into a hardware loop.
643 HardwareLoopInfo &HWLoopInfo) const;
644
645 // Query the target for which minimum vectorization factor epilogue
646 // vectorization should be considered.
647 unsigned getEpilogueVectorizationMinVF() const;
648
649 /// Query the target whether it would be preferred to create a predicated
650 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
652
653 /// Query the target what the preferred style of tail folding is.
654 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
655 /// may (or will never) overflow for the suggested VF/UF in the given loop.
656 /// Targets can use this information to select a more optimal tail folding
657 /// style. The value conservatively defaults to true, such that no assumptions
658 /// are made on overflow.
660 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
661
662 // Parameters that control the loop peeling transformation
664 /// A forced peeling factor (the number of bodies of the original loop
665 /// that should be peeled off before the loop body). When set to 0,
666 /// a peeling factor based on profile information and other factors is used.
667 unsigned PeelCount;
668 /// Allow peeling off loop iterations.
670 /// Allow peeling off loop iterations for loop nests.
672 /// Allow peeling basing on profile. Uses to enable peeling off all
673 /// iterations basing on provided profile.
674 /// If the value is true the peeling cost model can decide to peel only
675 /// some iterations and in this case it will set this to false.
677 };
678
679 /// Get target-customized preferences for the generic loop peeling
680 /// transformation. The caller will initialize \p PP with the current
681 /// target-independent defaults with information from \p L and \p SE.
683 PeelingPreferences &PP) const;
684
685 /// Targets can implement their own combinations for target-specific
686 /// intrinsics. This function will be called from the InstCombine pass every
687 /// time a target-specific intrinsic is encountered.
688 ///
689 /// \returns std::nullopt to not do anything target specific or a value that
690 /// will be returned from the InstCombiner. It is possible to return null and
691 /// stop further processing of the intrinsic by returning nullptr.
692 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
693 IntrinsicInst & II) const;
694 /// Can be used to implement target-specific instruction combining.
695 /// \see instCombineIntrinsic
696 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
697 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
698 KnownBits & Known, bool &KnownBitsComputed) const;
699 /// Can be used to implement target-specific instruction combining.
700 /// \see instCombineIntrinsic
701 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
702 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
703 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
704 std::function<void(Instruction *, unsigned, APInt, APInt &)>
705 SimplifyAndSetOp) const;
706 /// @}
707
708 /// \name Scalar Target Information
709 /// @{
710
711 /// Flags indicating the kind of support for population count.
712 ///
713 /// Compared to the SW implementation, HW support is supposed to
714 /// significantly boost the performance when the population is dense, and it
715 /// may or may not degrade performance if the population is sparse. A HW
716 /// support is considered as "Fast" if it can outperform, or is on a par
717 /// with, SW implementation when the population is sparse; otherwise, it is
718 /// considered as "Slow".
720
721 /// Return true if the specified immediate is legal add immediate, that
722 /// is the target has add instructions which can add a register with the
723 /// immediate without having to materialize the immediate into a register.
724 bool isLegalAddImmediate(int64_t Imm) const;
725
726 /// Return true if adding the specified scalable immediate is legal, that is
727 /// the target has add instructions which can add a register with the
728 /// immediate (multiplied by vscale) without having to materialize the
729 /// immediate into a register.
730 bool isLegalAddScalableImmediate(int64_t Imm) const;
731
732 /// Return true if the specified immediate is legal icmp immediate,
733 /// that is the target has icmp instructions which can compare a register
734 /// against the immediate without having to materialize the immediate into a
735 /// register.
736 bool isLegalICmpImmediate(int64_t Imm) const;
737
738 /// Return true if the addressing mode represented by AM is legal for
739 /// this target, for a load/store of the specified type.
740 /// The type may be VoidTy, in which case only return true if the addressing
741 /// mode is legal for a load/store of any legal type.
742 /// If target returns true in LSRWithInstrQueries(), I may be valid.
743 /// \param ScalableOffset represents a quantity of bytes multiplied by vscale,
744 /// an invariant value known only at runtime. Most targets should not accept
745 /// a scalable offset.
746 ///
747 /// TODO: Handle pre/postinc as well.
748 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
749 bool HasBaseReg, int64_t Scale,
750 unsigned AddrSpace = 0, Instruction *I = nullptr,
751 int64_t ScalableOffset = 0) const;
752
753 /// Return true if LSR cost of C1 is lower than C2.
755 const TargetTransformInfo::LSRCost &C2) const;
756
757 /// Return true if LSR major cost is number of registers. Targets which
758 /// implement their own isLSRCostLess and unset number of registers as major
759 /// cost should return false, otherwise return true.
760 bool isNumRegsMajorCostOfLSR() const;
761
762 /// Return true if LSR should drop a found solution if it's calculated to be
763 /// less profitable than the baseline.
765
766 /// \returns true if LSR should not optimize a chain that includes \p I.
768
769 /// Return true if the target can fuse a compare and branch.
770 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
771 /// calculation for the instructions in a loop.
772 bool canMacroFuseCmp() const;
773
774 /// Return true if the target can save a compare for loop count, for example
775 /// hardware loop saves a compare.
776 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
778 TargetLibraryInfo *LibInfo) const;
779
784 };
785
786 /// Return the preferred addressing mode LSR should make efforts to generate.
788 ScalarEvolution *SE) const;
789
790 /// Return true if the target supports masked store.
791 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
792 /// Return true if the target supports masked load.
793 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
794
795 /// Return true if the target supports nontemporal store.
796 bool isLegalNTStore(Type *DataType, Align Alignment) const;
797 /// Return true if the target supports nontemporal load.
798 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
799
800 /// \returns true if the target supports broadcasting a load to a vector of
801 /// type <NumElements x ElementTy>.
802 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
803
804 /// Return true if the target supports masked scatter.
805 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
806 /// Return true if the target supports masked gather.
807 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
808 /// Return true if the target forces scalarizing of llvm.masked.gather
809 /// intrinsics.
810 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
811 /// Return true if the target forces scalarizing of llvm.masked.scatter
812 /// intrinsics.
813 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
814
815 /// Return true if the target supports masked compress store.
816 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const;
817 /// Return true if the target supports masked expand load.
818 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const;
819
820 /// Return true if the target supports strided load.
821 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;
822
823 /// Return true if the target supports interleaved access for the given vector
824 /// type \p VTy, interleave factor \p Factor, alignment \p Alignment and
825 /// address space \p AddrSpace.
826 bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
827 Align Alignment, unsigned AddrSpace) const;
828
829 /// Return true if the target supports masked vector histograms.
830 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const;
831
832 /// Return true if this is an alternating opcode pattern that can be lowered
833 /// to a single instruction on the target. In X86 this is for the addsub
834 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
835 /// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
836 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
837 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
838 /// \p VecTy is the vector type of the instruction to be generated.
839 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
840 const SmallBitVector &OpcodeMask) const;
841
842 /// Return true if we should be enabling ordered reductions for the target.
843 bool enableOrderedReductions() const;
844
845 /// Return true if the target has a unified operation to calculate division
846 /// and remainder. If so, the additional implicit multiplication and
847 /// subtraction required to calculate a remainder from division are free. This
848 /// can enable more aggressive transformations for division and remainder than
849 /// would typically be allowed using throughput or size cost models.
850 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
851
852 /// Return true if the given instruction (assumed to be a memory access
853 /// instruction) has a volatile variant. If that's the case then we can avoid
854 /// addrspacecast to generic AS for volatile loads/stores. Default
855 /// implementation returns false, which prevents address space inference for
856 /// volatile loads/stores.
857 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
858
859 /// Return true if target doesn't mind addresses in vectors.
860 bool prefersVectorizedAddressing() const;
861
862 /// Return the cost of the scaling factor used in the addressing
863 /// mode represented by AM for this target, for a load/store
864 /// of the specified type.
865 /// If the AM is supported, the return value must be >= 0.
866 /// If the AM is not supported, it returns a negative value.
867 /// TODO: Handle pre/postinc as well.
869 StackOffset BaseOffset, bool HasBaseReg,
870 int64_t Scale,
871 unsigned AddrSpace = 0) const;
872
873 /// Return true if the loop strength reduce pass should make
874 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
875 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
876 /// immediate offset and no index register.
877 bool LSRWithInstrQueries() const;
878
879 /// Return true if it's free to truncate a value of type Ty1 to type
880 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
881 /// by referencing its sub-register AX.
882 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
883
884 /// Return true if it is profitable to hoist instruction in the
885 /// then/else to before if.
886 bool isProfitableToHoist(Instruction *I) const;
887
888 bool useAA() const;
889
890 /// Return true if this type is legal.
891 bool isTypeLegal(Type *Ty) const;
892
893 /// Returns the estimated number of registers required to represent \p Ty.
894 unsigned getRegUsageForType(Type *Ty) const;
895
896 /// Return true if switches should be turned into lookup tables for the
897 /// target.
898 bool shouldBuildLookupTables() const;
899
900 /// Return true if switches should be turned into lookup tables
901 /// containing this constant value for the target.
903
904 /// Return true if lookup tables should be turned into relative lookup tables.
905 bool shouldBuildRelLookupTables() const;
906
907 /// Return true if the input function which is cold at all call sites,
908 /// should use coldcc calling convention.
909 bool useColdCCForColdCall(Function &F) const;
910
912
913 /// Identifies if the vector form of the intrinsic has a scalar operand.
915 unsigned ScalarOpdIdx) const;
916
917 /// Identifies if the vector form of the intrinsic is overloaded on the type
918 /// of the operand at index \p OpdIdx, or on the return type if \p OpdIdx is
919 /// -1.
921 int OpdIdx) const;
922
923 /// Identifies if the vector form of the intrinsic that returns a struct is
924 /// overloaded at the struct element index \p RetIdx.
926 int RetIdx) const;
927
928 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
929 /// are set if the demanded result elements need to be inserted and/or
930 /// extracted from vectors. The involved values may be passed in VL if
931 /// Insert is true.
933 const APInt &DemandedElts,
934 bool Insert, bool Extract,
936 ArrayRef<Value *> VL = {}) const;
937
938 /// Estimate the overhead of scalarizing an instruction's unique
939 /// non-constant operands. The (potentially vector) types to use for each
940 /// argument are passed via Tys.
941 InstructionCost
942 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
943 ArrayRef<Type *> Tys,
945
946 /// If target has efficient vector element load/store instructions, it can
947 /// return true here so that insertion/extraction costs are not added to
948 /// the scalarization cost of a load/store.
950
951 /// If the target supports tail calls.
952 bool supportsTailCalls() const;
953
954 /// If target supports tail call on \p CB
955 bool supportsTailCallFor(const CallBase *CB) const;
956
957 /// Don't restrict interleaved unrolling to small loops.
958 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
959
960 /// Returns options for expansion of memcmp. IsZeroCmp is
961 // true if this is the expansion of memcmp(p1, p2, s) == 0.
963 // Return true if memcmp expansion is enabled.
964 operator bool() const { return MaxNumLoads > 0; }
965
966 // Maximum number of load operations.
967 unsigned MaxNumLoads = 0;
968
969 // The list of available load sizes (in bytes), sorted in decreasing order.
971
972 // For memcmp expansion when the memcmp result is only compared equal or
973 // not-equal to 0, allow up to this number of load pairs per block. As an
974 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
975 // a0 = load2bytes &a[0]
976 // b0 = load2bytes &b[0]
977 // a2 = load1byte &a[2]
978 // b2 = load1byte &b[2]
979 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
980 unsigned NumLoadsPerBlock = 1;
981
982 // Set to true to allow overlapping loads. For example, 7-byte compares can
983 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
984 // requires all loads in LoadSizes to be doable in an unaligned way.
986
987 // Sometimes, the amount of data that needs to be compared is smaller than
988 // the standard register size, but it cannot be loaded with just one load
989 // instruction. For example, if the size of the memory comparison is 6
990 // bytes, we can handle it more efficiently by loading all 6 bytes in a
991 // single block and generating an 8-byte number, instead of generating two
992 // separate blocks with conditional jumps for 4 and 2 byte loads. This
993 // approach simplifies the process and produces the comparison result as
994 // normal. This array lists the allowed sizes of memcmp tails that can be
995 // merged into one block
997 };
999 bool IsZeroCmp) const;
1000
1001 /// Should the Select Optimization pass be enabled and ran.
1002 bool enableSelectOptimize() const;
1003
1004 /// Should the Select Optimization pass treat the given instruction like a
1005 /// select, potentially converting it to a conditional branch. This can
1006 /// include select-like instructions like or(zext(c), x) that can be converted
1007 /// to selects.
1009
1010 /// Enable matching of interleaved access groups.
1012
1013 /// Enable matching of interleaved access groups that contain predicated
1014 /// accesses or gaps and therefore vectorized using masked
1015 /// vector loads/stores.
1017
1018 /// Indicate that it is potentially unsafe to automatically vectorize
1019 /// floating-point operations because the semantics of vector and scalar
1020 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
1021 /// does not support IEEE-754 denormal numbers, while depending on the
1022 /// platform, scalar floating-point math does.
1023 /// This applies to floating-point math operations and calls, not memory
1024 /// operations, shuffles, or casts.
1026
1027 /// Determine if the target supports unaligned memory accesses.
1028 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
1029 unsigned AddressSpace = 0,
1030 Align Alignment = Align(1),
1031 unsigned *Fast = nullptr) const;
1032
1033 /// Return hardware support for population count.
1034 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
1035
1036 /// Return true if the hardware has a fast square-root instruction.
1037 bool haveFastSqrt(Type *Ty) const;
1038
1039 /// Return true if the cost of the instruction is too high to speculatively
1040 /// execute and should be kept behind a branch.
1041 /// This normally just wraps around a getInstructionCost() call, but some
1042 /// targets might report a low TCK_SizeAndLatency value that is incompatible
1043 /// with the fixed TCC_Expensive value.
1044 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
1046
1047 /// Return true if it is faster to check if a floating-point value is NaN
1048 /// (or not-NaN) versus a comparison against a constant FP zero value.
1049 /// Targets should override this if materializing a 0.0 for comparison is
1050 /// generally as cheap as checking for ordered/unordered.
1051 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
1052
1053 /// Return the expected cost of supporting the floating point operation
1054 /// of the specified type.
1055 InstructionCost getFPOpCost(Type *Ty) const;
1056
1057 /// Return the expected cost of materializing for the given integer
1058 /// immediate of the specified type.
1059 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1060 TargetCostKind CostKind) const;
1061
1062 /// Return the expected cost of materialization for the given integer
1063 /// immediate of the specified type for a given instruction. The cost can be
1064 /// zero if the immediate can be folded into the specified instruction.
1065 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1066 const APInt &Imm, Type *Ty,
1068 Instruction *Inst = nullptr) const;
1070 const APInt &Imm, Type *Ty,
1071 TargetCostKind CostKind) const;
1072
1073 /// Return the expected cost for the given integer when optimising
1074 /// for size. This is different than the other integer immediate cost
1075 /// functions in that it is subtarget agnostic. This is useful when you e.g.
1076 /// target one ISA such as Aarch32 but smaller encodings could be possible
1077 /// with another such as Thumb. This return value is used as a penalty when
1078 /// the total costs for a constant is calculated (the bigger the cost, the
1079 /// more beneficial constant hoisting is).
1080 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1081 const APInt &Imm, Type *Ty) const;
1082
1083 /// It can be advantageous to detach complex constants from their uses to make
1084 /// their generation cheaper. This hook allows targets to report when such
1085 /// transformations might negatively effect the code generation of the
1086 /// underlying operation. The motivating example is divides whereby hoisting
1087 /// constants prevents the code generator's ability to transform them into
1088 /// combinations of simpler operations.
1090 const Function &Fn) const;
1091
1092 /// @}
1093
1094 /// \name Vector Target Information
1095 /// @{
1096
1097 /// The various kinds of shuffle patterns for vector queries.
1099 SK_Broadcast, ///< Broadcast element 0 to all other elements.
1100 SK_Reverse, ///< Reverse the order of the vector.
1101 SK_Select, ///< Selects elements from the corresponding lane of
1102 ///< either source operand. This is equivalent to a
1103 ///< vector select with a constant condition operand.
1104 SK_Transpose, ///< Transpose two vectors.
1105 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
1106 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
1107 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
1108 ///< with any shuffle mask.
1109 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
1110 ///< shuffle mask.
1111 SK_Splice ///< Concatenates elements from the first input vector
1112 ///< with elements of the second input vector. Returning
1113 ///< a vector of the same type as the input vectors.
1114 ///< Index indicates start offset in first input vector.
1116
1117 /// Additional information about an operand's possible values.
1119 OK_AnyValue, // Operand can have any value.
1120 OK_UniformValue, // Operand is uniform (splat of a value).
1121 OK_UniformConstantValue, // Operand is uniform constant.
1122 OK_NonUniformConstantValue // Operand is a non uniform constant value.
1124
1125 /// Additional properties of an operand's values.
1130 };
1131
1132 // Describe the values an operand can take. We're in the process
1133 // of migrating uses of OperandValueKind and OperandValueProperties
1134 // to use this class, and then will change the internal representation.
1138
1139 bool isConstant() const {
1141 }
1142 bool isUniform() const {
1144 }
1145 bool isPowerOf2() const {
1146 return Properties == OP_PowerOf2;
1147 }
1148 bool isNegatedPowerOf2() const {
1150 }
1151
1153 return {Kind, OP_None};
1154 }
1155 };
1156
1157 /// \return the number of registers in the target-provided register class.
1158 unsigned getNumberOfRegisters(unsigned ClassID) const;
1159
1160 /// \return true if the target supports load/store that enables fault
1161 /// suppression of memory operands when the source condition is false.
1162 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
1163
1164 /// \return the target-provided register class ID for the provided type,
1165 /// accounting for type promotion and other type-legalization techniques that
1166 /// the target might apply. However, it specifically does not account for the
1167 /// scalarization or splitting of vector types. Should a vector type require
1168 /// scalarization or splitting into multiple underlying vector registers, that
1169 /// type should be mapped to a register class containing no registers.
1170 /// Specifically, this is designed to provide a simple, high-level view of the
1171 /// register allocation later performed by the backend. These register classes
1172 /// don't necessarily map onto the register classes used by the backend.
1173 /// FIXME: It's not currently possible to determine how many registers
1174 /// are used by the provided type.
1175 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1176
1177 /// \return the target-provided register class name
1178 const char *getRegisterClassName(unsigned ClassID) const;
1179
1181
1182 /// \return The width of the largest scalar or vector register type.
1184
1185 /// \return The width of the smallest vector register type.
1186 unsigned getMinVectorRegisterBitWidth() const;
1187
1188 /// \return The maximum value of vscale if the target specifies an
1189 /// architectural maximum vector length, and std::nullopt otherwise.
1190 std::optional<unsigned> getMaxVScale() const;
1191
1192 /// \return the value of vscale to tune the cost model for.
1193 std::optional<unsigned> getVScaleForTuning() const;
1194
1195 /// \return true if vscale is known to be a power of 2
1196 bool isVScaleKnownToBeAPowerOfTwo() const;
1197
1198 /// \return True if the vectorization factor should be chosen to
1199 /// make the vector of the smallest element type match the size of a
1200 /// vector register. For wider element types, this could result in
1201 /// creating vectors that span multiple vector registers.
1202 /// If false, the vectorization factor will be chosen based on the
1203 /// size of the widest element type.
1204 /// \p K Register Kind for vectorization.
1206
1207 /// \return The minimum vectorization factor for types of given element
1208 /// bit width, or 0 if there is no minimum VF. The returned value only
1209 /// applies when shouldMaximizeVectorBandwidth returns true.
1210 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1211 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1212
1213 /// \return The maximum vectorization factor for types of given element
1214 /// bit width and opcode, or 0 if there is no maximum VF.
1215 /// Currently only used by the SLP vectorizer.
1216 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1217
1218 /// \return The minimum vectorization factor for the store instruction. Given
1219 /// the initial estimation of the minimum vector factor and store value type,
1220 /// it tries to find possible lowest VF, which still might be profitable for
1221 /// the vectorization.
1222 /// \param VF Initial estimation of the minimum vector factor.
1223 /// \param ScalarMemTy Scalar memory type of the store operation.
1224 /// \param ScalarValTy Scalar type of the stored value.
1225 /// Currently only used by the SLP vectorizer.
1226 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1227 Type *ScalarValTy) const;
1228
1229 /// \return True if it should be considered for address type promotion.
1230 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1231 /// profitable without finding other extensions fed by the same input.
1233 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1234
1235 /// \return The size of a cache line in bytes.
1236 unsigned getCacheLineSize() const;
1237
1238 /// The possible cache levels
1239 enum class CacheLevel {
1240 L1D, // The L1 data cache
1241 L2D, // The L2 data cache
1242
1243 // We currently do not model L3 caches, as their sizes differ widely between
1244 // microarchitectures. Also, we currently do not have a use for L3 cache
1245 // size modeling yet.
1246 };
1247
1248 /// \return The size of the cache level in bytes, if available.
1249 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1250
1251 /// \return The associativity of the cache level, if available.
1252 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1253
1254 /// \return The minimum architectural page size for the target.
1255 std::optional<unsigned> getMinPageSize() const;
1256
1257 /// \return How much before a load we should place the prefetch
1258 /// instruction. This is currently measured in number of
1259 /// instructions.
1260 unsigned getPrefetchDistance() const;
1261
1262 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1263 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1264 /// and the arguments provided are meant to serve as a basis for deciding this
1265 /// for a particular loop.
1266 ///
1267 /// \param NumMemAccesses Number of memory accesses in the loop.
1268 /// \param NumStridedMemAccesses Number of the memory accesses that
1269 /// ScalarEvolution could find a known stride
1270 /// for.
1271 /// \param NumPrefetches Number of software prefetches that will be
1272 /// emitted as determined by the addresses
1273 /// involved and the cache line size.
1274 /// \param HasCall True if the loop contains a call.
1275 ///
1276 /// \return This is the minimum stride in bytes where it makes sense to start
1277 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1278 /// stride.
1279 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1280 unsigned NumStridedMemAccesses,
1281 unsigned NumPrefetches, bool HasCall) const;
1282
1283 /// \return The maximum number of iterations to prefetch ahead. If
1284 /// the required number of iterations is more than this number, no
1285 /// prefetching is performed.
1286 unsigned getMaxPrefetchIterationsAhead() const;
1287
1288 /// \return True if prefetching should also be done for writes.
1289 bool enableWritePrefetching() const;
1290
1291 /// \return if target want to issue a prefetch in address space \p AS.
1292 bool shouldPrefetchAddressSpace(unsigned AS) const;
1293
1294 /// \return The cost of a partial reduction, which is a reduction from a
1295 /// vector to another vector with fewer elements of larger size. They are
1296 /// represented by the llvm.experimental.partial.reduce.add intrinsic, which
1297 /// takes an accumulator and a binary operation operand that itself is fed by
1298 /// two extends. An example of an operation that uses a partial reduction is a
1299 /// dot product, which reduces two vectors to another of 4 times fewer and 4
1300 /// times larger elements.
1302 getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
1303 Type *AccumType, ElementCount VF,
1306 std::optional<unsigned> BinOp = std::nullopt) const;
1307
1308 /// \return The maximum interleave factor that any transform should try to
1309 /// perform for this target. This number depends on the level of parallelism
1310 /// and the number of execution units in the CPU.
1311 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1312
1313 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1314 static OperandValueInfo getOperandInfo(const Value *V);
1315
1316 /// This is an approximation of reciprocal throughput of a math/logic op.
1317 /// A higher cost indicates less expected throughput.
1318 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1319 /// clock cycles per instruction when the instructions are not part of a
1320 /// limiting dependency chain."
1321 /// Therefore, costs should be scaled to account for multiple execution units
1322 /// on the target that can process this type of instruction. For example, if
1323 /// there are 5 scalar integer units and 2 vector integer units that can
1324 /// calculate an 'add' in a single cycle, this model should indicate that the
1325 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1326 /// add instruction.
1327 /// \p Args is an optional argument which holds the instruction operands
1328 /// values so the TTI can analyze those values searching for special
1329 /// cases or optimizations based on those values.
1330 /// \p CxtI is the optional original context instruction, if one exists, to
1331 /// provide even more information.
1332 /// \p TLibInfo is used to search for platform specific vector library
1333 /// functions for instructions that might be converted to calls (e.g. frem).
1335 unsigned Opcode, Type *Ty,
1338 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1339 ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr,
1340 const TargetLibraryInfo *TLibInfo = nullptr) const;
1341
1342 /// Returns the cost estimation for alternating opcode pattern that can be
1343 /// lowered to a single instruction on the target. In X86 this is for the
1344 /// addsub instruction which corresponds to a Shuffle + Fadd + FSub pattern in
1345 /// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
1346 /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1347 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1348 /// \p VecTy is the vector type of the instruction to be generated.
1349 InstructionCost getAltInstrCost(
1350 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1351 const SmallBitVector &OpcodeMask,
1353
1354 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1355 /// The exact mask may be passed as Mask, or else the array will be empty.
1356 /// The index and subtype parameters are used by the subvector insertion and
1357 /// extraction shuffle kinds to show the insert/extract point and the type of
1358 /// the subvector being inserted/extracted. The operands of the shuffle can be
1359 /// passed through \p Args, which helps improve the cost estimation in some
1360 /// cases, like in broadcast loads.
1361 /// NOTE: For subvector extractions Tp represents the source type.
1362 InstructionCost
1363 getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {},
1365 int Index = 0, VectorType *SubTp = nullptr,
1366 ArrayRef<const Value *> Args = {},
1367 const Instruction *CxtI = nullptr) const;
1368
1369 /// Represents a hint about the context in which a cast is used.
1370 ///
1371 /// For zext/sext, the context of the cast is the operand, which must be a
1372 /// load of some kind. For trunc, the context of the cast is the single
1373 /// user of the instruction, which must be a store of some kind.
1374 ///
1375 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1376 /// type of cast it's dealing with, as not every cast is equal. For instance,
1377 /// the zext of a load may be free, but the zext of an interleaving load can
1378 /// be (very) expensive!
1379 ///
1380 /// See \c getCastContextHint to compute a CastContextHint from a cast
1381 /// Instruction*. Callers can use it if they don't need to override the
1382 /// context and just want it to be calculated from the instruction.
1383 ///
1384 /// FIXME: This handles the types of load/store that the vectorizer can
1385 /// produce, which are the cases where the context instruction is most
1386 /// likely to be incorrect. There are other situations where that can happen
1387 /// too, which might be handled here but in the long run a more general
1388 /// solution of costing multiple instructions at the same time may be better.
1390 None, ///< The cast is not used with a load/store of any kind.
1391 Normal, ///< The cast is used with a normal load/store.
1392 Masked, ///< The cast is used with a masked load/store.
1393 GatherScatter, ///< The cast is used with a gather/scatter.
1394 Interleave, ///< The cast is used with an interleaved load/store.
1395 Reversed, ///< The cast is used with a reversed load/store.
1396 };
1397
1398 /// Calculates a CastContextHint from \p I.
1399 /// This should be used by callers of getCastInstrCost if they wish to
1400 /// determine the context from some instruction.
1401 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1402 /// or if it's another type of cast.
1404
1405 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1406 /// zext, etc. If there is an existing instruction that holds Opcode, it
1407 /// may be passed in the 'I' parameter.
1409 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1412 const Instruction *I = nullptr) const;
1413
1414 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1415 /// Index = -1 to indicate that there is no information about the index value.
1416 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1417 VectorType *VecTy,
1418 unsigned Index) const;
1419
1420 /// \return The expected cost of control-flow related instructions such as
1421 /// Phi, Ret, Br, Switch.
1423 getCFInstrCost(unsigned Opcode,
1425 const Instruction *I = nullptr) const;
1426
1427 /// \returns The expected cost of compare and select instructions. If there
1428 /// is an existing instruction that holds Opcode, it may be passed in the
1429 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1430 /// is using a compare with the specified predicate as condition. When vector
1431 /// types are passed, \p VecPred must be used for all lanes. For a
1432 /// comparison, the two operands are the natural values. For a select, the
1433 /// two operands are the *value* operands, not the condition operand.
1435 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1436 CmpInst::Predicate VecPred,
1438 OperandValueInfo Op1Info = {OK_AnyValue, OP_None},
1439 OperandValueInfo Op2Info = {OK_AnyValue, OP_None},
1440 const Instruction *I = nullptr) const;
1441
1442 /// \return The expected cost of vector Insert and Extract.
1443 /// Use -1 to indicate that there is no information on the index value.
1444 /// This is used when the instruction is not available; a typical use
1445 /// case is to provision the cost of vectorization/scalarization in
1446 /// vectorizer passes.
1447 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1449 unsigned Index = -1, Value *Op0 = nullptr,
1450 Value *Op1 = nullptr) const;
1451
1452 /// \return The expected cost of vector Insert and Extract.
1453 /// Use -1 to indicate that there is no information on the index value.
1454 /// This is used when the instruction is not available; a typical use
1455 /// case is to provision the cost of vectorization/scalarization in
1456 /// vectorizer passes.
1457 /// \param ScalarUserAndIdx encodes the information about extracts from a
1458 /// vector with 'Scalar' being the value being extracted,'User' being the user
1459 /// of the extract(nullptr if user is not known before vectorization) and
1460 /// 'Idx' being the extract lane.
1461 InstructionCost getVectorInstrCost(
1462 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
1463 Value *Scalar,
1464 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const;
1465
1466 /// \return The expected cost of vector Insert and Extract.
1467 /// This is used when instruction is available, and implementation
1468 /// asserts 'I' is not nullptr.
1469 ///
1470 /// A typical suitable use case is cost estimation when vector instruction
1471 /// exists (e.g., from basic blocks during transformation).
1472 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
1474 unsigned Index = -1) const;
1475
1476 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1477 /// \p ReplicationFactor times.
1478 ///
1479 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1480 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1481 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1482 int VF,
1483 const APInt &DemandedDstElts,
1485
1486 /// \return The cost of Load and Store instructions.
1487 InstructionCost
1488 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1489 unsigned AddressSpace,
1491 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1492 const Instruction *I = nullptr) const;
1493
1494 /// \return The cost of VP Load and Store instructions.
1495 InstructionCost
1496 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1497 unsigned AddressSpace,
1499 const Instruction *I = nullptr) const;
1500
1501 /// \return The cost of masked Load and Store instructions.
1503 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1505
1506 /// \return The cost of Gather or Scatter operation
1507 /// \p Opcode - is a type of memory access Load or Store
1508 /// \p DataTy - a vector type of the data to be loaded or stored
1509 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1510 /// \p VariableMask - true when the memory access is predicated with a mask
1511 /// that is not a compile-time constant
1512 /// \p Alignment - alignment of single element
1513 /// \p I - the optional original context instruction, if one exists, e.g. the
1514 /// load/store to transform or the call to the gather/scatter intrinsic
1516 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1518 const Instruction *I = nullptr) const;
1519
1520 /// \return The cost of Expand Load or Compress Store operation
1521 /// \p Opcode - is a type of memory access Load or Store
1522 /// \p Src - a vector type of the data to be loaded or stored
1523 /// \p VariableMask - true when the memory access is predicated with a mask
1524 /// that is not a compile-time constant
1525 /// \p Alignment - alignment of single element
1526 /// \p I - the optional original context instruction, if one exists, e.g. the
1527 /// load/store to transform or the call to the gather/scatter intrinsic
1529 unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1531 const Instruction *I = nullptr) const;
1532
1533 /// \return The cost of strided memory operations.
1534 /// \p Opcode - is a type of memory access Load or Store
1535 /// \p DataTy - a vector type of the data to be loaded or stored
1536 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1537 /// \p VariableMask - true when the memory access is predicated with a mask
1538 /// that is not a compile-time constant
1539 /// \p Alignment - alignment of single element
1540 /// \p I - the optional original context instruction, if one exists, e.g. the
1541 /// load/store to transform or the call to the gather/scatter intrinsic
1543 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1545 const Instruction *I = nullptr) const;
1546
1547 /// \return The cost of the interleaved memory operation.
1548 /// \p Opcode is the memory operation code
1549 /// \p VecTy is the vector type of the interleaved access.
1550 /// \p Factor is the interleave factor
1551 /// \p Indices is the indices for interleaved load members (as interleaved
1552 /// load allows gaps)
1553 /// \p Alignment is the alignment of the memory operation
1554 /// \p AddressSpace is address space of the pointer.
1555 /// \p UseMaskForCond indicates if the memory access is predicated.
1556 /// \p UseMaskForGaps indicates if gaps should be masked.
1558 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1559 Align Alignment, unsigned AddressSpace,
1561 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1562
1563 /// A helper function to determine the type of reduction algorithm used
1564 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1565 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1566 return FMF && !(*FMF).allowReassoc();
1567 }
1568
1569 /// Calculate the cost of vector reduction intrinsics.
1570 ///
1571 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1572 /// value using the operation denoted by \p Opcode. The FastMathFlags
1573 /// parameter \p FMF indicates what type of reduction we are performing:
1574 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1575 /// involves successively splitting a vector into half and doing the
1576 /// operation on the pair of halves until you have a scalar value. For
1577 /// example:
1578 /// (v0, v1, v2, v3)
1579 /// ((v0+v2), (v1+v3), undef, undef)
1580 /// ((v0+v2+v1+v3), undef, undef, undef)
1581 /// This is the default behaviour for integer operations, whereas for
1582 /// floating point we only do this if \p FMF indicates that
1583 /// reassociation is allowed.
1584 /// 2. Ordered. For a vector with N elements this involves performing N
1585 /// operations in lane order, starting with an initial scalar value, i.e.
1586 /// result = InitVal + v0
1587 /// result = result + v1
1588 /// result = result + v2
1589 /// result = result + v3
1590 /// This is only the case for FP operations and when reassociation is not
1591 /// allowed.
1592 ///
1594 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1596
1600
1601 /// Calculate the cost of an extended reduction pattern, similar to
1602 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1603 /// extensions. This is the cost of an expression such as:
1604 /// ResTy vecreduce.add(mul (A, B)).
1605 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1607 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1609
1610 /// Calculate the cost of an extended reduction pattern, similar to
1611 /// getArithmeticReductionCost of a reduction with an extension.
1612 /// This is the cost of an expression such as:
1613 /// ResTy vecreduce.opcode(ext(Ty A)).
1615 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1616 FastMathFlags FMF,
1618
1619 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1620 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1621 /// 3. scalar instruction which is to be vectorized.
1624
1625 /// \returns The cost of Call instructions.
1629
1630 /// \returns The number of pieces into which the provided type must be
1631 /// split during legalization. Zero is returned when the answer is unknown.
1632 unsigned getNumberOfParts(Type *Tp) const;
1633
1634 /// \returns The cost of the address computation. For most targets this can be
1635 /// merged into the instruction indexing mode. Some targets might want to
1636 /// distinguish between address computation for memory operations on vector
1637 /// types and scalar types. Such targets should override this function.
1638 /// The 'SE' parameter holds pointer for the scalar evolution object which
1639 /// is used in order to get the Ptr step value in case of constant stride.
1640 /// The 'Ptr' parameter holds SCEV of the access pointer.
1642 ScalarEvolution *SE = nullptr,
1643 const SCEV *Ptr = nullptr) const;
1644
1645 /// \returns The cost, if any, of keeping values of the given types alive
1646 /// over a callsite.
1647 ///
1648 /// Some types may require the use of register classes that do not have
1649 /// any callee-saved registers, so would require a spill and fill.
1651
1652 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1653 /// will contain additional information - whether the intrinsic may write
1654 /// or read to memory, volatility and the pointer. Info is undefined
1655 /// if false is returned.
1657
1658 /// \returns The maximum element size, in bytes, for an element
1659 /// unordered-atomic memory intrinsic.
1660 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1661
1662 /// \returns A value which is the result of the given memory intrinsic. New
1663 /// instructions may be created to extract the result from the given intrinsic
1664 /// memory operation. Returns nullptr if the target cannot create a result
1665 /// from the given intrinsic.
1667 Type *ExpectedType) const;
1668
1669 /// \returns The type to use in a loop expansion of a memcpy call.
1671 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1672 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
1673 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1674
1675 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1676 /// \param RemainingBytes The number of bytes to copy.
1677 ///
1678 /// Calculates the operand types to use when copying \p RemainingBytes of
1679 /// memory, where source and destination alignments are \p SrcAlign and
1680 /// \p DestAlign respectively.
1682 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1683 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1684 Align SrcAlign, Align DestAlign,
1685 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1686
1687 /// \returns True if the two functions have compatible attributes for inlining
1688 /// purposes.
1689 bool areInlineCompatible(const Function *Caller,
1690 const Function *Callee) const;
1691
1692 /// Returns a penalty for invoking call \p Call in \p F.
1693 /// For example, if a function F calls a function G, which in turn calls
1694 /// function H, then getInlineCallPenalty(F, H()) would return the
1695 /// penalty of calling H from F, e.g. after inlining G into F.
1696 /// \p DefaultCallPenalty is passed to give a default penalty that
1697 /// the target can amend or override.
1698 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1699 unsigned DefaultCallPenalty) const;
1700
1701 /// \returns True if the caller and callee agree on how \p Types will be
1702 /// passed to or returned from the callee.
1704 /// \param Types List of types to check.
1705 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1706 const ArrayRef<Type *> &Types) const;
1707
1708 /// The type of load/store indexing.
1710 MIM_Unindexed, ///< No indexing.
1711 MIM_PreInc, ///< Pre-incrementing.
1712 MIM_PreDec, ///< Pre-decrementing.
1713 MIM_PostInc, ///< Post-incrementing.
1714 MIM_PostDec ///< Post-decrementing.
1716
1717 /// \returns True if the specified indexed load for the given type is legal.
1718 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1719
1720 /// \returns True if the specified indexed store for the given type is legal.
1721 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1722
1723 /// \returns The bitwidth of the largest vector type that should be used to
1724 /// load/store in the given address space.
1725 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1726
1727 /// \returns True if the load instruction is legal to vectorize.
1728 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1729
1730 /// \returns True if the store instruction is legal to vectorize.
1731 bool isLegalToVectorizeStore(StoreInst *SI) const;
1732
1733 /// \returns True if it is legal to vectorize the given load chain.
1734 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1735 unsigned AddrSpace) const;
1736
1737 /// \returns True if it is legal to vectorize the given store chain.
1738 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1739 unsigned AddrSpace) const;
1740
1741 /// \returns True if it is legal to vectorize the given reduction kind.
1743 ElementCount VF) const;
1744
1745 /// \returns True if the given type is supported for scalable vectors
1747
1748 /// \returns The new vector factor value if the target doesn't support \p
1749 /// SizeInBytes loads or has a better vector factor.
1750 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1751 unsigned ChainSizeInBytes,
1752 VectorType *VecTy) const;
1753
1754 /// \returns The new vector factor value if the target doesn't support \p
1755 /// SizeInBytes stores or has a better vector factor.
1756 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1757 unsigned ChainSizeInBytes,
1758 VectorType *VecTy) const;
1759
1760 /// Flags describing the kind of vector reduction.
1762 ReductionFlags() = default;
1763 bool IsMaxOp =
1764 false; ///< If the op a min/max kind, true if it's a max operation.
1765 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1766 bool NoNaN =
1767 false; ///< If op is an fp min/max, whether NaNs may be present.
1768 };
1769
1770 /// \returns True if the target prefers fixed width vectorization if the
1771 /// loop vectorizer's cost-model assigns an equal cost to the fixed and
1772 /// scalable version of the vectorized loop.
1774
1775 /// \returns True if the target prefers reductions in loop.
1776 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1777 ReductionFlags Flags) const;
1778
1779 /// \returns True if the target prefers reductions select kept in the loop
1780 /// when tail folding. i.e.
1781 /// loop:
1782 /// p = phi (0, s)
1783 /// a = add (p, x)
1784 /// s = select (mask, a, p)
1785 /// vecreduce.add(s)
1786 ///
1787 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1788 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1789 /// by the target, this can lead to cleaner code generation.
1790 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1791 ReductionFlags Flags) const;
1792
1793 /// Return true if the loop vectorizer should consider vectorizing an
1794 /// otherwise scalar epilogue loop.
1795 bool preferEpilogueVectorization() const;
1796
1797 /// \returns True if the target wants to expand the given reduction intrinsic
1798 /// into a shuffle sequence.
1799 bool shouldExpandReduction(const IntrinsicInst *II) const;
1800
1802
1803 /// \returns The shuffle sequence pattern used to expand the given reduction
1804 /// intrinsic.
1807
1808 /// \returns the size cost of rematerializing a GlobalValue address relative
1809 /// to a stack reload.
1810 unsigned getGISelRematGlobalCost() const;
1811
1812 /// \returns the lower bound of a trip count to decide on vectorization
1813 /// while tail-folding.
1814 unsigned getMinTripCountTailFoldingThreshold() const;
1815
1816 /// \returns True if the target supports scalable vectors.
1817 bool supportsScalableVectors() const;
1818
1819 /// \return true when scalable vectorization is preferred.
1820 bool enableScalableVectorization() const;
1821
1822 /// \name Vector Predication Information
1823 /// @{
1824 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1825 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1826 /// Reference - "Vector Predication Intrinsics").
1827 /// Use of %evl is discouraged when that is not the case.
1828 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1829 Align Alignment) const;
1830
1831 /// Return true if sinking I's operands to the same basic block as I is
1832 /// profitable, e.g. because the operands can be folded into a target
1833 /// instruction during instruction selection. After calling the function
1834 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
1835 /// come first).
1837 SmallVectorImpl<Use *> &Ops) const;
1838
1839 /// Return true if it's significantly cheaper to shift a vector by a uniform
1840 /// scalar than by an amount which will vary across each lane. On x86 before
1841 /// AVX2 for example, there is a "psllw" instruction for the former case, but
1842 /// no simple instruction for a general "a << b" operation on vectors.
1843 /// This should also apply to lowering for vector funnel shifts (rotates).
1844 bool isVectorShiftByScalarCheap(Type *Ty) const;
1845
1848 // keep the predicating parameter
1850 // where legal, discard the predicate parameter
1852 // transform into something else that is also predicating
1853 Convert = 2
1855
1856 // How to transform the EVL parameter.
1857 // Legal: keep the EVL parameter as it is.
1858 // Discard: Ignore the EVL parameter where it is safe to do so.
1859 // Convert: Fold the EVL into the mask parameter.
1861
1862 // How to transform the operator.
1863 // Legal: The target supports this operator.
1864 // Convert: Convert this to a non-VP operation.
1865 // The 'Discard' strategy is invalid.
1867
  // \returns true when no transformation of this VP operation is required:
  // both the EVL-parameter strategy and the operator strategy are Legal
  // (keep the EVL parameter as it is, and the target supports the operator).
  bool shouldDoNothing() const {
    return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
  }
1873 };
1874
1875 /// \returns How the target needs this vector-predicated operation to be
1876 /// transformed.
1878 /// @}
1879
1880 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1881 /// state.
1882 ///
1883 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1884 /// node containing a jump table in a format suitable for the target, so it
1885 /// needs to know what format of jump table it can legally use.
1886 ///
1887 /// For non-Arm targets, this function isn't used. It defaults to returning
1888 /// false, but it shouldn't matter what it returns anyway.
1889 bool hasArmWideBranch(bool Thumb) const;
1890
1891 /// Returns a bitmask constructed from the target-features or fmv-features
1892 /// metadata of a function.
1893 uint64_t getFeatureMask(const Function &F) const;
1894
1895 /// Returns true if this is an instance of a function with multiple versions.
1896 bool isMultiversionedFunction(const Function &F) const;
1897
1898 /// \return The maximum number of function arguments the target supports.
1899 unsigned getMaxNumArgs() const;
1900
1901 /// \return For an array of given Size, return alignment boundary to
1902 /// pad to. Default is no padding.
1903 unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
1904
1905 /// @}
1906
1907 /// Collect kernel launch bounds for \p F into \p LB.
1909 const Function &F,
1910 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
1911
1912private:
1913 /// The abstract base class used to type erase specific TTI
1914 /// implementations.
1915 class Concept;
1916
1917 /// The template model for the base class which wraps a concrete
1918 /// implementation in a type erased interface.
1919 template <typename T> class Model;
1920
1921 std::unique_ptr<Concept> TTIImpl;
1922};
1923
1925public:
1926 virtual ~Concept() = 0;
1927 virtual const DataLayout &getDataLayout() const = 0;
1928 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1930 Type *AccessType,
1932 virtual InstructionCost
1934 const TTI::PointersChainInfo &Info, Type *AccessTy,
1936 virtual unsigned getInliningThresholdMultiplier() const = 0;
1938 virtual unsigned
1940 virtual int getInliningLastCallToStaticBonus() const = 0;
1941 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1942 virtual int getInlinerVectorBonusPercent() const = 0;
1943 virtual unsigned getCallerAllocaCost(const CallBase *CB,
1944 const AllocaInst *AI) const = 0;
1947 virtual unsigned
1949 ProfileSummaryInfo *PSI,
1950 BlockFrequencyInfo *BFI) = 0;
1956 virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
1957 virtual bool isSourceOfDivergence(const Value *V) = 0;
1958 virtual bool isAlwaysUniform(const Value *V) = 0;
1959 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1960 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
1961 virtual unsigned getFlatAddressSpace() = 0;
1963 Intrinsic::ID IID) const = 0;
1964 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1965 virtual bool
1967 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1968 virtual bool isSingleThreaded() const = 0;
1969 virtual std::pair<const Value *, unsigned>
1970 getPredicatedAddrSpace(const Value *V) const = 0;
1972 Value *OldV,
1973 Value *NewV) const = 0;
1974 virtual bool isLoweredToCall(const Function *F) = 0;
1977 OptimizationRemarkEmitter *ORE) = 0;
1979 PeelingPreferences &PP) = 0;
1981 AssumptionCache &AC,
1982 TargetLibraryInfo *LibInfo,
1983 HardwareLoopInfo &HWLoopInfo) = 0;
1984 virtual unsigned getEpilogueVectorizationMinVF() = 0;
1986 virtual TailFoldingStyle
1987 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1988 virtual std::optional<Instruction *> instCombineIntrinsic(
1989 InstCombiner &IC, IntrinsicInst &II) = 0;
1990 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1991 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1992 KnownBits & Known, bool &KnownBitsComputed) = 0;
1993 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1994 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1995 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1996 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1997 SimplifyAndSetOp) = 0;
1998 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1999 virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
2000 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
2001 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
2002 int64_t BaseOffset, bool HasBaseReg,
2003 int64_t Scale, unsigned AddrSpace,
2004 Instruction *I,
2005 int64_t ScalableOffset) = 0;
2007 const TargetTransformInfo::LSRCost &C2) = 0;
2008 virtual bool isNumRegsMajorCostOfLSR() = 0;
2011 virtual bool canMacroFuseCmp() = 0;
2012 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
2014 TargetLibraryInfo *LibInfo) = 0;
2015 virtual AddressingModeKind
2017 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
2018 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
2019 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
2020 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
2021 virtual bool isLegalBroadcastLoad(Type *ElementTy,
2022 ElementCount NumElements) const = 0;
2023 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
2024 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
2026 Align Alignment) = 0;
2028 Align Alignment) = 0;
2029 virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) = 0;
2030 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) = 0;
2031 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0;
2032 virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
2033 Align Alignment,
2034 unsigned AddrSpace) = 0;
2035
2036 virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) = 0;
2037 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
2038 unsigned Opcode1,
2039 const SmallBitVector &OpcodeMask) const = 0;
2040 virtual bool enableOrderedReductions() = 0;
2041 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
2042 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
2045 StackOffset BaseOffset,
2046 bool HasBaseReg, int64_t Scale,
2047 unsigned AddrSpace) = 0;
2048 virtual bool LSRWithInstrQueries() = 0;
2049 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
2051 virtual bool useAA() = 0;
2052 virtual bool isTypeLegal(Type *Ty) = 0;
2053 virtual unsigned getRegUsageForType(Type *Ty) = 0;
2054 virtual bool shouldBuildLookupTables() = 0;
2056 virtual bool shouldBuildRelLookupTables() = 0;
2057 virtual bool useColdCCForColdCall(Function &F) = 0;
2060 unsigned ScalarOpdIdx) = 0;
2062 int OpdIdx) = 0;
2063 virtual bool
2065 int RetIdx) = 0;
2066 virtual InstructionCost
2068 bool Insert, bool Extract, TargetCostKind CostKind,
2069 ArrayRef<Value *> VL = {}) = 0;
2070 virtual InstructionCost
2072 ArrayRef<Type *> Tys,
2075 virtual bool supportsTailCalls() = 0;
2076 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
2077 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
2079 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
2080 virtual bool enableSelectOptimize() = 0;
2086 unsigned BitWidth,
2087 unsigned AddressSpace,
2088 Align Alignment,
2089 unsigned *Fast) = 0;
2090 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
2091 virtual bool haveFastSqrt(Type *Ty) = 0;
2093 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
2095 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2096 const APInt &Imm, Type *Ty) = 0;
2097 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2099 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2100 const APInt &Imm, Type *Ty,
2102 Instruction *Inst = nullptr) = 0;
2104 const APInt &Imm, Type *Ty,
2107 const Function &Fn) const = 0;
2108 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
2109 virtual bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const = 0;
2110 virtual unsigned getRegisterClassForType(bool Vector,
2111 Type *Ty = nullptr) const = 0;
2112 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
2114 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
2115 virtual std::optional<unsigned> getMaxVScale() const = 0;
2116 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
2117 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
2118 virtual bool
2120 virtual ElementCount getMinimumVF(unsigned ElemWidth,
2121 bool IsScalable) const = 0;
2122 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
2123 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2124 Type *ScalarValTy) const = 0;
2126 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
2127 virtual unsigned getCacheLineSize() const = 0;
2128 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
2129 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
2130 const = 0;
2131 virtual std::optional<unsigned> getMinPageSize() const = 0;
2132
2133 /// \return How much before a load we should place the prefetch
2134 /// instruction. This is currently measured in number of
2135 /// instructions.
2136 virtual unsigned getPrefetchDistance() const = 0;
2137
2138 /// \return Some HW prefetchers can handle accesses up to a certain
2139 /// constant stride. This is the minimum stride in bytes where it
2140 /// makes sense to start adding SW prefetches. The default is 1,
2141 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
2142 /// even below the HW prefetcher limit, and the arguments provided are
2143 /// meant to serve as a basis for deciding this for a particular loop.
2144 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2145 unsigned NumStridedMemAccesses,
2146 unsigned NumPrefetches,
2147 bool HasCall) const = 0;
2148
2149 /// \return The maximum number of iterations to prefetch ahead. If
2150 /// the required number of iterations is more than this number, no
2151 /// prefetching is performed.
2152 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
2153
2154 /// \return True if prefetching should also be done for writes.
2155 virtual bool enableWritePrefetching() const = 0;
2156
2157 /// \return if target want to issue a prefetch in address space \p AS.
2158 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
2159
2160 /// \return The cost of a partial reduction, which is a reduction from a
2161 /// vector to another vector with fewer elements of larger size. They are
2162 /// represented by the llvm.experimental.partial.reduce.add intrinsic, which
2163 /// takes an accumulator and a binary operation operand that itself is fed by
2164 /// two extends. An example of an operation that uses a partial reduction is a
2165 /// dot product, which reduces two vectors to another of 4 times fewer and 4
2166 /// times larger elements.
2167 virtual InstructionCost
2168 getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB,
2169 Type *AccumType, ElementCount VF,
2172 std::optional<unsigned> BinOp) const = 0;
2173
2174 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
2176 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2177 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2178 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
2180 VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2181 const SmallBitVector &OpcodeMask,
2183
2184 virtual InstructionCost
2187 ArrayRef<const Value *> Args, const Instruction *CxtI) = 0;
2188 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
2189 Type *Src, CastContextHint CCH,
2191 const Instruction *I) = 0;
2192 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2193 VectorType *VecTy,
2194 unsigned Index) = 0;
2195 virtual InstructionCost getCFInstrCost(unsigned Opcode,
2197 const Instruction *I = nullptr) = 0;
2198 virtual InstructionCost
2199 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2201 OperandValueInfo Op1Info, OperandValueInfo Op2Info,
2202 const Instruction *I) = 0;
2203 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2205 unsigned Index, Value *Op0,
2206 Value *Op1) = 0;
2207
2208 /// \param ScalarUserAndIdx encodes the information about extracts from a
2209 /// vector with 'Scalar' being the value being extracted, 'User' being the user
2210 /// of the extract(nullptr if user is not known before vectorization) and
2211 /// 'Idx' being the extract lane.
2213 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
2214 Value *Scalar,
2215 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) = 0;
2216
2219 unsigned Index) = 0;
2220
2221 virtual InstructionCost
2222 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2223 const APInt &DemandedDstElts,
2225
2226 virtual InstructionCost
2227 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2229 OperandValueInfo OpInfo, const Instruction *I) = 0;
2230 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
2231 Align Alignment,
2232 unsigned AddressSpace,
2234 const Instruction *I) = 0;
2235 virtual InstructionCost
2236 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2237 unsigned AddressSpace,
2239 virtual InstructionCost
2240 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2241 bool VariableMask, Align Alignment,
2243 const Instruction *I = nullptr) = 0;
2245 unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
2246 TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0;
2247 virtual InstructionCost
2248 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2249 bool VariableMask, Align Alignment,
2251 const Instruction *I = nullptr) = 0;
2252
2254 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2255 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2256 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
2257 virtual InstructionCost
2259 std::optional<FastMathFlags> FMF,
2261 virtual InstructionCost
2265 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2266 FastMathFlags FMF,
2269 bool IsUnsigned, Type *ResTy, VectorType *Ty,
2271 virtual InstructionCost
2275 ArrayRef<Type *> Tys,
2277 virtual unsigned getNumberOfParts(Type *Tp) = 0;
2278 virtual InstructionCost
2280 virtual InstructionCost
2283 MemIntrinsicInfo &Info) = 0;
2284 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
2286 Type *ExpectedType) = 0;
2288 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2289 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
2290 std::optional<uint32_t> AtomicElementSize) const = 0;
2291
2293 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2294 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2295 Align SrcAlign, Align DestAlign,
2296 std::optional<uint32_t> AtomicCpySize) const = 0;
2297 virtual bool areInlineCompatible(const Function *Caller,
2298 const Function *Callee) const = 0;
2299 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
2300 unsigned DefaultCallPenalty) const = 0;
2301 virtual bool areTypesABICompatible(const Function *Caller,
2302 const Function *Callee,
2303 const ArrayRef<Type *> &Types) const = 0;
2304 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2305 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
2306 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
2307 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
2308 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
2309 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
2310 Align Alignment,
2311 unsigned AddrSpace) const = 0;
2312 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
2313 Align Alignment,
2314 unsigned AddrSpace) const = 0;
2316 ElementCount VF) const = 0;
2317 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
2318 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2319 unsigned ChainSizeInBytes,
2320 VectorType *VecTy) const = 0;
2321 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2322 unsigned ChainSizeInBytes,
2323 VectorType *VecTy) const = 0;
2324 virtual bool preferFixedOverScalableIfEqualCost() const = 0;
2325 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2326 ReductionFlags) const = 0;
2327 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2328 ReductionFlags) const = 0;
2329 virtual bool preferEpilogueVectorization() const = 0;
2330
2331 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2332 virtual ReductionShuffle
2334 virtual unsigned getGISelRematGlobalCost() const = 0;
2335 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2336 virtual bool enableScalableVectorization() const = 0;
2337 virtual bool supportsScalableVectors() const = 0;
2338 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2339 Align Alignment) const = 0;
2340 virtual bool
2342 SmallVectorImpl<Use *> &OpsToSink) const = 0;
2343
2344 virtual bool isVectorShiftByScalarCheap(Type *Ty) const = 0;
2345 virtual VPLegalization
2347 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2348 virtual uint64_t getFeatureMask(const Function &F) const = 0;
2349 virtual bool isMultiversionedFunction(const Function &F) const = 0;
2350 virtual unsigned getMaxNumArgs() const = 0;
2351 virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
2352 Type *ArrayType) const = 0;
2354 const Function &F,
2355 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const = 0;
2356};
2357
2358template <typename T>
2359class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2360 T Impl;
2361
2362public:
2363 Model(T Impl) : Impl(std::move(Impl)) {}
2364 ~Model() override = default;
2365
// Forwarder: exposes the DataLayout of the wrapped target implementation.
// The reference is owned by Impl (the concrete TTI implementation), not by
// this Model wrapper.
2366 const DataLayout &getDataLayout() const override {
2367 return Impl.getDataLayout();
2368 }
2369
// Forwarder: cost of a getelementptr with the given pointee type, base
// pointer, index operands and access type, under the given cost kind.
// NOTE(review): the rendered listing dropped original line 2373 (the
// CostKind parameter and the closing ') override {'); restored here so the
// signature matches the CostKind the body forwards — confirm against the
// upstream header.
2370 InstructionCost
2371 getGEPCost(Type *PointeeType, const Value *Ptr,
2372 ArrayRef<const Value *> Operands, Type *AccessType,
2373 TargetTransformInfo::TargetCostKind CostKind) override {
2374 return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
2375 }
2376 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2377 const Value *Base,
2378 const PointersChainInfo &Info,
2379 Type *AccessTy,
2380 TargetCostKind CostKind) override {
2381 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2382 }
2383 unsigned getInliningThresholdMultiplier() const override {
2384 return Impl.getInliningThresholdMultiplier();
2385 }
2386 unsigned adjustInliningThreshold(const CallBase *CB) override {
2387 return Impl.adjustInliningThreshold(CB);
2388 }
2389 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
2390 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2391 }
2392 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
2393 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2394 }
2395 int getInliningLastCallToStaticBonus() const override {
2396 return Impl.getInliningLastCallToStaticBonus();
2397 }
2398 int getInlinerVectorBonusPercent() const override {
2399 return Impl.getInlinerVectorBonusPercent();
2400 }
2401 unsigned getCallerAllocaCost(const CallBase *CB,
2402 const AllocaInst *AI) const override {
2403 return Impl.getCallerAllocaCost(CB, AI);
2404 }
2405 InstructionCost getMemcpyCost(const Instruction *I) override {
2406 return Impl.getMemcpyCost(I);
2407 }
2408
2409 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
2410 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2411 }
2412
2413 InstructionCost getInstructionCost(const User *U,
2414 ArrayRef<const Value *> Operands,
2415 TargetCostKind CostKind) override {
2416 return Impl.getInstructionCost(U, Operands, CostKind);
2417 }
2418 BranchProbability getPredictableBranchThreshold() override {
2419 return Impl.getPredictableBranchThreshold();
2420 }
2421 InstructionCost getBranchMispredictPenalty() override {
2422 return Impl.getBranchMispredictPenalty();
2423 }
2424 bool hasBranchDivergence(const Function *F = nullptr) override {
2425 return Impl.hasBranchDivergence(F);
2426 }
2427 bool isSourceOfDivergence(const Value *V) override {
2428 return Impl.isSourceOfDivergence(V);
2429 }
2430
2431 bool isAlwaysUniform(const Value *V) override {
2432 return Impl.isAlwaysUniform(V);
2433 }
2434
2435 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2436 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2437 }
2438
2439 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
2440 return Impl.addrspacesMayAlias(AS0, AS1);
2441 }
2442
2443 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2444
2445 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2446 Intrinsic::ID IID) const override {
2447 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2448 }
2449
2450 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2451 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2452 }
2453
2454 bool
2455 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2456 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2457 }
2458
2459 unsigned getAssumedAddrSpace(const Value *V) const override {
2460 return Impl.getAssumedAddrSpace(V);
2461 }
2462
2463 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2464
2465 std::pair<const Value *, unsigned>
2466 getPredicatedAddrSpace(const Value *V) const override {
2467 return Impl.getPredicatedAddrSpace(V);
2468 }
2469
2470 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2471 Value *NewV) const override {
2472 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2473 }
2474
2475 bool isLoweredToCall(const Function *F) override {
2476 return Impl.isLoweredToCall(F);
2477 }
// Forwarder: lets the target fill in loop-unrolling heuristics for L via the
// out-parameter UP; ORE may be used for remarks. The 'return' on a void call
// is legal C++ and matches the file's forwarding pattern.
2478 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2479 UnrollingPreferences &UP,
2480 OptimizationRemarkEmitter *ORE) override {
2481 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2482 }
2483 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2484 PeelingPreferences &PP) override {
2485 return Impl.getPeelingPreferences(L, SE, PP);
2486 }
2487 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2488 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2489 HardwareLoopInfo &HWLoopInfo) override {
2490 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2491 }
2492 unsigned getEpilogueVectorizationMinVF() override {
2493 return Impl.getEpilogueVectorizationMinVF();
2494 }
2495 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2496 return Impl.preferPredicateOverEpilogue(TFI);
2497 }
// Forwarder: target's preferred tail-folding style for vectorized loops.
// NOTE(review): the rendered listing dropped original line 2498 (the return
// type); restored as TailFoldingStyle — confirm against the upstream header.
2498 TailFoldingStyle
2499 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2500 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2501 }
2502 std::optional<Instruction *>
2503 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2504 return Impl.instCombineIntrinsic(IC, II);
2505 }
2506 std::optional<Value *>
2507 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2508 APInt DemandedMask, KnownBits &Known,
2509 bool &KnownBitsComputed) override {
2510 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2511 KnownBitsComputed);
2512 }
// Forwarder: gives the target a chance to simplify an intrinsic given the
// demanded vector elements. Returns std::nullopt when the target has no
// simplification; SimplifyAndSetOp is a callback the target may invoke to
// recurse into the intrinsic's operands.
2513 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2514 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2515 APInt &UndefElts2, APInt &UndefElts3,
2516 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2517 SimplifyAndSetOp) override {
2518 return Impl.simplifyDemandedVectorEltsIntrinsic(
2519 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2520 SimplifyAndSetOp);
2521 }
2522 bool isLegalAddImmediate(int64_t Imm) override {
2523 return Impl.isLegalAddImmediate(Imm);
2524 }
2525 bool isLegalAddScalableImmediate(int64_t Imm) override {
2526 return Impl.isLegalAddScalableImmediate(Imm);
2527 }
2528 bool isLegalICmpImmediate(int64_t Imm) override {
2529 return Impl.isLegalICmpImmediate(Imm);
2530 }
// Forwarder: asks the target whether the addressing mode described by
// (BaseGV + BaseOffset + Scale*index [+ ScalableOffset*vscale]) is legal for
// an access of type Ty in AddrSpace; I is the instruction context (may be
// null per the file's conventions — TODO confirm at call sites).
2531 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2532 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2533 Instruction *I, int64_t ScalableOffset) override {
2534 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2535 AddrSpace, I, ScalableOffset);
2536 }
2537 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2538 const TargetTransformInfo::LSRCost &C2) override {
2539 return Impl.isLSRCostLess(C1, C2);
2540 }
2541 bool isNumRegsMajorCostOfLSR() override {
2542 return Impl.isNumRegsMajorCostOfLSR();
2543 }
2544 bool shouldDropLSRSolutionIfLessProfitable() const override {
2545 return Impl.shouldDropLSRSolutionIfLessProfitable();
2546 }
2547 bool isProfitableLSRChainElement(Instruction *I) override {
2548 return Impl.isProfitableLSRChainElement(I);
2549 }
2550 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2551 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2552 DominatorTree *DT, AssumptionCache *AC,
2553 TargetLibraryInfo *LibInfo) override {
2554 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2555 }
// Forwarder: preferred addressing mode (e.g. pre/post-indexed) for loop L.
// NOTE(review): the rendered listing dropped original line 2556 (the return
// type); restored as TTI::AddressingModeKind — confirm qualification against
// the upstream header.
2556 TTI::AddressingModeKind
2557 getPreferredAddressingMode(const Loop *L,
2558 ScalarEvolution *SE) const override {
2559 return Impl.getPreferredAddressingMode(L, SE);
2560 }
2561 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2562 return Impl.isLegalMaskedStore(DataType, Alignment);
2563 }
2564 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2565 return Impl.isLegalMaskedLoad(DataType, Alignment);
2566 }
2567 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2568 return Impl.isLegalNTStore(DataType, Alignment);
2569 }
2570 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2571 return Impl.isLegalNTLoad(DataType, Alignment);
2572 }
2573 bool isLegalBroadcastLoad(Type *ElementTy,
2574 ElementCount NumElements) const override {
2575 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2576 }
2577 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2578 return Impl.isLegalMaskedScatter(DataType, Alignment);
2579 }
2580 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2581 return Impl.isLegalMaskedGather(DataType, Alignment);
2582 }
2583 bool forceScalarizeMaskedGather(VectorType *DataType,
2584 Align Alignment) override {
2585 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2586 }
2587 bool forceScalarizeMaskedScatter(VectorType *DataType,
2588 Align Alignment) override {
2589 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2590 }
2591 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) override {
2592 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2593 }
2594 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) override {
2595 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2596 }
2597 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override {
2598 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2599 }
2600 bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
2601 Align Alignment,
2602 unsigned AddrSpace) override {
2603 return Impl.isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace);
2604 }
2605 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) override {
2606 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2607 }
2608 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2609 const SmallBitVector &OpcodeMask) const override {
2610 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2611 }
2612 bool enableOrderedReductions() override {
2613 return Impl.enableOrderedReductions();
2614 }
2615 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2616 return Impl.hasDivRemOp(DataType, IsSigned);
2617 }
2618 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2619 return Impl.hasVolatileVariant(I, AddrSpace);
2620 }
2621 bool prefersVectorizedAddressing() override {
2622 return Impl.prefersVectorizedAddressing();
2623 }
2624 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2625 StackOffset BaseOffset, bool HasBaseReg,
2626 int64_t Scale,
2627 unsigned AddrSpace) override {
2628 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2629 AddrSpace);
2630 }
2631 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2632 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2633 return Impl.isTruncateFree(Ty1, Ty2);
2634 }
2635 bool isProfitableToHoist(Instruction *I) override {
2636 return Impl.isProfitableToHoist(I);
2637 }
2638 bool useAA() override { return Impl.useAA(); }
2639 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2640 unsigned getRegUsageForType(Type *Ty) override {
2641 return Impl.getRegUsageForType(Ty);
2642 }
2643 bool shouldBuildLookupTables() override {
2644 return Impl.shouldBuildLookupTables();
2645 }
2646 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2647 return Impl.shouldBuildLookupTablesForConstant(C);
2648 }
2649 bool shouldBuildRelLookupTables() override {
2650 return Impl.shouldBuildRelLookupTables();
2651 }
2652 bool useColdCCForColdCall(Function &F) override {
2653 return Impl.useColdCCForColdCall(F);
2654 }
2655 bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override {
2656 return Impl.isTargetIntrinsicTriviallyScalarizable(ID);
2657 }
2658
2659 bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
2660 unsigned ScalarOpdIdx) override {
2661 return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
2662 }
2663
2664 bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
2665 int OpdIdx) override {
2666 return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
2667 }
2668
2669 bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
2670 int RetIdx) override {
2671 return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
2672 }
2673
2674 InstructionCost getScalarizationOverhead(VectorType *Ty,
2675 const APInt &DemandedElts,
2676 bool Insert, bool Extract,
2678 ArrayRef<Value *> VL = {}) override {
2679 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2680 CostKind, VL);
2681 }
2682 InstructionCost
2683 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2684 ArrayRef<Type *> Tys,
2685 TargetCostKind CostKind) override {
2686 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2687 }
2688
2689 bool supportsEfficientVectorElementLoadStore() override {
2690 return Impl.supportsEfficientVectorElementLoadStore();
2691 }
2692
2693 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2694 bool supportsTailCallFor(const CallBase *CB) override {
2695 return Impl.supportsTailCallFor(CB);
2696 }
2697
2698 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2699 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2700 }
2701 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2702 bool IsZeroCmp) const override {
2703 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2704 }
2705 bool enableSelectOptimize() override {
2706 return Impl.enableSelectOptimize();
2707 }
2708 bool shouldTreatInstructionLikeSelect(const Instruction *I) override {
2709 return Impl.shouldTreatInstructionLikeSelect(I);
2710 }
2711 bool enableInterleavedAccessVectorization() override {
2712 return Impl.enableInterleavedAccessVectorization();
2713 }
2714 bool enableMaskedInterleavedAccessVectorization() override {
2715 return Impl.enableMaskedInterleavedAccessVectorization();
2716 }
2717 bool isFPVectorizationPotentiallyUnsafe() override {
2718 return Impl.isFPVectorizationPotentiallyUnsafe();
2719 }
2720 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2721 unsigned AddressSpace, Align Alignment,
2722 unsigned *Fast) override {
2723 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2724 Alignment, Fast);
2725 }
2726 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2727 return Impl.getPopcntSupport(IntTyWidthInBit);
2728 }
2729 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2730
2731 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2732 return Impl.isExpensiveToSpeculativelyExecute(I);
2733 }
2734
2735 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2736 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2737 }
2738
2739 InstructionCost getFPOpCost(Type *Ty) override {
2740 return Impl.getFPOpCost(Ty);
2741 }
2742
2743 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2744 const APInt &Imm, Type *Ty) override {
2745 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2746 }
2747 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2748 TargetCostKind CostKind) override {
2749 return Impl.getIntImmCost(Imm, Ty, CostKind);
2750 }
2751 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2752 const APInt &Imm, Type *Ty,
2754 Instruction *Inst = nullptr) override {
2755 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2756 }
2757 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2758 const APInt &Imm, Type *Ty,
2759 TargetCostKind CostKind) override {
2760 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2761 }
2762 bool preferToKeepConstantsAttached(const Instruction &Inst,
2763 const Function &Fn) const override {
2764 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2765 }
2766 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2767 return Impl.getNumberOfRegisters(ClassID);
2768 }
2769 bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
2770 return Impl.hasConditionalLoadStoreForType(Ty);
2771 }
2772 unsigned getRegisterClassForType(bool Vector,
2773 Type *Ty = nullptr) const override {
2774 return Impl.getRegisterClassForType(Vector, Ty);
2775 }
2776 const char *getRegisterClassName(unsigned ClassID) const override {
2777 return Impl.getRegisterClassName(ClassID);
2778 }
2779 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2780 return Impl.getRegisterBitWidth(K);
2781 }
2782 unsigned getMinVectorRegisterBitWidth() const override {
2783 return Impl.getMinVectorRegisterBitWidth();
2784 }
2785 std::optional<unsigned> getMaxVScale() const override {
2786 return Impl.getMaxVScale();
2787 }
2788 std::optional<unsigned> getVScaleForTuning() const override {
2789 return Impl.getVScaleForTuning();
2790 }
2791 bool isVScaleKnownToBeAPowerOfTwo() const override {
2792 return Impl.isVScaleKnownToBeAPowerOfTwo();
2793 }
2794 bool shouldMaximizeVectorBandwidth(
2795 TargetTransformInfo::RegisterKind K) const override {
2796 return Impl.shouldMaximizeVectorBandwidth(K);
2797 }
2798 ElementCount getMinimumVF(unsigned ElemWidth,
2799 bool IsScalable) const override {
2800 return Impl.getMinimumVF(ElemWidth, IsScalable);
2801 }
2802 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2803 return Impl.getMaximumVF(ElemWidth, Opcode);
2804 }
2805 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2806 Type *ScalarValTy) const override {
2807 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2808 }
2809 bool shouldConsiderAddressTypePromotion(
2810 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2811 return Impl.shouldConsiderAddressTypePromotion(
2812 I, AllowPromotionWithoutCommonHeader);
2813 }
2814 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2815 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2816 return Impl.getCacheSize(Level);
2817 }
2818 std::optional<unsigned>
2819 getCacheAssociativity(CacheLevel Level) const override {
2820 return Impl.getCacheAssociativity(Level);
2821 }
2822
2823 std::optional<unsigned> getMinPageSize() const override {
2824 return Impl.getMinPageSize();
2825 }
2826
2827 /// Return the preferred prefetch distance in terms of instructions.
2828 ///
2829 unsigned getPrefetchDistance() const override {
2830 return Impl.getPrefetchDistance();
2831 }
2832
2833 /// Return the minimum stride necessary to trigger software
2834 /// prefetching.
2835 ///
2836 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2837 unsigned NumStridedMemAccesses,
2838 unsigned NumPrefetches,
2839 bool HasCall) const override {
2840 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2841 NumPrefetches, HasCall);
2842 }
2843
2844 /// Return the maximum prefetch distance in terms of loop
2845 /// iterations.
2846 ///
2847 unsigned getMaxPrefetchIterationsAhead() const override {
2848 return Impl.getMaxPrefetchIterationsAhead();
2849 }
2850
2851 /// \return True if prefetching should also be done for writes.
2852 bool enableWritePrefetching() const override {
2853 return Impl.enableWritePrefetching();
2854 }
2855
2856 /// \return if target want to issue a prefetch in address space \p AS.
2857 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2858 return Impl.shouldPrefetchAddressSpace(AS);
2859 }
2860
2861 InstructionCost getPartialReductionCost(
2862 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
2863 ElementCount VF, PartialReductionExtendKind OpAExtend,
2865 std::optional<unsigned> BinOp = std::nullopt) const override {
2866 return Impl.getPartialReductionCost(Opcode, InputTypeA, InputTypeB,
2867 AccumType, VF, OpAExtend, OpBExtend,
2868 BinOp);
2869 }
2870
2871 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2872 return Impl.getMaxInterleaveFactor(VF);
2873 }
2874 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2875 unsigned &JTSize,
2876 ProfileSummaryInfo *PSI,
2877 BlockFrequencyInfo *BFI) override {
2878 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2879 }
2880 InstructionCost getArithmeticInstrCost(
2881 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2882 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2883 ArrayRef<const Value *> Args,
2884 const Instruction *CxtI = nullptr) override {
2885 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2886 Args, CxtI);
2887 }
2888 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2889 unsigned Opcode1,
2890 const SmallBitVector &OpcodeMask,
2891 TTI::TargetCostKind CostKind) const override {
2892 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2893 }
2894
// Forwarder: cost of a shuffle of kind Kind on vector type Tp with the given
// mask; Index/SubTp describe sub-vector insert/extract positions.
// NOTE(review): the rendered listing dropped original line 2897 (the
// CostKind and Index parameters — both are forwarded in the body below);
// restored here — confirm exact spelling against the upstream header.
2895 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2896 ArrayRef<int> Mask,
2897 TTI::TargetCostKind CostKind, int Index,
2898 VectorType *SubTp,
2899 ArrayRef<const Value *> Args,
2900 const Instruction *CxtI) override {
2901 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args,
2902 CxtI);
2903 }
2904 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2905 CastContextHint CCH,
2907 const Instruction *I) override {
2908 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2909 }
2910 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2911 VectorType *VecTy,
2912 unsigned Index) override {
2913 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2914 }
2915 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2916 const Instruction *I = nullptr) override {
2917 return Impl.getCFInstrCost(Opcode, CostKind, I);
2918 }
2919 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2920 CmpInst::Predicate VecPred,
2922 OperandValueInfo Op1Info,
2923 OperandValueInfo Op2Info,
2924 const Instruction *I) override {
2925 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
2926 Op1Info, Op2Info, I);
2927 }
2928 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2930 unsigned Index, Value *Op0,
2931 Value *Op1) override {
2932 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2933 }
2934 InstructionCost getVectorInstrCost(
2935 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
2936 Value *Scalar,
2937 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) override {
2938 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Scalar,
2939 ScalarUserAndIdx);
2940 }
2941 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2943 unsigned Index) override {
2944 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2945 }
2946 InstructionCost
2947 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2948 const APInt &DemandedDstElts,
2949 TTI::TargetCostKind CostKind) override {
2950 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2951 DemandedDstElts, CostKind);
2952 }
// Forwarder: cost of a plain load/store (Opcode) of type Src with the given
// alignment and address space; OpInfo describes the stored/loaded operand.
// NOTE(review): the rendered listing dropped original line 2955 (the
// CostKind parameter — forwarded in the body below); restored here.
2953 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2954 unsigned AddressSpace,
2955 TTI::TargetCostKind CostKind,
2956 OperandValueInfo OpInfo,
2957 const Instruction *I) override {
2958 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2959 OpInfo, I);
2960 }
2961 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2962 unsigned AddressSpace,
2964 const Instruction *I) override {
2965 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2966 CostKind, I);
2967 }
2968 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2969 Align Alignment, unsigned AddressSpace,
2970 TTI::TargetCostKind CostKind) override {
2971 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2972 CostKind);
2973 }
2974 InstructionCost
2975 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2976 bool VariableMask, Align Alignment,
2978 const Instruction *I = nullptr) override {
2979 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2980 Alignment, CostKind, I);
2981 }
2982 InstructionCost getExpandCompressMemoryOpCost(
2983 unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
2984 TTI::TargetCostKind CostKind, const Instruction *I = nullptr) override {
2985 return Impl.getExpandCompressMemoryOpCost(Opcode, DataTy, VariableMask,
2986 Alignment, CostKind, I);
2987 }
2988 InstructionCost
2989 getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2990 bool VariableMask, Align Alignment,
2992 const Instruction *I = nullptr) override {
2993 return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
2994 Alignment, CostKind, I);
2995 }
// Forwarder: cost of an interleaved memory access group (load/store Opcode on
// VecTy with interleave Factor); Indices selects the group members used, and
// the UseMaskFor* flags indicate masking for predication and for gaps.
2996 InstructionCost getInterleavedMemoryOpCost(
2997 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2998 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2999 bool UseMaskForCond, bool UseMaskForGaps) override {
3000 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
3001 Alignment, AddressSpace, CostKind,
3002 UseMaskForCond, UseMaskForGaps);
3003 }
3004 InstructionCost
3005 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
3006 std::optional<FastMathFlags> FMF,
3007 TTI::TargetCostKind CostKind) override {
3008 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
3009 }
3010 InstructionCost
3011 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
3012 TTI::TargetCostKind CostKind) override {
3013 return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
3014 }
3015 InstructionCost
3016 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
3017 VectorType *Ty, FastMathFlags FMF,
3018 TTI::TargetCostKind CostKind) override {
3019 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
3020 CostKind);
3021 }
3022 InstructionCost
3023 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
3024 TTI::TargetCostKind CostKind) override {
3025 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
3026 }
3027 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
3028 TTI::TargetCostKind CostKind) override {
3029 return Impl.getIntrinsicInstrCost(ICA, CostKind);
3030 }
3031 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
3032 ArrayRef<Type *> Tys,
3033 TTI::TargetCostKind CostKind) override {
3034 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
3035 }
3036 unsigned getNumberOfParts(Type *Tp) override {
3037 return Impl.getNumberOfParts(Tp);
3038 }
3039 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
3040 const SCEV *Ptr) override {
3041 return Impl.getAddressComputationCost(Ty, SE, Ptr);
3042 }
3043 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
3044 return Impl.getCostOfKeepingLiveOverCall(Tys);
3045 }
3046 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
3047 MemIntrinsicInfo &Info) override {
3048 return Impl.getTgtMemIntrinsic(Inst, Info);
3049 }
3050 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
3051 return Impl.getAtomicMemIntrinsicMaxElementSize();
3052 }
3053 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
3054 Type *ExpectedType) override {
3055 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
3056 }
3057 Type *getMemcpyLoopLoweringType(
3058 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
3059 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
3060 std::optional<uint32_t> AtomicElementSize) const override {
3061 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
3062 DestAddrSpace, SrcAlign, DestAlign,
3063 AtomicElementSize);
3064 }
// Forwarder: fills OpsOut (out-parameter) with the operand types to use when
// lowering the residual (tail) bytes of an expanded memcpy loop. Note this
// forwarder intentionally has no 'return' — the result is delivered through
// OpsOut.
3065 void getMemcpyLoopResidualLoweringType(
3066 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
3067 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
3068 Align SrcAlign, Align DestAlign,
3069 std::optional<uint32_t> AtomicCpySize) const override {
3070 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
3071 SrcAddrSpace, DestAddrSpace,
3072 SrcAlign, DestAlign, AtomicCpySize);
3073 }
3074 bool areInlineCompatible(const Function *Caller,
3075 const Function *Callee) const override {
3076 return Impl.areInlineCompatible(Caller, Callee);
3077 }
3078 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
3079 unsigned DefaultCallPenalty) const override {
3080 return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
3081 }
3082 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
3083 const ArrayRef<Type *> &Types) const override {
3084 return Impl.areTypesABICompatible(Caller, Callee, Types);
3085 }
// Forwarder with an argument adaptation: the public interface takes only
// (Mode, Ty), while the Impl hook also needs the DataLayout, supplied here
// via this Model's own getDataLayout().
3086 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
3087 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
3088 }
// Forwarder with an argument adaptation (mirrors isIndexedLoadLegal): the
// Impl hook additionally receives the DataLayout from getDataLayout().
3089 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
3090 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
3091 }
3092 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
3093 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
3094 }
3095 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
3096 return Impl.isLegalToVectorizeLoad(LI);
3097 }
3098 bool isLegalToVectorizeStore(StoreInst *SI) const override {
3099 return Impl.isLegalToVectorizeStore(SI);
3100 }
3101 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
3102 unsigned AddrSpace) const override {
3103 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
3104 AddrSpace);
3105 }
// Store-side counterpart of isLegalToVectorizeLoadChain: forwards to Impl.
3106 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
3107 unsigned AddrSpace) const override {
3108 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
3109 AddrSpace);
3110 }
// Forwards the reduction-vectorization legality query (for the given
// recurrence descriptor and vectorization factor) to Impl.
3111 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
3112 ElementCount VF) const override {
3113 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
3114 }
// Forwards the scalable-vector element-type legality query to Impl.
3115 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
3116 return Impl.isElementTypeLegalForScalableVector(Ty);
3117 }
// Forwards the preferred load vectorization-factor query to Impl.
3118 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
3119 unsigned ChainSizeInBytes,
3120 VectorType *VecTy) const override {
3121 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
3122 }
// Store-side counterpart of getLoadVectorFactor: forwards to Impl.
3123 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
3124 unsigned ChainSizeInBytes,
3125 VectorType *VecTy) const override {
3126 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
3127 }
// Forwards the fixed-vs-scalable tie-breaking preference query to Impl.
3128 bool preferFixedOverScalableIfEqualCost() const override {
3129 return Impl.preferFixedOverScalableIfEqualCost();
3130 }
// Forwards the in-loop reduction preference query to Impl.
3131 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
3132 ReductionFlags Flags) const override {
3133 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
3134 }
// Forwards the predicated-reduction-select preference query to Impl.
3135 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
3136 ReductionFlags Flags) const override {
3137 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
3138 }
// Forwards the epilogue-vectorization preference query to Impl.
3139 bool preferEpilogueVectorization() const override {
3140 return Impl.preferEpilogueVectorization();
3141 }
3142
// Forwards the query of whether the given reduction intrinsic should be
// expanded to Impl.
3143 bool shouldExpandReduction(const IntrinsicInst *II) const override {
3144 return Impl.shouldExpandReduction(II);
3145 }
3146
// Forwards the preferred expanded-reduction shuffle-kind query to Impl.
// NOTE(review): the return type on original line 3147 (ReductionShuffle, per
// the abstract declaration rendered later on this page) was dropped by the
// documentation extraction — verify against the original header.
3148 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
3149 return Impl.getPreferredExpandedReductionShuffle(II);
3150 }
3151
// Forwards the GlobalISel global-rematerialization cost query to Impl.
3152 unsigned getGISelRematGlobalCost() const override {
3153 return Impl.getGISelRematGlobalCost();
3154 }
3155
// Forwards the minimum trip-count threshold for tail folding to Impl.
3156 unsigned getMinTripCountTailFoldingThreshold() const override {
3157 return Impl.getMinTripCountTailFoldingThreshold();
3158 }
3159
// Forwards the scalable-vector support query to Impl.
3160 bool supportsScalableVectors() const override {
3161 return Impl.supportsScalableVectors();
3162 }
3163
// Forwards the scalable-vectorization enablement query to Impl.
3164 bool enableScalableVectorization() const override {
3165 return Impl.enableScalableVectorization();
3166 }
3167
// Forwards the active-vector-length (VP intrinsic) support query to Impl.
3168 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
3169 Align Alignment) const override {
3170 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
3171 }
3172
// Forwards the operand-sinking profitability query to the wrapped
// implementation.
// Fix: removed the stray ';' that followed the closing brace of this method
// (an empty member declaration; harmless but non-idiomatic and flagged by
// -Wextra-semi). None of the sibling overrides carry one.
3173 bool isProfitableToSinkOperands(Instruction *I,
3174 SmallVectorImpl<Use *> &Ops) const override {
3175 return Impl.isProfitableToSinkOperands(I, Ops);
3176 }
3177
// Forwards the "is a vector shift by a scalar amount cheap" query to Impl.
3178 bool isVectorShiftByScalarCheap(Type *Ty) const override {
3179 return Impl.isVectorShiftByScalarCheap(Ty);
3180 }
3181
// Forwards the vector-predication legalization strategy query to Impl.
// NOTE(review): the return type on original line 3182 (VPLegalization, per
// the abstract declaration rendered later on this page) was dropped by the
// documentation extraction — verify against the original header.
3183 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
3184 return Impl.getVPLegalizationStrategy(PI);
3185 }
3186
// Forwards the Arm wide-branch availability query (Thumb vs. Arm mode
// selected by the flag) to Impl.
3187 bool hasArmWideBranch(bool Thumb) const override {
3188 return Impl.hasArmWideBranch(Thumb);
3189 }
3190
// Forwards the function feature-mask query to Impl.
3191 uint64_t getFeatureMask(const Function &F) const override {
3192 return Impl.getFeatureMask(F);
3193 }
3194
// Forwards the function-multiversioning query to Impl.
3195 bool isMultiversionedFunction(const Function &F) const override {
3196 return Impl.isMultiversionedFunction(F);
3197 }
3198
// Forwards the maximum-argument-count query to Impl.
3199 unsigned getMaxNumArgs() const override {
3200 return Impl.getMaxNumArgs();
3201 }
3202
// Forwards the global-array padding-size query to Impl.
3203 unsigned getNumBytesToPadGlobalArray(unsigned Size,
3204 Type *ArrayType) const override {
3205 return Impl.getNumBytesToPadGlobalArray(Size, ArrayType);
3206 }
3207
// Forwards kernel launch-bound collection to Impl; results are appended to
// LB as (name, value) pairs.
3208 void collectKernelLaunchBounds(
3209 const Function &F,
3210 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override {
3211 Impl.collectKernelLaunchBounds(F, LB);
3212 }
3213};
3214
3215template <typename T>
3217 : TTIImpl(new Model<T>(Impl)) {}
3218
3219/// Analysis pass providing the \c TargetTransformInfo.
3220///
3221/// The core idea of the TargetIRAnalysis is to expose an interface through
3222/// which LLVM targets can analyze and provide information about the middle
3223/// end's target-independent IR. This supports use cases such as target-aware
3224/// cost modeling of IR constructs.
3225///
3226/// This is a function analysis because much of the cost modeling for targets
3227/// is done in a subtarget specific way and LLVM supports compiling different
3228/// functions targeting different subtargets in order to support runtime
3229/// dispatch according to the observed subtarget.
3230 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
3231 public:
// NOTE(review): the documentation extraction dropped several declaration
// lines from this class (original lines 3232, 3238, 3247, 3249, 3251, 3255,
// 3260, 3263). Per the cross-references rendered later on this page they
// are: the `Result` typedef (TargetTransformInfo), the default constructor
// `TargetIRAnalysis()`, the copy/move constructor and copy/move assignment
// headers, `Result run(const Function &F, FunctionAnalysisManager &)`, and
// the `AnalysisInfoMixin` friend declaration. Verify against the original
// header before relying on this listing.
3233
3234 /// Default construct a target IR analysis.
3235 ///
3236 /// This will use the module's datalayout to construct a baseline
3237 /// conservative TTI result.
3239
3240 /// Construct an IR analysis pass around a target-provided callback.
3241 ///
3242 /// The callback will be called with a particular function for which the TTI
3243 /// is needed and must return a TTI object for that function.
3244 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
3245
3246 // Value semantics. We spell out the constructors for MSVC.
3248 : TTICallback(Arg.TTICallback) {}
3250 : TTICallback(std::move(Arg.TTICallback)) {}
3252 TTICallback = RHS.TTICallback;
3253 return *this;
3254 }
3256 TTICallback = std::move(RHS.TTICallback);
3257 return *this;
3258 }
3259
3261
3262 private:
// AnalysisKey identifies this analysis to the new pass manager's caching
// machinery (via AnalysisInfoMixin above).
3264 static AnalysisKey Key;
3265
3266 /// The callback used to produce a result.
3267 ///
3268 /// We use a completely opaque callback so that targets can provide whatever
3269 /// mechanism they desire for constructing the TTI for a given function.
3270 ///
3271 /// FIXME: Should we really use std::function? It's relatively inefficient.
3272 /// It might be possible to arrange for even stateful callbacks to outlive
3273 /// the analysis and thus use a function_ref which would be lighter weight.
3274 /// This may also be less error prone as the callback is likely to reference
3275 /// the external TargetMachine, and that reference needs to never dangle.
3276 std::function<Result(const Function &)> TTICallback;
3277
3278 /// Helper function used as the callback in the default constructor.
3279 static Result getDefaultTTI(const Function &F);
3280 };
3281
3282/// Wrapper pass for TargetTransformInfo.
3283///
3284/// This pass can be constructed from a TTI object which it stores internally
3285/// and is queried by passes.
// NOTE(review): this is the body of the legacy-PM wrapper pass; its class
// header (original line 3286, `class TargetTransformInfoWrapperPass`, derived
// from ImmutablePass per the Doxygen summary) and the declarations on
// original lines 3299, 3301, and 3303 (the default constructor, a
// TargetIRAnalysis-taking constructor, and `getTTI(const Function &)`, per
// the cross-references rendered later on this page) were dropped by the
// documentation extraction — verify against the original header.
3287 TargetIRAnalysis TIRA;
// Lazily-populated cached result produced via TIRA.
3288 std::optional<TargetTransformInfo> TTI;
3289
// Out-of-line anchor to pin the vtable to one translation unit.
3290 virtual void anchor();
3291
3292 public:
// Legacy pass-manager identification token.
3293 static char ID;
3294
3295 /// We must provide a default constructor for the pass but it should
3296 /// never be used.
3297 ///
3298 /// Use the constructor below or call one of the creation routines.
3300
3302
3304 };
3305
3306/// Create an analysis pass wrapper around a TTI object.
3307///
3308/// This analysis pass just holds the TTI instance and makes it available to
3309/// clients.
3311
3312} // namespace llvm
3313
3314#endif
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
RelocType Type
Definition: COFFYAML.cpp:410
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one Idx.
uint32_t Index
uint64_t Size
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine InstCombiner
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
Value * RHS
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction to allocate memory on the stack
Definition: Instructions.h:63
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:395
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition: Pass.h:281
The core instruction combiner logic.
Definition: InstCombiner.h:48
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Definition: DerivedTypes.h:42
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:630
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:292
Multiway switch.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
Wrapper pass for TargetTransformInfo.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
TargetTransformInfo & getTTI(const Function &F)
virtual bool preferFixedOverScalableIfEqualCost() const =0
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)=0
virtual InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)=0
virtual TypeSize getRegisterBitWidth(RegisterKind K) const =0
virtual const DataLayout & getDataLayout() const =0
virtual InstructionCost getBranchMispredictPenalty()=0
virtual bool isProfitableLSRChainElement(Instruction *I)=0
virtual InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr)=0
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)=0
virtual uint64_t getFeatureMask(const Function &F) const =0
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)=0
virtual bool isLegalNTStore(Type *DataType, Align Alignment)=0
virtual unsigned adjustInliningThreshold(const CallBase *CB)=0
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, OperandValueInfo Op1Info, OperandValueInfo Op2Info, const Instruction *I)=0
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I)=0
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const =0
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace)=0
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)=0
virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const =0
virtual bool isLegalNTLoad(Type *DataType, Align Alignment)=0
virtual bool enableOrderedReductions()=0
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit)=0
virtual unsigned getNumberOfRegisters(unsigned ClassID) const =0
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const =0
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment)=0
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const =0
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual bool shouldPrefetchAddressSpace(unsigned AS) const =0
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)=0
virtual unsigned getMinVectorRegisterBitWidth() const =0
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const =0
virtual std::optional< unsigned > getVScaleForTuning() const =0
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind)=0
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)=0
virtual bool supportsEfficientVectorElementLoadStore()=0
virtual unsigned getRegUsageForType(Type *Ty)=0
virtual bool hasArmWideBranch(bool Thumb) const =0
virtual MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const =0
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr)=0
virtual unsigned getAssumedAddrSpace(const Value *V) const =0
virtual bool isTruncateFree(Type *Ty1, Type *Ty2)=0
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID)=0
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const =0
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)=0
virtual bool shouldBuildLookupTables()=0
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const =0
virtual bool isLegalToVectorizeStore(StoreInst *SI) const =0
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)=0
virtual bool isVectorShiftByScalarCheap(Type *Ty) const =0
virtual unsigned getGISelRematGlobalCost() const =0
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const =0
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)=0
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const =0
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment)=0
virtual bool supportsTailCallFor(const CallBase *CB)=0
virtual std::optional< unsigned > getMaxVScale() const =0
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind)=0
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const =0
virtual unsigned getMaxNumArgs() const =0
virtual bool shouldExpandReduction(const IntrinsicInst *II) const =0
virtual bool enableWritePrefetching() const =0
virtual bool useColdCCForColdCall(Function &F)=0
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const =0
virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags) const =0
virtual int getInlinerVectorBonusPercent() const =0
virtual unsigned getMaxPrefetchIterationsAhead() const =0
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment)=0
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual unsigned getCacheLineSize() const =0
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const =0
virtual AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const =0
virtual bool shouldBuildLookupTablesForConstant(Constant *C)=0
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)=0
virtual bool isProfitableToHoist(Instruction *I)=0
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind, ArrayRef< Value * > VL={})=0
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)=0
virtual InstructionCost getFPOpCost(Type *Ty)=0
virtual unsigned getMinTripCountTailFoldingThreshold() const =0
virtual bool enableMaskedInterleavedAccessVectorization()=0
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const =0
virtual bool isTypeLegal(Type *Ty)=0
virtual BranchProbability getPredictableBranchThreshold()=0
virtual bool enableScalableVectorization() const =0
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)=0
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual const char * getRegisterClassName(unsigned ClassID) const =0
virtual unsigned getMaxInterleaveFactor(ElementCount VF)=0
virtual bool enableAggressiveInterleaving(bool LoopHasReductions)=0
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const =0
virtual bool haveFastSqrt(Type *Ty)=0
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)=0
virtual std::optional< unsigned > getCacheSize(CacheLevel Level) const =0
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)=0
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)=0
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP)=0
virtual std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const =0
virtual bool supportsScalableVectors() const =0
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const =0
virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)=0
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment)=0
virtual unsigned getNumberOfParts(Type *Tp)=0
virtual bool isLegalICmpImmediate(int64_t Imm)=0
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)=0
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const =0
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)=0
virtual bool hasDivRemOp(Type *DataType, bool IsSigned)=0
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const =0
virtual bool shouldBuildRelLookupTables()=0
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TargetCostKind CostKind)=0
virtual bool isLoweredToCall(const Function *F)=0
virtual bool isSourceOfDivergence(const Value *V)=0
virtual bool isLegalAddScalableImmediate(int64_t Imm)=0
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const =0
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const =0
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment)=0
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const =0
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput)=0
virtual bool isFPVectorizationPotentiallyUnsafe()=0
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)=0
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const =0
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty)=0
virtual bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const =0
virtual InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp) const =0
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &OpsToSink) const =0
virtual bool hasBranchDivergence(const Function *F=nullptr)=0
virtual InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)=0
virtual bool isMultiversionedFunction(const Function &F) const =0
virtual unsigned getInliningThresholdMultiplier() const =0
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)=0
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment)=0
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)=0
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const =0
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const =0
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const =0
virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)=0
virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)=0
virtual bool shouldDropLSRSolutionIfLessProfitable() const =0
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const =0
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)=0
virtual bool prefersVectorizedAddressing()=0
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const =0
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args, const Instruction *CxtI)=0
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, OperandValueInfo OpInfo, const Instruction *I)=0
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo)=0
virtual InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)=0
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)=0
virtual bool isAlwaysUniform(const Value *V)=0
virtual std::optional< unsigned > getMinPageSize() const =0
virtual InstructionCost getMemcpyCost(const Instruction *I)=0
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const =0
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const =0
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const =0
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)=0
virtual unsigned getEpilogueVectorizationMinVF()=0
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)=0
virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)=0
virtual unsigned getFlatAddressSpace()=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)=0
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx)=0
virtual unsigned getPrefetchDistance() const =0
virtual void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const =0
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I)=0
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace)=0
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const =0
virtual bool isNumRegsMajorCostOfLSR()=0
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment)=0
virtual bool isSingleThreaded() const =0
virtual bool isLegalAddImmediate(int64_t Imm)=0
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const =0
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I, int64_t ScalableOffset)=0
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)=0
virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const =0
virtual bool isVScaleKnownToBeAPowerOfTwo() const =0
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)=0
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const =0
virtual bool enableInterleavedAccessVectorization()=0
virtual bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx)=0
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const =0
virtual bool preferEpilogueVectorization() const =0
virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I)=0
virtual int getInliningLastCallToStaticBonus() const =0
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const =0
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const =0
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast)=0
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const =0
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
bool isLegalToVectorizeLoad(LoadInst *LI) const
std::optional< unsigned > getVScaleForTuning() const
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
Return false if an AS0 address cannot possibly alias an AS1 address.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool isLegalToVectorizeStore(StoreInst *SI) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don't restrict interleaved unrolling to small loops.
uint64_t getFeatureMask(const Function &F) const
Returns a bitmask constructed from the target-features or fmv-features metadata of a function.
bool isMultiversionedFunction(const Function &F) const
Returns true if this is an instance of a function with multiple versions.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
bool preferInLoopReduction(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
bool isAlwaysUniform(const Value *V) const
unsigned getAssumedAddrSpace(const Value *V) const
bool shouldDropLSRSolutionIfLessProfitable() const
Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C2.
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize=std::nullopt) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool prefersVectorizedAddressing() const
Return true if target doesn't mind addresses in vectors.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
bool hasBranchDivergence(const Function *F=nullptr) const
Return true if branch divergence exists.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
bool invalidate(Function &, const PreservedAnalyses &, FunctionAnalysisManager::Invalidator &)
Handle the invalidation of this information.
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const
Get target-customized preferences for the generic loop unrolling transformation.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing an instructions unique non-constant operands.
bool supportsTailCallFor(const CallBase *CB) const
If target supports tail call on CB.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Targets can implement their own combinations for target-specific intrinsics.
bool isProfitableLSRChainElement(Instruction *I) const
TypeSize getRegisterBitWidth(RegisterKind K) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
Returns a penalty for invoking call Call in F.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
std::optional< unsigned > getMaxVScale() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Can be used to implement target-specific instruction combining.
bool enableOrderedReductions() const
Return true if we should be enabling ordered reductions for the target.
InstructionCost getInstructionCost(const User *U, TargetCostKind CostKind) const
This is a helper function which calls the three-argument getInstructionCost with Operands which are t...
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
unsigned getAtomicMemIntrinsicMaxElementSize() const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
static PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I)
Get the kind of extension that an instruction represents.
bool shouldTreatInstructionLikeSelect(const Instruction *I) const
Should the Select Optimization pass treat the given instruction like a select, potentially converting...
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
Query the target what the preferred style of tail folding is.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType=nullptr, TargetCostKind CostKind=TCK_SizeAndLatency) const
Estimate the cost of a GEP operation when lowered.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
Return true is the target supports interleaved access for the given vector type VTy,...
unsigned getRegUsageForType(Type *Ty) const
Returns the estimated number of registers required to represent Ty.
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
Returns true if the target supports broadcasting a load to a vector of type <NumElements x ElementTy...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
unsigned getMaxPrefetchIterationsAhead() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
Return true if globals in this address space can have initializers other than undef.
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
Return true if the target supports strided load.
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
bool enableSelectOptimize() const
Should the Select Optimization pass be enabled and ran.
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
OperandValueProperties
Additional properties of an operand's values.
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const PointersChainInfo &Info, Type *AccessTy, TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Estimate the cost of a chain of pointers (typically pointer operands of a chain of loads or stores wi...
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
std::optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned adjustInliningThreshold(const CallBase *CB) const
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compar...
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Some HW prefetchers can handle accesses up to a certain constant stride.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
unsigned getMinVectorRegisterBitWidth() const
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
It can be advantageous to detach complex constants from their uses to make their generation cheaper.
bool hasArmWideBranch(bool Thumb) const
const char * getRegisterClassName(unsigned ClassID) const
bool preferEpilogueVectorization() const
Return true if the loop vectorizer should consider vectorizing an otherwise scalar epilogue loop.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), unsigned *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
unsigned getEpilogueVectorizationMinVF() const
void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
Collect kernel launch bounds for F into LB.
PopcntSupportKind
Flags indicating the kind of support for population count.
InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
Return the preferred addressing mode LSR should make efforts to generate.
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop.
unsigned getInliningThresholdMultiplier() const
InstructionCost getBranchMispredictPenalty() const
Returns estimated penalty of a branch misprediction in latency.
unsigned getNumberOfRegisters(unsigned ClassID) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
Return true if this is an alternating opcode pattern that can be lowered to a single instruction on t...
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
std::optional< unsigned > getMinPageSize() const
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
bool shouldBuildRelLookupTables() const
Return true if lookup tables should be turned into relative lookup tables.
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
std::optional< unsigned > getCacheSize(CacheLevel Level) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Can be used to implement target-specific instruction combining.
bool isLegalAddScalableImmediate(int64_t Imm) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
Identifies if the vector form of the intrinsic has a scalar operand.
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Returns the cost estimation for alternating opcode pattern that can be lowered to a single instructio...
TargetCostConstants
Underlying constants for 'cost' values in this interface.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={}) const
Estimate the overhead of scalarizing an instruction.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
unsigned getMinTripCountTailFoldingThreshold() const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
unsigned getMaxInterleaveFactor(ElementCount VF) const
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
bool isNumRegsMajorCostOfLSR() const
Return true if LSR major cost is number of registers.
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
unsigned getGISelRematGlobalCost() const
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
MemIndexedMode
The type of load/store indexing.
@ MIM_PostInc
Post-incrementing.
@ MIM_PostDec
Post-decrementing.
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
bool useColdCCForColdCall(Function &F) const
Return true if the input function, which is cold at all call sites, should use coldcc calling conventi...
InstructionCost getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type.
bool supportsTailCalls() const
If the target supports tail calls.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Query the target whether the specified address space cast from FromAS to ToAS is valid.
unsigned getNumberOfParts(Type *Tp) const
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize=std::nullopt) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
Query the target whether it would be preferred to create a predicated vector loop, which can avoid the...
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool shouldExpandReduction(const IntrinsicInst *II) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
Returns the maximum memset / memcpy size in bytes that still makes it profitable to inline the call.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const
Get target-customized preferences for the generic loop peeling transformation.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
OperandValueKind
Additional information about an operand's possible values.
CacheLevel
The possible cache levels.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
Definition: MsgPackReader.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
@ None
Definition: CodeGenData.h:106
AtomicOrdering
Atomic ordering for LLVM's memory model.
TargetTransformInfo TTI
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:92
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
TargetLibraryInfo * TLI
LoopVectorizationLegality * LVL
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is.
bool AllowPeeling
Allow peeling off loop iterations.
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
bool PeelProfiledIterations
Allow peeling basing on profile.
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Describe known properties for a set of pointers.
unsigned IsKnownStride
True if distance between any two neighbouring pointers is a known value.
unsigned IsUnitStride
These properties only valid if SameBaseAddress is set.
unsigned IsSameBaseAddress
All the GEPs in a set have same base address.
Flags describing the kind of vector reduction.
bool IsSigned
Whether the operation is a signed int reduction.
bool IsMaxOp
If the op a min/max kind, true if it's a max operation.
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
unsigned MaxIterationsCountToAnalyze
Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned MaxUpperBound
Set the maximum upper bound of trip count.
VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)