VectorUtils.h
1//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ANALYSIS_VECTORUTILS_H
14#define LLVM_ANALYSIS_VECTORUTILS_H
15
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Analysis/LoopAccessAnalysis.h"
19#include "llvm/IR/Module.h"
20#include "llvm/IR/VFABIDemangler.h"
21#include "llvm/IR/VectorTypeUtils.h"
22#include "llvm/Support/CheckedArithmetic.h"
23#include "llvm/Support/Compiler.h"
24
25namespace llvm {
26class TargetLibraryInfo;
27class IntrinsicInst;
28
29/// The Vector Function Database.
30///
31/// Helper class used to find the vector functions associated to a
32/// scalar CallInst.
33class VFDatabase {
34 /// The Module of the CallInst CI.
35 const Module *M;
36 /// The CallInst instance being queried for scalar to vector mappings.
37 const CallInst &CI;
38 /// List of vector function descriptors associated to the call
39 /// instruction.
40 const SmallVector<VFInfo, 8> ScalarToVectorMappings;
41
42 /// Retrieve the scalar-to-vector mappings associated to the rule of
43 /// a vector Function ABI.
44 static void getVFABIMappings(const CallInst &CI,
45 SmallVectorImpl<VFInfo> &Mappings) {
46 if (!CI.getCalledFunction())
47 return;
48
49 const StringRef ScalarName = CI.getCalledFunction()->getName();
50
51 SmallVector<std::string, 8> ListOfStrings;
52 // The check for the vector-function-abi-variant attribute is done when
53 // retrieving the vector variant names here.
54 VFABI::getVectorVariantNames(CI, ListOfStrings);
55 if (ListOfStrings.empty())
56 return;
57 for (const auto &MangledName : ListOfStrings) {
58 const std::optional<VFInfo> Shape =
59 VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());
60 // A match is found via scalar and vector names, and also by
61 // ensuring that the variant described in the attribute has a
62 // corresponding definition or declaration of the vector
63 // function in the Module M.
64 if (Shape && (Shape->ScalarName == ScalarName)) {
65 assert(CI.getModule()->getFunction(Shape->VectorName) &&
66 "Vector function is missing.");
67 Mappings.push_back(*Shape);
68 }
69 }
70 }
71
72public:
73 /// Retrieve all the VFInfo instances associated to the CallInst CI.
74 static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
75 SmallVector<VFInfo, 8> Ret;
76
77 // Get mappings from the Vector Function ABI variants.
78 getVFABIMappings(CI, Ret);
79
80 // Other non-VFABI variants should be retrieved here.
81
82 return Ret;
83 }
84
85 static bool hasMaskedVariant(const CallInst &CI,
86 std::optional<ElementCount> VF = std::nullopt) {
87 // Check whether we have at least one masked vector version of a scalar
88 // function. If no VF is specified then we check for any masked variant,
89 // otherwise we look for one that matches the supplied VF.
90 auto Mappings = VFDatabase::getMappings(CI);
91 for (VFInfo Info : Mappings)
92 if (!VF || Info.Shape.VF == *VF)
93 if (Info.isMasked())
94 return true;
95
96 return false;
97 }
98
99 /// Constructor, requires a CallInst instance.
100 VFDatabase(CallInst &CI)
101 : M(CI.getModule()), CI(CI),
102 ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
103
104 /// \defgroup VFDatabase query interface.
105 ///
106 /// @{
107 /// Retrieve the Function with VFShape \p Shape.
108 Function *getVectorizedFunction(const VFShape &Shape) const {
109 if (Shape == VFShape::getScalarShape(CI.getFunctionType()))
110 return CI.getCalledFunction();
111
112 for (const auto &Info : ScalarToVectorMappings)
113 if (Info.Shape == Shape)
114 return M->getFunction(Info.VectorName);
115
116 return nullptr;
117 }
118 /// @}
119};
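// Editor's example (not part of the upstream header): a minimal sketch of how
// a transform might query the database for a 4-lane, unmasked variant of a
// call. `CI` is assumed to be a CallInst whose callee carries
// "vector-function-abi-variant" mappings; all names are illustrative.
//
//   VFDatabase DB(CI);
//   for (const VFInfo &Info : VFDatabase::getMappings(CI))
//     if (Info.Shape.VF == ElementCount::getFixed(4) && !Info.isMasked())
//       if (Function *VecF = DB.getVectorizedFunction(Info.Shape))
//         ; // rewrite the call to target VecF instead of the scalar callee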
120
121template <typename T> class ArrayRef;
122class DemandedBits;
123template <typename InstTy> class InterleaveGroup;
124class IRBuilderBase;
125class Loop;
126class TargetTransformInfo;
127class Value;
128
129namespace Intrinsic {
130typedef unsigned ID;
131}
132
133/// Identify if the intrinsic is trivially vectorizable.
134/// This method returns true if the intrinsic's argument types are all scalars
135/// for the scalar form of the intrinsic and all vectors (or scalars handled by
136/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
137///
138/// Note: isTriviallyVectorizable implies isTriviallyScalarizable.
139LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID);
140
141/// Identify if the intrinsic is trivially scalarizable.
142/// This method returns true following the same predicates of
143/// isTriviallyVectorizable.
144
145/// Note: There are intrinsics where implementing vectorization for the
146/// intrinsic is redundant, but we want to implement scalarization of the
147/// vector. To prevent the requirement that an intrinsic also implements
148/// vectorization we provide this separate function.
149LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID,
150 const TargetTransformInfo *TTI);
151
152/// Identifies if the vector form of the intrinsic has a scalar operand.
153/// \p TTI is used to consider target specific intrinsics, if no target specific
154/// intrinsics will be considered then it is appropriate to pass in nullptr.
155LLVM_ABI bool
156isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx,
157 const TargetTransformInfo *TTI);
158
159/// Identifies if the vector form of the intrinsic is overloaded on the type of
160/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
161/// \p TTI is used to consider target specific intrinsics, if no target specific
162/// intrinsics will be considered then it is appropriate to pass in nullptr.
163LLVM_ABI bool
164isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx,
165 const TargetTransformInfo *TTI);
166
167/// Identifies if the vector form of the intrinsic that returns a struct is
168/// overloaded at the struct element index \p RetIdx. \p TTI is used to
169/// consider target specific intrinsics, if no target specific intrinsics
170/// will be considered then it is appropriate to pass in nullptr.
171LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(
172 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI);
173
174/// Returns intrinsic ID for call.
175/// For the input call instruction it finds mapping intrinsic and returns
176/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
177LLVM_ABI Intrinsic::ID
178getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI);
179
180/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
181LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
182
183/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
184LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);
185
186/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
187/// factor.
188LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);
189
190/// Given a vector and an element number, see if the scalar value is
191/// already around as a register, for example if it were inserted then extracted
192/// from the vector.
193LLVM_ABI Value *findScalarElement(Value *V, unsigned EltNo);
194
195/// If all non-negative \p Mask elements are the same value, return that value.
196/// If all elements are negative (undefined) or \p Mask contains different
197/// non-negative values, return -1.
198LLVM_ABI int getSplatIndex(ArrayRef<int> Mask);
199
200/// Get splat value if the input is a splat vector or return nullptr.
201/// The value may be extracted from a splat constants vector or from
202/// a sequence of instructions that broadcast a single value into a vector.
203LLVM_ABI Value *getSplatValue(const Value *V);
204
205/// Return true if each element of the vector value \p V is poisoned or equal to
206/// every other non-poisoned element. If an index element is specified, either
207/// every element of the vector is poisoned or the element at that index is not
208/// poisoned and equal to every other non-poisoned element.
209/// This may be more powerful than the related getSplatValue() because it is
210/// not limited by finding a scalar source value to a splatted vector.
211LLVM_ABI bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);
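// Editor's example (not part of the upstream header): how the splat helpers
// differ, assuming `V` is a Value* and `SVI` a ShuffleVectorInst* in scope.
//
//   Value *Scalar = getSplatValue(V); // non-null only if a broadcast source
//                                     // scalar can be recovered
//   bool Uniform = isSplatValue(V);   // also true for e.g. constant splats
//                                     // where no scalar source value exists
//   int SplatIdx = getSplatIndex(SVI->getShuffleMask()); // -1 if not a splat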
212
213/// Transform a shuffle mask's output demanded element mask into demanded
214/// element masks for the 2 operands, returns false if the mask isn't valid.
215/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
216/// \p AllowUndefElts permits "-1" indices to be treated as undef.
217LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
218 const APInt &DemandedElts,
219 APInt &DemandedLHS, APInt &DemandedRHS,
220 bool AllowUndefElts = false);
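// Editor's example (not part of the upstream header): demanded elements for a
// mask that interleaves the low halves of two 4-element operands. The values
// here are illustrative only.
//
//   int Mask[] = {0, 4, 1, 5};
//   APInt DemandedElts = APInt::getAllOnes(4); // all four output lanes used
//   APInt DemandedLHS, DemandedRHS;
//   if (getShuffleDemandedElts(/*SrcWidth=*/4, Mask, DemandedElts,
//                              DemandedLHS, DemandedRHS))
//     ; // DemandedLHS == DemandedRHS == 0b0011 (lanes 0 and 1 of each operand)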
221
222/// Does this shuffle mask represent either one slide shuffle or a pair of
223/// two slide shuffles, combined with a select on some constant vector mask?
224/// A slide is a shuffle mask which shifts some set of elements up or down
225/// the vector, with all other elements being undefined. An identity shuffle
226/// will be matched as a slide by 0. The output parameter provides the source
227/// (-1 means no source) and the slide direction for each slide.
228LLVM_ABI bool isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
229 std::array<std::pair<int, int>, 2> &SrcInfo);
230
231/// Replace each shuffle mask index with the scaled sequential indices for an
232/// equivalent mask of narrowed elements. Mask elements that are less than 0
233/// (sentinel values) are repeated in the output mask.
234///
235/// Example with Scale = 4:
236/// <4 x i32> <3, 2, 0, -1> -->
237/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
238///
239/// This is the reverse process of widening shuffle mask elements, but it always
240/// succeeds because the indexes can always be multiplied (scaled up) to map to
241/// narrower vector elements.
242LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
243 SmallVectorImpl<int> &ScaledMask);
244
245/// Try to transform a shuffle mask by replacing elements with the scaled index
246/// for an equivalent mask of widened elements. If all mask elements that would
247/// map to a wider element of the new mask are the same negative number
248/// (sentinel value), that element of the new mask is the same value. If any
249/// element in a given slice is negative and some other element in that slice is
250/// not the same value, return false (partial matches with sentinel values are
251/// not allowed).
252///
253/// Example with Scale = 4:
254/// <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
255/// <4 x i32> <3, 2, 0, -1>
256///
257/// This is the reverse process of narrowing shuffle mask elements if it
258/// succeeds. This transform is not always possible because indexes may not
259/// divide evenly (scale down) to map to wider vector elements.
260LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
261 SmallVectorImpl<int> &ScaledMask);
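// Editor's example (not part of the upstream header): the narrow/widen pair is
// a round trip (lossless whenever widening succeeds) on the documented
// Scale = 4 case.
//
//   SmallVector<int> WideMask = {3, 2, 0, -1};
//   SmallVector<int> NarrowMask;
//   narrowShuffleMaskElts(/*Scale=*/4, WideMask, NarrowMask);
//   // NarrowMask == <12,13,14,15, 8,9,10,11, 0,1,2,3, -1,-1,-1,-1>
//   SmallVector<int> RoundTrip;
//   bool Widened = widenShuffleMaskElts(/*Scale=*/4, NarrowMask, RoundTrip);
//   // Widened is true and RoundTrip == WideMask for this input.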
262
263/// A variant of the previous method which is specialized for Scale=2, and
264/// treats -1 as undef and allows widening when a wider element is partially
265/// undef in the narrow form of the mask. This transformation discards
266/// information about which bytes in the original shuffle were undef.
267LLVM_ABI bool widenShuffleMaskElts(ArrayRef<int> M,
268 SmallVectorImpl<int> &NewMask);
269
270/// Attempt to narrow/widen the \p Mask shuffle mask to the \p NumDstElts target
271/// width. Internally this will call narrowShuffleMaskElts/widenShuffleMaskElts.
272/// This will assert unless NumDstElts is a multiple of Mask.size (or
273/// vice-versa). Returns false on failure, and ScaledMask will be in an
274/// undefined state.
275LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
276 SmallVectorImpl<int> &ScaledMask);
277
278/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
279/// to get the shuffle mask with widest possible elements.
280LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
281 SmallVectorImpl<int> &ScaledMask);
282
283/// Splits and processes shuffle mask depending on the number of input and
284/// output registers. The function does 2 main things: 1) splits the
285/// source/destination vectors into real registers; 2) does the mask analysis to
286/// identify which real registers are permuted. Then the function processes the
287/// resulting register masks using the provided action items. If no input register
288/// is defined, \p NoInputAction action is used. If only 1 input register is
289/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
290/// process > 2 input registers and masks.
291/// \param Mask Original shuffle mask.
292/// \param NumOfSrcRegs Number of source registers.
293/// \param NumOfDestRegs Number of destination registers.
294/// \param NumOfUsedRegs Number of actually used destination registers.
295LLVM_ABI void processShuffleMasks(
296 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
297 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
298 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
299 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
300 ManyInputsAction);
301
302/// Compute the demanded elements mask of horizontal binary operations. A
303/// horizontal operation combines two adjacent elements in a vector operand.
304/// This function returns a mask for the elements that correspond to the first
305/// operand of this horizontal combination. For example, for two vectors
306/// [X1, X2, X3, X4] and [Y1, Y2, Y3, Y4], the resulting mask can include the
307/// elements X1, X3, Y1, and Y3. To get the other operands, simply shift the
308/// result of this function to the left by 1.
309///
310/// \param VectorBitWidth the total bit width of the vector
311/// \param DemandedElts the demanded elements mask for the operation
312/// \param DemandedLHS the demanded elements mask for the left operand
313/// \param DemandedRHS the demanded elements mask for the right operand
314LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
315 const APInt &DemandedElts,
316 APInt &DemandedLHS,
317 APInt &DemandedRHS);
318
319/// Compute a map of integer instructions to their minimum legal type
320/// size.
321///
322/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
323/// type (e.g. i32) whenever arithmetic is performed on them.
324///
325/// For targets with native i8 or i16 operations, usually InstCombine can shrink
326/// the arithmetic type down again. However InstCombine refuses to create
327/// illegal types, so for targets without i8 or i16 registers, the lengthening
328/// and shrinking remains.
329///
330/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
331/// their scalar equivalents do not, so during vectorization it is important to
332/// remove these lengthens and truncates when deciding the profitability of
333/// vectorization.
334///
335/// This function analyzes the given range of instructions and determines the
336/// minimum type size each can be converted to. It attempts to remove or
337/// minimize type size changes across each def-use chain, so for example in the
338/// following code:
339///
340/// %1 = load i8, i8*
341/// %2 = add i8 %1, 2
342/// %3 = load i16, i16*
343/// %4 = zext i8 %2 to i32
344/// %5 = zext i16 %3 to i32
345/// %6 = add i32 %4, %5
346/// %7 = trunc i32 %6 to i16
347///
348/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
349/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
350///
351/// If the optional TargetTransformInfo is provided, this function tries harder
352/// to do less work by only looking at illegal types.
353LLVM_ABI MapVector<Instruction *, uint64_t>
354computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
355 const TargetTransformInfo *TTI = nullptr);
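// Editor's example (not part of the upstream header): a typical query over a
// loop body, assuming `L` (Loop *), `DB` (DemandedBits &) and `TTI`
// (TargetTransformInfo &) are available from the surrounding pass.
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(L->getBlocks(), DB, &TTI);
//   for (const auto &[I, Bits] : MinBWs)
//     ; // I can be evaluated in an integer type of width Bits without
//       // changing the program's results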
356
357/// Compute the union of two access-group lists.
358///
359/// If the list contains just one access group, it is returned directly. If the
360/// list is empty, returns nullptr.
361LLVM_ABI MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);
362
363/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
364/// are both in. If either instruction does not access memory at all, it is
365/// considered to be in every list.
366///
367/// If the list contains just one access group, it is returned directly. If the
368/// list is empty, returns nullptr.
369LLVM_ABI MDNode *intersectAccessGroups(const Instruction *Inst1,
370 const Instruction *Inst2);
371
372/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
373/// vectorization. It can be preserved after vectorization if the kind is one of
374/// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,
375/// MD_access_group, MD_mmra].
376LLVM_ABI void getMetadataToPropagate(
377 Instruction *Inst,
378 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata);
379
380/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
381/// MD_nontemporal, MD_access_group, MD_mmra].
382/// For K in Kinds, we get the MDNode for K from each of the
383/// elements of VL, compute their "intersection" (i.e., the most generic
384/// metadata value that covers all of the individual values), and set I's
385/// metadata for K equal to the intersection value.
386///
387/// This function always sets a (possibly null) value for each K in Kinds.
388LLVM_ABI Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
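// Editor's example (not part of the upstream header): after replacing a bundle
// of scalar loads with one wide load, carry over the metadata that remains
// valid for the wide access. `WideLoad` and `Load0..Load3` are hypothetical.
//
//   SmallVector<Value *, 4> Scalars = {Load0, Load1, Load2, Load3};
//   propagateMetadata(WideLoad, Scalars); // intersects tbaa, alias.scope,
//                                         // noalias, access_group, ... nodes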
389
390/// Create a mask that filters the members of an interleave group where there
391/// are gaps.
392///
393/// For example, the mask for \p Group with interleave-factor 3
394/// and \p VF 4, that has only its first member present is:
395///
396/// <1,0,0,1,0,0,1,0,0,1,0,0>
397///
398/// Note: The result is a mask of 0's and 1's, as opposed to the other
399/// create[*]Mask() utilities which create a shuffle mask (mask that
400/// consists of indices).
401LLVM_ABI Constant *
402createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
403 const InterleaveGroup<Instruction> &Group);
404
405/// Create a mask with replicated elements.
406///
407/// This function creates a shuffle mask for replicating each of the \p VF
408/// elements in a vector \p ReplicationFactor times. It can be used to
409/// transform a mask of \p VF elements into a mask of
410/// \p VF * \p ReplicationFactor elements used by a predicated
411/// interleaved-group of loads/stores whose Interleaved-factor ==
412/// \p ReplicationFactor.
413///
414/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
415///
416/// <0,0,0,1,1,1,2,2,2,3,3,3>
417LLVM_ABI llvm::SmallVector<int, 16>
418createReplicatedMask(unsigned ReplicationFactor, unsigned VF);
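// Editor's example (not part of the upstream header): expanding a per-group
// <4 x i1> block mask to the per-lane mask of a factor-3 interleave group,
// assuming an IRBuilder<> `Builder` and a Value* `BlockMask` are in scope.
//
//   Value *LaneMask = Builder.CreateShuffleVector(
//       BlockMask, createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/4));
//   // LaneMask is <12 x i1>: <m0,m0,m0, m1,m1,m1, m2,m2,m2, m3,m3,m3>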
419
420/// Create an interleave shuffle mask.
421///
422/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
423/// vectorization factor \p VF into a single wide vector. The mask is of the
424/// form:
425///
426/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
427///
428/// For example, the mask for VF = 4 and NumVecs = 2 is:
429///
430/// <0, 4, 1, 5, 2, 6, 3, 7>.
431LLVM_ABI llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF,
432 unsigned NumVecs);
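// Editor's example (not part of the upstream header): interleaving two 4-lane
// vectors `A` and `B` into one 8-lane vector with an IRBuilder<> `Builder`.
//
//   Value *Wide = Builder.CreateShuffleVector(
//       A, B, createInterleaveMask(/*VF=*/4, /*NumVecs=*/2));
//   // Wide == <A0, B0, A1, B1, A2, B2, A3, B3>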
433
434/// Create a stride shuffle mask.
435///
436/// This function creates a shuffle mask whose elements begin at \p Start and
437/// are incremented by \p Stride. The mask can be used to deinterleave an
438/// interleaved vector into separate vectors of vectorization factor \p VF. The
439/// mask is of the form:
440///
441/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
442///
443/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
444///
445/// <0, 2, 4, 6>
446LLVM_ABI llvm::SmallVector<int, 16>
447createStrideMask(unsigned Start, unsigned Stride, unsigned VF);
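// Editor's example (not part of the upstream header): de-interleaving the even
// and odd lanes of an 8-lane vector `Wide` with an IRBuilder<> `Builder`.
//
//   Value *Even = Builder.CreateShuffleVector(Wide, createStrideMask(0, 2, 4));
//   Value *Odd  = Builder.CreateShuffleVector(Wide, createStrideMask(1, 2, 4));
//   // Even == <W0, W2, W4, W6>, Odd == <W1, W3, W5, W7>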
448
449/// Create a sequential shuffle mask.
450///
451/// This function creates shuffle mask whose elements are sequential and begin
452/// at \p Start. The mask contains \p NumInts integers and is padded with \p
453/// NumUndefs undef values. The mask is of the form:
454///
455/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
456///
457/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
458///
459/// <0, 1, 2, 3, undef, undef, undef, undef>
460LLVM_ABI llvm::SmallVector<int, 16>
461createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
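// Editor's example (not part of the upstream header): padding a 4-lane vector
// `V` out to 8 lanes (a pattern also used when concatenating vectors of
// unequal length), assuming an IRBuilder<> `Builder`.
//
//   Value *Padded = Builder.CreateShuffleVector(
//       V, createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4));
//   // Padded == <V0, V1, V2, V3, poison, poison, poison, poison>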
462
463/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
464/// mask assuming both operands are identical. This assumes that the unary
465/// shuffle will use elements from operand 0 (operand 1 will be unused).
466LLVM_ABI llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
467 unsigned NumElts);
468
469/// Concatenate a list of vectors.
470///
471/// This function generates code that concatenates the vectors in \p Vecs into a
472/// single large vector. The number of vectors should be greater than one, and
473/// their element types should be the same. The number of elements in the
474/// vectors should also be the same; however, if the last vector has fewer
475/// elements, it will be padded with undefs.
476LLVM_ABI Value *concatenateVectors(IRBuilderBase &Builder,
477 ArrayRef<Value *> Vecs);
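// Editor's example (not part of the upstream header): gluing four <4 x float>
// parts back together, assuming an IRBuilder<> `Builder` and Value*s P0..P3.
//
//   Value *Parts[] = {P0, P1, P2, P3};
//   Value *Whole = concatenateVectors(Builder, Parts); // <16 x float>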
478
479/// Given a mask vector of i1, return true if all of the elements of this
480/// predicate mask are known to be false or undef. That is, return true if all
481/// lanes can be assumed inactive.
482LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask);
483
484/// Given a mask vector of i1, return true if all of the elements of this
485/// predicate mask are known to be true or undef. That is, return true if all
486/// lanes can be assumed active.
487LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask);
488
489/// Given a mask vector of i1, return true if any of the elements of this
490/// predicate mask are known to be true or undef. That is, return true if at
491/// least one lane can be assumed active.
492LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask);
493
494/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
495/// for each lane which may be active.
496LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask);
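// Editor's example (not part of the upstream header): a common simplification
// pattern for a masked memory operation whose mask operand is `Mask`.
//
//   if (maskIsAllZeroOrUndef(Mask))
//     ; // no lane executes: the masked load/store can be dropped
//   else if (maskIsAllOneOrUndef(Mask))
//     ; // every lane executes: an unconditional load/store suffices
//   APInt Demanded = possiblyDemandedEltsInMask(Mask); // one bit per lane that
//                                                      // may be active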
497
498/// The group of interleaved loads/stores sharing the same stride and
499/// close to each other.
500///
501/// Each member in this group has an index starting from 0, and the largest
502/// index should be less than the interleave factor, which is equal to the absolute
503/// value of the access's stride.
504///
505/// E.g. An interleaved load group of factor 4:
506/// for (unsigned i = 0; i < 1024; i+=4) {
507/// a = A[i]; // Member of index 0
508/// b = A[i+1]; // Member of index 1
509/// d = A[i+3]; // Member of index 3
510/// ...
511/// }
512///
513/// An interleaved store group of factor 4:
514/// for (unsigned i = 0; i < 1024; i+=4) {
515/// ...
516/// A[i] = a; // Member of index 0
517/// A[i+1] = b; // Member of index 1
518/// A[i+2] = c; // Member of index 2
519/// A[i+3] = d; // Member of index 3
520/// }
521///
522/// Note: the interleaved load group could have gaps (missing members), but
523/// the interleaved store group doesn't allow gaps.
524template <typename InstTy> class InterleaveGroup {
525public:
526 InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
527 : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
528 InsertPos(nullptr) {}
529
530 InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
531 : Alignment(Alignment), InsertPos(Instr) {
532 Factor = std::abs(Stride);
533 assert(Factor > 1 && "Invalid interleave factor");
534
535 Reverse = Stride < 0;
536 Members[0] = Instr;
537 }
538
539 bool isReverse() const { return Reverse; }
540 uint32_t getFactor() const { return Factor; }
541 Align getAlign() const { return Alignment; }
542 uint32_t getNumMembers() const { return Members.size(); }
543
544 /// Try to insert a new member \p Instr with index \p Index and
545 /// alignment \p NewAlign. The index is relative to the leader and it can be
546 /// negative if it is the new leader.
547 ///
548 /// \returns false if the instruction doesn't belong to the group.
549 bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
550 // Make sure the key fits in an int32_t.
551 std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
552 if (!MaybeKey)
553 return false;
554 int32_t Key = *MaybeKey;
555
556 // Skip if the key is used for either the tombstone or empty special values.
557 if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
558 DenseMapInfo<int32_t>::getEmptyKey() == Key)
559 return false;
560
561 // Skip if there is already a member with the same index.
562 if (Members.contains(Key))
563 return false;
564
565 if (Key > LargestKey) {
566 // The largest index is always less than the interleave factor.
567 if (Index >= static_cast<int32_t>(Factor))
568 return false;
569
570 LargestKey = Key;
571 } else if (Key < SmallestKey) {
572
573 // Make sure the largest index fits in an int32_t.
574 std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
575 if (!MaybeLargestIndex)
576 return false;
577
578 // The largest index is always less than the interleave factor.
579 if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
580 return false;
581
582 SmallestKey = Key;
583 }
584
585 // It's always safe to select the minimum alignment.
586 Alignment = std::min(Alignment, NewAlign);
587 Members[Key] = Instr;
588 return true;
589 }
590
591 /// Get the member with the given index \p Index
592 ///
593 /// \returns nullptr if the group contains no such member.
594 InstTy *getMember(uint32_t Index) const {
595 int32_t Key = SmallestKey + Index;
596 return Members.lookup(Key);
597 }
598
599 /// Get the index for the given member. Unlike the key in the member
600 /// map, the index starts from 0.
601 uint32_t getIndex(const InstTy *Instr) const {
602 for (auto I : Members) {
603 if (I.second == Instr)
604 return I.first - SmallestKey;
605 }
606
607 llvm_unreachable("InterleaveGroup contains no such member");
608 }
609
610 InstTy *getInsertPos() const { return InsertPos; }
611 void setInsertPos(InstTy *Inst) { InsertPos = Inst; }
612
613 /// Add metadata (e.g. alias info) from the instructions in this group to \p
614 /// NewInst.
615 ///
616 /// FIXME: this function currently does not add noalias metadata à la
617 /// addNewMetadata. To do that we need to compute the intersection of the
618 /// noalias info from all members.
619 void addMetadata(InstTy *NewInst) const;
620
621 /// Returns true if this Group requires a scalar iteration to handle gaps.
622 bool requiresScalarEpilogue() const {
623 // If the last member of the Group exists, then a scalar epilog is not
624 // needed for this group.
625 if (getMember(getFactor() - 1))
626 return false;
627
628 // We have a group with gaps. It therefore can't be a reversed access,
629 // because such groups get invalidated (TODO).
630 assert(!isReverse() && "Group should have been invalidated");
631
632 // This is a group of loads, with gaps, and without a last-member
633 return true;
634 }
635
636 /// Return true if this group is full, i.e. it has no gaps.
637 bool isFull() const { return getNumMembers() == getFactor(); }
638
639private:
640 uint32_t Factor; // Interleave Factor.
641 bool Reverse;
642 Align Alignment;
643 DenseMap<int32_t, InstTy *> Members;
644 int32_t SmallestKey = 0;
645 int32_t LargestKey = 0;
646
647 // To avoid breaking dependences, vectorized instructions of an interleave
648 // group should be inserted at either the first load or the last store in
649 // program order.
650 //
651 // E.g. %even = load i32 // Insert Position
652 // %add = add i32 %even // Use of %even
653 // %odd = load i32
654 //
655 // store i32 %even
656 // %odd = add i32 // Def of %odd
657 // store i32 %odd // Insert Position
658 InstTy *InsertPos;
659};
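// Editor's example (not part of the upstream header): building a factor-2 load
// group by hand, the way InterleavedAccessInfo does internally. `LoadA` and
// `LoadB` are hypothetical adjacent loads with stride 2 and 4-byte alignment.
//
//   InterleaveGroup<Instruction> Group(LoadA, /*Stride=*/2, Align(4));
//   if (Group.insertMember(LoadB, /*Index=*/1, Align(4))) {
//     // Group.isFull() is now true; Group.getMember(1) == LoadB and
//     // Group.getIndex(LoadB) == 1.
//   }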
660
661/// Drive the analysis of interleaved memory accesses in the loop.
662///
663/// Use this class to analyze interleaved accesses only when we can vectorize
664/// a loop. Otherwise it's meaningless to do analysis as the vectorization
665/// on interleaved accesses is unsafe.
666///
667/// The analysis collects interleave groups and records the relationships
668/// between the member and the group in a map.
669class InterleavedAccessInfo {
670public:
671 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
672 DominatorTree *DT, LoopInfo *LI,
673 const LoopAccessInfo *LAI)
674 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
675
676 ~InterleavedAccessInfo() { invalidateGroups(); }
677
678 /// Analyze the interleaved accesses and collect them in interleave
679 /// groups. Substitute symbolic strides using \p Strides.
680 /// Consider also predicated loads/stores in the analysis if
681 /// \p EnableMaskedInterleavedGroup is true.
682 LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
683
684 /// Invalidate groups, e.g., in case all blocks in loop will be predicated
685 /// contrary to original assumption. Although we currently prevent group
686 /// formation for predicated accesses, we may be able to relax this limitation
687 /// in the future once we handle more complicated blocks. Returns true if any
688 /// groups were invalidated.
689 bool invalidateGroups() {
690 if (InterleaveGroups.empty()) {
691 assert(
692 !RequiresScalarEpilogue &&
693 "RequiresScalarEpilog should not be set without interleave groups");
694 return false;
695 }
696
697 InterleaveGroupMap.clear();
698 for (auto *Ptr : InterleaveGroups)
699 delete Ptr;
700 InterleaveGroups.clear();
701 RequiresScalarEpilogue = false;
702 return true;
703 }
704
705 /// Check if \p Instr belongs to any interleave group.
706 bool isInterleaved(Instruction *Instr) const {
707 return InterleaveGroupMap.contains(Instr);
708 }
709
710 /// Get the interleave group that \p Instr belongs to.
711 ///
712 /// \returns nullptr if \p Instr does not belong to any group.
713 InterleaveGroup<Instruction> *
714 getInterleaveGroup(const Instruction *Instr) const {
715 return InterleaveGroupMap.lookup(Instr);
716 }
717
718 iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
719 getInterleaveGroups() {
720 return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
721 }
722
723 /// Returns true if an interleaved group that may access memory
724 /// out-of-bounds requires a scalar epilogue iteration for correctness.
725 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
726
727 /// Invalidate groups that require a scalar epilogue (due to gaps). This can
728 /// happen when optimizing for size forbids a scalar epilogue, and the gap
729 /// cannot be filtered by masking the load/store.
730 LLVM_ABI void invalidateGroupsRequiringScalarEpilogue();
731
732 /// Returns true if we have any interleave groups.
733 bool hasGroups() const { return !InterleaveGroups.empty(); }
734
735private:
736 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
737 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
738 /// The interleaved access analysis can also add new predicates (for example
739 /// by versioning strides of pointers).
740 PredicatedScalarEvolution &PSE;
741
742 Loop *TheLoop;
743 DominatorTree *DT;
744 LoopInfo *LI;
745 const LoopAccessInfo *LAI;
746
747 /// True if the loop may contain non-reversed interleaved groups with
748 /// out-of-bounds accesses. We ensure we don't speculatively access memory
749 /// out-of-bounds by executing at least one scalar epilogue iteration.
750 bool RequiresScalarEpilogue = false;
751
752 /// Holds the relationships between the members and the interleave group.
753 DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;
754
755 SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;
756
757 /// Holds dependences among the memory accesses in the loop. It maps a source
758 /// access to a set of dependent sink accesses.
759 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
760
761 /// The descriptor for a strided memory access.
762 struct StrideDescriptor {
763 StrideDescriptor() = default;
764 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
765 Align Alignment)
766 : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
767
768 // The access's stride. It is negative for a reverse access.
769 int64_t Stride = 0;
770
771 // The scalar expression of this access.
772 const SCEV *Scev = nullptr;
773
774 // The size of the memory object.
775 uint64_t Size = 0;
776
777 // The alignment of this access.
778 Align Alignment;
779 };
780
781 /// A type for holding instructions and their stride descriptors.
782 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
783
784 /// Create a new interleave group with the given instruction \p Instr,
785 /// stride \p Stride and alignment \p Align.
786 ///
787 /// \returns the newly created interleave group.
788 InterleaveGroup<Instruction> *
789 createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
790 auto [It, Inserted] = InterleaveGroupMap.try_emplace(Instr);
791 assert(Inserted && "Already in an interleaved access group");
792 It->second = new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
793 InterleaveGroups.insert(It->second);
794 return It->second;
795 }
796
797 /// Release the group and remove all the relationships.
798 void releaseGroup(InterleaveGroup<Instruction> *Group) {
799 InterleaveGroups.erase(Group);
800 releaseGroupWithoutRemovingFromSet(Group);
801 }
802
803 /// Do everything necessary to release the group, apart from removing it from
804 /// the InterleaveGroups set.
805 void releaseGroupWithoutRemovingFromSet(InterleaveGroup<Instruction> *Group) {
806 for (unsigned i = 0; i < Group->getFactor(); i++)
807 if (Instruction *Member = Group->getMember(i))
808 InterleaveGroupMap.erase(Member);
809
810 delete Group;
811 }
812
813 /// Collect all the accesses with a constant stride in program order.
814 void collectConstStrideAccesses(
815 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
816 const DenseMap<Value *, const SCEV *> &Strides);
817
818 /// Returns true if \p Stride is allowed in an interleaved group.
819 LLVM_ABI static bool isStrided(int Stride);
820
821 /// Returns true if \p BB is a predicated block.
822 bool isPredicated(BasicBlock *BB) const {
823 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
824 }
825
826 /// Returns true if LoopAccessInfo can be used for dependence queries.
827 bool areDependencesValid() const {
828 return LAI && LAI->getDepChecker().getDependences();
829 }
830
831 /// Returns true if memory accesses \p A and \p B can be reordered, if
832 /// necessary, when constructing interleaved groups.
833 ///
834 /// \p A must precede \p B in program order. We return false if reordering is
835 /// not necessary or is prevented because \p A and \p B may be dependent.
836 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
837 StrideEntry *B) const {
838 // Code motion for interleaved accesses can potentially hoist strided loads
839 // and sink strided stores. The code below checks the legality of the
840 // following two conditions:
841 //
842 // 1. Potentially moving a strided load (B) before any store (A) that
843 // precedes B, or
844 //
845 // 2. Potentially moving a strided store (A) after any load or store (B)
846 // that A precedes.
847 //
848 // It's legal to reorder A and B if we know there isn't a dependence from A
849 // to B. Note that this determination is conservative since some
850 // dependences could potentially be reordered safely.
851
852 // A is potentially the source of a dependence.
853 auto *Src = A->first;
854 auto SrcDes = A->second;
855
856 // B is potentially the sink of a dependence.
857 auto *Sink = B->first;
858 auto SinkDes = B->second;
859
860 // Code motion for interleaved accesses can't violate WAR dependences.
861 // Thus, reordering is legal if the source isn't a write.
862 if (!Src->mayWriteToMemory())
863 return true;
864
865 // At least one of the accesses must be strided.
866 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
867 return true;
868
869 // If dependence information is not available from LoopAccessInfo,
870 // conservatively assume the instructions can't be reordered.
871 if (!areDependencesValid())
872 return false;
873
874 // If we know there is a dependence from source to sink, assume the
875 // instructions can't be reordered. Otherwise, reordering is legal.
876 return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
877 }
878
879 /// Collect the dependences from LoopAccessInfo.
880 ///
881 /// We process the dependences once during the interleaved access analysis to
882 /// enable constant-time dependence queries.
883 void collectDependences() {
884 if (!areDependencesValid())
885 return;
886 const auto &DepChecker = LAI->getDepChecker();
887 auto *Deps = DepChecker.getDependences();
888 for (auto Dep : *Deps)
889 Dependences[Dep.getSource(DepChecker)].insert(
890 Dep.getDestination(DepChecker));
891 }
892};
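// Editor's example (not part of the upstream header): the typical driver
// sequence, assuming the analyses (PSE, TheLoop, DT, LI, LAI) are available
// from the enclosing vectorization pass.
//
//   InterleavedAccessInfo IAI(PSE, TheLoop, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (IAI.requiresScalarEpilogue())
//     ; // plan a scalar tail, or call invalidateGroupsRequiringScalarEpilogue()
//   for (auto *Group : IAI.getInterleaveGroups())
//     ; // cost and widen the group's members together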
893
894} // llvm namespace
895
896#endif