31 #define DEBUG_TYPE "vectorutils"
39     cl::desc("Maximum factor for an interleaved access group (default = 8)"),
49 case Intrinsic::bswap:
50 case Intrinsic::bitreverse:
51 case Intrinsic::ctpop:
60 case Intrinsic::sadd_sat:
61 case Intrinsic::ssub_sat:
62 case Intrinsic::uadd_sat:
63 case Intrinsic::usub_sat:
64 case Intrinsic::smul_fix:
65 case Intrinsic::smul_fix_sat:
66 case Intrinsic::umul_fix:
67 case Intrinsic::umul_fix_sat:
72 case Intrinsic::atan2:
75 case Intrinsic::sincos:
76 case Intrinsic::sincospi:
82 case Intrinsic::exp10:
84 case Intrinsic::ldexp:
86 case Intrinsic::log10:
89 case Intrinsic::minnum:
90 case Intrinsic::maxnum:
91 case Intrinsic::minimum:
92 case Intrinsic::maximum:
93 case Intrinsic::minimumnum:
94 case Intrinsic::maximumnum:
96 case Intrinsic::copysign:
97 case Intrinsic::floor:
99 case Intrinsic::trunc:
100 case Intrinsic::rint:
101 case Intrinsic::nearbyint:
102 case Intrinsic::round:
103 case Intrinsic::roundeven:
106 case Intrinsic::fmuladd:
107 case Intrinsic::is_fpclass:
108 case Intrinsic::powi:
109 case Intrinsic::canonicalize:
110 case Intrinsic::fptosi_sat:
111 case Intrinsic::fptoui_sat:
112 case Intrinsic::lround:
113 case Intrinsic::llround:
114 case Intrinsic::lrint:
115 case Intrinsic::llrint:
116 case Intrinsic::ucmp:
117 case Intrinsic::scmp:
135 case Intrinsic::frexp:
136 case Intrinsic::uadd_with_overflow:
137 case Intrinsic::sadd_with_overflow:
138 case Intrinsic::ssub_with_overflow:
139 case Intrinsic::usub_with_overflow:
140 case Intrinsic::umul_with_overflow:
141 case Intrinsic::smul_with_overflow:
149 unsigned ScalarOpdIdx,
161 case Intrinsic::vp_abs:
162 case Intrinsic::ctlz:
163 case Intrinsic::vp_ctlz:
164 case Intrinsic::cttz:
165 case Intrinsic::vp_cttz:
166 case Intrinsic::is_fpclass:
167 case Intrinsic::vp_is_fpclass:
168 case Intrinsic::powi:
169 return (ScalarOpdIdx == 1);
170 case Intrinsic::smul_fix:
171 case Intrinsic::smul_fix_sat:
172 case Intrinsic::umul_fix:
173 case Intrinsic::umul_fix_sat:
174 return (ScalarOpdIdx == 2);
175 case Intrinsic::experimental_vp_splice:
176 return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
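The switch above is the body of isVectorIntrinsicWithScalarOpAtArg: it reports which call operands must stay scalar when the intrinsic is widened (e.g. the exponent of llvm.powi, or the scale operand of the fixed-point multiplies). A minimal usage sketch, assuming the LLVM headers and libraries are available; it is not part of this file:

// Illustrative sketch using the declaration documented further below.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"

// powi keeps operand 1 (the i32 exponent) scalar when the call is vectorized,
// while operand 0 (the base) is widened; a null TTI suffices for this query.
static bool exponentStaysScalar() {
  return llvm::isVectorIntrinsicWithScalarOpAtArg(llvm::Intrinsic::powi,
                                                  /*ScalarOpdIdx=*/1,
                                                  /*TTI=*/nullptr);
}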
190 return OpdIdx == -1 || OpdIdx == 0;
193 case Intrinsic::fptosi_sat:
194 case Intrinsic::fptoui_sat:
195 case Intrinsic::lround:
196 case Intrinsic::llround:
197 case Intrinsic::lrint:
198 case Intrinsic::llrint:
199 case Intrinsic::vp_lrint:
200 case Intrinsic::vp_llrint:
201 case Intrinsic::ucmp:
202 case Intrinsic::scmp:
203 return OpdIdx == -1 || OpdIdx == 0;
204 case Intrinsic::modf:
205 case Intrinsic::sincos:
206 case Intrinsic::sincospi:
207 case Intrinsic::is_fpclass:
208 case Intrinsic::vp_is_fpclass:
210 case Intrinsic::powi:
211 case Intrinsic::ldexp:
212 return OpdIdx == -1 || OpdIdx == 1;
225 case Intrinsic::frexp:
226 return RetIdx == 0 || RetIdx == 1;
242       ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
243       ID == Intrinsic::experimental_noalias_scope_decl ||
244       ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
251 case Intrinsic::vector_interleave2:
253 case Intrinsic::vector_interleave3:
255 case Intrinsic::vector_interleave4:
257 case Intrinsic::vector_interleave5:
259 case Intrinsic::vector_interleave6:
261 case Intrinsic::vector_interleave7:
263 case Intrinsic::vector_interleave8:
272 case Intrinsic::vector_deinterleave2:
274 case Intrinsic::vector_deinterleave3:
276 case Intrinsic::vector_deinterleave4:
278 case Intrinsic::vector_deinterleave5:
280 case Intrinsic::vector_deinterleave6:
282 case Intrinsic::vector_deinterleave7:
284 case Intrinsic::vector_deinterleave8:
292   [[maybe_unused]] unsigned Factor =
          getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
295   assert(Factor && Factor == DISubtypes.size() &&
296          "unexpected deinterleave factor or result type");
297   return cast<VectorType>(DISubtypes[0]);
304   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
305   VectorType *VTy = cast<VectorType>(V->getType());
307   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
308 unsigned Width = FVTy->getNumElements();
314 return C->getAggregateElement(EltNo);
318 if (!isa<ConstantInt>(III->getOperand(2)))
320 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
325 return III->getOperand(1);
328 if (III == III->getOperand(0))
338   if (SVI && isa<FixedVectorType>(SVI->getType())) {
344     if (InEl < (int)LHSWidth)
353   if (Constant *Elt = C->getAggregateElement(EltNo))
354     if (Elt->isNullValue())
358 if (isa<ScalableVectorType>(VTy))
360 if (EltNo < VTy->getElementCount().getKnownMinValue())
375 if (SplatIndex != -1 && SplatIndex != M)
381   assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
390 if (isa<VectorType>(V->getType()))
391     if (auto *C = dyn_cast<Constant>(V))
392       return C->getSplatValue();
407 if (isa<VectorType>(V->getType())) {
408 if (isa<UndefValue>(V))
412     if (auto *C = dyn_cast<Constant>(V))
413       return C->getSplatValue() != nullptr;
416   if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
428 return Shuf->getMaskValue(Index) == Index;
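The splat queries above (getSplatIndex / getSplatValue / isSplatValue) reduce to the rule that every defined shuffle-mask element must name the same source lane. A standalone sketch of that rule; the helper name is illustrative, not the LLVM API:

#include <vector>

static int splatIndexOf(const std::vector<int> &Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    if (M < 0)
      continue;                 // undef/poison lanes do not constrain the splat
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;                // two different source lanes -> not a splat
    SplatIndex = M;
  }
  return SplatIndex;            // e.g. {2, -1, 2, 2} -> 2, but {0, 1, 0, 1} -> -1
}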
451                                   const APInt &DemandedElts, APInt &DemandedLHS,
452                                   APInt &DemandedRHS, bool AllowUndefElts) {
456   if (DemandedElts.isZero())
460   if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
465   for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
467     assert((-1 <= M) && (M < (SrcWidth * 2)) &&
468            "Invalid shuffle mask constant");
470     if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
481       DemandedRHS.setBit(M - SrcWidth);
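Worked example for getShuffleDemandedElts above, as a standalone sketch of the same index arithmetic (plain unsigned bitmasks stand in for APInt; names are illustrative):

#include <cstdio>
#include <vector>

int main() {
  const int SrcWidth = 4;
  std::vector<int> Mask = {0, 5, 2, 7};    // lanes 0,2 read LHS; lanes 1,3 read RHS
  unsigned DemandedElts = 0b1011;          // output lanes 0, 1 and 3 are used
  unsigned DemandedLHS = 0, DemandedRHS = 0;
  for (unsigned I = 0; I != Mask.size(); ++I) {
    int M = Mask[I];
    if (!((DemandedElts >> I) & 1) || M < 0)
      continue;                            // undemanded or undef lane
    if (M < SrcWidth)
      DemandedLHS |= 1u << M;              // here: bit 0
    else
      DemandedRHS |= 1u << (M - SrcWidth); // here: bits 1 and 3
  }
  std::printf("LHS=0x%x RHS=0x%x\n", DemandedLHS, DemandedRHS); // LHS=0x1 RHS=0xa
}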
488 std::array<std::pair<int, int>, 2> &SrcInfo) {
489 const int SignalValue = NumElts * 2;
490 SrcInfo[0] = {-1, SignalValue};
491 SrcInfo[1] = {-1, SignalValue};
495 int Src = M >= (int)NumElts;
496 int Diff = (int)i - (M % NumElts);
498     for (int j = 0; j < 2; j++) {
499 auto &[SrcE, DiffE] = SrcInfo[j];
501 assert(DiffE == SignalValue);
505 if (SrcE == Src && DiffE == Diff) {
514 return SrcInfo[0].first != -1;
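Worked example for isMaskedSlidePair above, as a standalone sketch of the (source, offset) decomposition; the loop below is illustrative only. With NumElts == 4, the mask {1, 2, 3, -1} slides the first source down by one lane (every defined element yields the same pair).

#include <cstdio>
#include <vector>

int main() {
  const int NumElts = 4;
  std::vector<int> Mask = {1, 2, 3, -1};
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;                       // undef lanes match any slide
    int Src = M >= NumElts;           // 0 = first operand, 1 = second operand
    int Diff = i - (M % NumElts);     // how far the lane slid
    std::printf("lane %d: src %d, diff %d\n", i, Src, Diff); // always src 0, diff -1
  }
}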
519   assert(Scale > 0 && "Unexpected scaling factor");
523     ScaledMask.assign(Mask.begin(), Mask.end());
528   for (int MaskElt : Mask) {
531            "Overflowed 32-bits");
533     for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
534       ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
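A standalone sketch of narrowShuffleMaskElts above, assuming the usual encoding of undef/poison lanes as negative indices: every index is replaced by Scale consecutive indices of the narrower element type, and negative entries are replicated as-is. With Scale == 2, {0, 2, -1} becomes {0, 1, 4, 5, -1, -1}.

#include <vector>

static std::vector<int> narrowMask(int Scale, const std::vector<int> &Mask) {
  std::vector<int> Scaled;
  for (int MaskElt : Mask)
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      Scaled.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  return Scaled;
}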
540   assert(Scale > 0 && "Unexpected scaling factor");
544     ScaledMask.assign(Mask.begin(), Mask.end());
549   int NumElts = Mask.size();
550   if (NumElts % Scale != 0)
554   ScaledMask.reserve(NumElts / Scale);
559     assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
562     int SliceFront = MaskSlice.front();
563     if (SliceFront < 0) {
571       if (SliceFront % Scale != 0)
574       for (int i = 1; i < Scale; ++i)
575         if (MaskSlice[i] != SliceFront + i)
577       ScaledMask.push_back(SliceFront / Scale);
579     Mask = Mask.drop_front(Scale);
580   } while (!Mask.empty());
582   assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
591   unsigned NumElts = M.size();
592   if (NumElts % 2 != 0)
596   for (unsigned i = 0; i < NumElts; i += 2) {
601     if (M0 == -1 && M1 == -1) {
606     if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
611     if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
620   assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
626   unsigned NumSrcElts = Mask.size();
627   assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
630   if (NumSrcElts == NumDstElts) {
631     ScaledMask.assign(Mask.begin(), Mask.end());
636   assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
637          "Unexpected scaling factor");
639   if (NumSrcElts > NumDstElts) {
640     int Scale = NumSrcElts / NumDstElts;
644     int Scale = NumDstElts / NumSrcElts;
651   std::array<SmallVector<int, 16>, 2> TmpMasks;
654   for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
664     ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
665     unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
674   int Sz = Mask.size();
675   unsigned SzDest = Sz / NumOfDestRegs;
676   unsigned SzSrc = Sz / NumOfSrcRegs;
677   for (unsigned I = 0; I < NumOfDestRegs; ++I) {
678     auto &RegMasks = Res[I];
679     RegMasks.assign(2 * NumOfSrcRegs, {});
682     for (unsigned K = 0; K < SzDest; ++K) {
683       int Idx = I * SzDest + K;
688       int MaskIdx = Mask[Idx] % Sz;
689       int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
692       if (RegMasks[SrcRegIdx].empty())
694       RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
698   for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
702     switch (NumSrcRegs) {
711       unsigned SrcReg = std::distance(Dest.begin(), It);
712       SingleInputAction(*It, SrcReg, I);
727                  "Expected undefined mask element.");
728           FirstMask[Idx] = SecondMask[Idx] + VF;
733         for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
744       for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
749         if (FirstIdx == SecondIdx) {
755           SecondMask = RegMask;
756           CombineMasks(FirstMask, SecondMask);
757           ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
759           NormalizeMask(FirstMask);
761           SecondMask = FirstMask;
762           SecondIdx = FirstIdx;
764         if (FirstIdx != SecondIdx && SecondIdx >= 0) {
765           CombineMasks(SecondMask, FirstMask);
766           ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
768           Dest[FirstIdx].clear();
769           NormalizeMask(SecondMask);
771       } while (SecondIdx >= 0);
779 const APInt &DemandedElts,
781 APInt &DemandedRHS) {
782   assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
783   int NumLanes = VectorBitWidth / 128;
785   int NumEltsPerLane = NumElts / NumLanes;
786   int HalfEltsPerLane = NumEltsPerLane / 2;
792   for (int Idx = 0; Idx != NumElts; ++Idx) {
793     if (!DemandedElts[Idx])
795     int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
796     int LocalIdx = Idx % NumEltsPerLane;
797     if (LocalIdx < HalfEltsPerLane) {
798       DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
800       LocalIdx -= HalfEltsPerLane;
801       DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
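Worked example for getHorizDemandedEltsForFirstOperand above, as a standalone sketch with plain bitmasks in place of APInt (256-bit vector, eight elements, two 128-bit lanes): each demanded output element maps to an element index in either the first (LHS) or second (RHS) source of the same lane.

#include <cstdio>

int main() {
  const int NumElts = 8, NumEltsPerLane = 4, HalfEltsPerLane = 2;
  unsigned DemandedElts = (1u << 5) | (1u << 2); // output elements 2 and 5
  unsigned DemandedLHS = 0, DemandedRHS = 0;
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!((DemandedElts >> Idx) & 1))
      continue;
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane)
      DemandedLHS |= 1u << (LaneIdx + 2 * LocalIdx);
    else
      DemandedRHS |= 1u << (LaneIdx + 2 * (LocalIdx - HalfEltsPerLane));
  }
  std::printf("LHS=0x%x RHS=0x%x\n", DemandedLHS, DemandedRHS); // LHS=0x40 RHS=0x1
}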
822   bool SeenExtFromIllegalType = false;
824   for (auto &I : *BB) {
827     if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
829       SeenExtFromIllegalType = true;
832     if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
833         !I.getType()->isVectorTy() &&
834         I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
845   if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
849   while (!Worklist.empty()) {
858     if (DB.getDemandedBits(I).getBitWidth() > 64)
861     uint64_t V = DB.getDemandedBits(I).getZExtValue();
867     if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
874     if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
875         !I->getType()->isIntegerTy()) {
876       DBits[Leader] |= ~0ULL;
888     if (isa<CallBase>(I))
891     if (DBits[Leader] == ~0ULL)
895     for (Value *O : I->operands()) {
897       if (auto *OI = dyn_cast<Instruction>(O))
905   for (auto &I : DBits)
906     for (auto *U : I.first->users())
907       if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
910   for (const auto &E : ECs) {
915       LeaderDemandedBits |= DBits[M];
935       auto *MI = dyn_cast<Instruction>(M);
938       Type *Ty = M->getType();
940         Ty = MI->getOperand(0)->getType();
947       auto *Call = dyn_cast<CallBase>(MI);
948       auto Ops = Call ? Call->args() : MI->operands();
950 auto *CI = dyn_cast<ConstantInt>(U);
954 isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
955 U.getOperandNo() == 1)
956 return CI->uge(MinBW);
970 template <typename ListT>
975   List.insert(AccGroups);
979   for (const auto &AccGroupListOp : AccGroups->operands()) {
980 auto *Item = cast<MDNode>(AccGroupListOp.get());
991 if (AccGroups1 == AccGroups2)
998 if (Union.size() == 0)
1000 if (Union.size() == 1)
1001 return cast<MDNode>(Union.front());
1012 if (!MayAccessMem1 && !MayAccessMem2)
1015     return Inst2->getMetadata(LLVMContext::MD_access_group);
1017     return Inst1->getMetadata(LLVMContext::MD_access_group);
1033     if (AccGroupSet2.count(MD1))
1037     auto *Item = cast<MDNode>(Node.get());
1039     if (AccGroupSet2.count(Item))
1044   if (Intersection.size() == 0)
1046   if (Intersection.size() == 1)
1047     return cast<MDNode>(Intersection.front());
1059 static const unsigned SupportedIDs[] = {
1060 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1061 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
1062 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
1063 LLVMContext::MD_access_group, LLVMContext::MD_mmra};
1084   for (auto &[Kind, MD] : Metadata) {
1085     for (int J = 1, E = VL.size(); MD && J != E; ++J) {
1090 case LLVMContext::MD_mmra: {
1094 case LLVMContext::MD_tbaa:
1097 case LLVMContext::MD_alias_scope:
1100 case LLVMContext::MD_fpmath:
1103 case LLVMContext::MD_noalias:
1104 case LLVMContext::MD_nontemporal:
1105 case LLVMContext::MD_invariant_load:
1108 case LLVMContext::MD_access_group:
1133   for (unsigned i = 0; i < VF; i++)
1134     for (unsigned j = 0; j < Group.getFactor(); ++j) {
1135       unsigned HasMember = Group.getMember(j) ? 1 : 0;
1136       Mask.push_back(Builder.getInt1(HasMember));
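A standalone sketch of createBitMaskForGaps above; the helper name and the bool-vector stand-in for the IR i1 constants are illustrative. For every vector iteration the group factor is expanded to one bit per member slot, 0 where the interleave group has a gap: factor 3 with the middle member missing and VF == 2 gives <1,0,1, 1,0,1>.

#include <vector>

static std::vector<bool> gapMask(unsigned VF, const std::vector<bool> &HasMember) {
  std::vector<bool> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (bool Present : HasMember)   // one slot per member index 0..Factor-1
      Mask.push_back(Present);
  return Mask;                       // gapMask(2, {1, 0, 1}) == {1,0,1,1,0,1}
}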
1145   for (unsigned i = 0; i < VF; i++)
1146     for (unsigned j = 0; j < ReplicationFactor; j++)
1155   for (unsigned i = 0; i < VF; i++)
1156     for (unsigned j = 0; j < NumVecs; j++)
1157       Mask.push_back(j * VF + i);
1165   for (unsigned i = 0; i < VF; i++)
1166     Mask.push_back(Start + i * Stride);
1173                                                unsigned NumUndefs) {
1175   for (unsigned i = 0; i < NumInts; i++)
1176     Mask.push_back(Start + i);
1178   for (unsigned i = 0; i < NumUndefs; i++)
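Worked values for the mask builders above (createReplicatedMask, createInterleaveMask, createStrideMask, createSequentialMask), restated as a standalone sketch of the same index arithmetic rather than the LLVM API:

// Expected results for small inputs:
//   createReplicatedMask(ReplicationFactor=3, VF=2)        -> {0,0,0,1,1,1}
//   createInterleaveMask(VF=4, NumVecs=2)                  -> {0,4,1,5,2,6,3,7}
//   createStrideMask(Start=0, Stride=2, VF=4)              -> {0,2,4,6}
//   createSequentialMask(Start=0, NumInts=4, NumUndefs=4)  -> {0,1,2,3,-1,-1,-1,-1}
#include <vector>

static std::vector<int> interleaveMask(unsigned VF, unsigned NumVecs) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);
  return Mask;
}

static std::vector<int> strideMask(unsigned Start, unsigned Stride, unsigned VF) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);
  return Mask;
}

static std::vector<int> sequentialMask(unsigned Start, unsigned NumInts,
                                       unsigned NumUndefs) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(-1);              // -1 encodes an undef/poison lane
  return Mask;
}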
1187 int NumEltsSigned = NumElts;
1188   assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1193   for (int MaskElt : Mask) {
1194     assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1195 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
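A standalone sketch of createUnaryMask above, folding second-operand indices back into the first operand's index space (helper name illustrative). With NumElts == 4, the binary mask {0, 5, 2, 7} becomes the unary mask {0, 1, 2, 3}.

#include <vector>

static std::vector<int> unaryMask(const std::vector<int> &Mask, int NumElts) {
  std::vector<int> Unary;
  for (int MaskElt : Mask)
    Unary.push_back(MaskElt >= NumElts ? MaskElt - NumElts : MaskElt);
  return Unary;
}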
1208 assert(VecTy1 && VecTy2 &&
1209 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1210 "Expect two vectors with the same element type");
1212 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1213 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1214   assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1216 if (NumElts1 > NumElts2) {
1228   unsigned NumVecs = Vecs.size();
1229   assert(NumVecs > 1 && "Should be at least two vectors");
1235     for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1236       Value *V0 = ResList[i], *V1 = ResList[i + 1];
1237       assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1238              "Only the last vector may have a different type");
1244     if (NumVecs % 2 != 0)
1245       TmpList.push_back(ResList[NumVecs - 1]);
1248     NumVecs = ResList.size();
1249   } while (NumVecs > 1);
1255 assert(isa<VectorType>(Mask->getType()) &&
1256 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1257 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1259 "Mask must be a vector of i1");
1261 auto *ConstMask = dyn_cast<Constant>(Mask);
1264 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1266 if (isa<ScalableVectorType>(ConstMask->getType()))
1270 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1272     if (auto *MaskElt = ConstMask->getAggregateElement(I))
1273 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1281 assert(isa<VectorType>(Mask->getType()) &&
1282 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1283 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1285 "Mask must be a vector of i1");
1287 auto *ConstMask = dyn_cast<Constant>(Mask);
1290 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1292 if (isa<ScalableVectorType>(ConstMask->getType()))
1296 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1298     if (auto *MaskElt = ConstMask->getAggregateElement(I))
1299 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1307 assert(isa<VectorType>(Mask->getType()) &&
1308 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1309 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1311 "Mask must be a vector of i1");
1313 auto *ConstMask = dyn_cast<Constant>(Mask);
1316 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1318 if (isa<ScalableVectorType>(ConstMask->getType()))
1322 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1324     if (auto *MaskElt = ConstMask->getAggregateElement(I))
1325 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1334 assert(isa<FixedVectorType>(Mask->getType()) &&
1335 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1336 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1338 "Mask must be a fixed width vector of i1");
1340 const unsigned VWidth =
1341 cast<FixedVectorType>(Mask->getType())->getNumElements();
1343   if (auto *CV = dyn_cast<ConstantVector>(Mask))
1344     for (unsigned i = 0; i < VWidth; i++)
1345 if (CV->getAggregateElement(i)->isNullValue())
1347 return DemandedElts;
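The three mask predicates and possiblyDemandedEltsInMask above differ only in how they treat known-zero, known-one and undef lanes. A standalone sketch on a concrete <4 x i1> constant, with a tri-state enum standing in for the IR constants: <1, 0, undef, 1> is neither all-zero-or-undef nor all-one-or-undef, does contain a one-or-undef lane, and lanes 0, 2 and 3 remain possibly demanded.

#include <cassert>

enum MaskElt { Zero, One, Undef };

int main() {
  MaskElt Mask[4] = {One, Zero, Undef, One};
  bool AllZeroOrUndef = true, AllOneOrUndef = true, AnyOneOrUndef = false;
  unsigned PossiblyDemanded = 0;
  for (unsigned i = 0; i < 4; ++i) {
    AllZeroOrUndef &= (Mask[i] != One);
    AllOneOrUndef &= (Mask[i] != Zero);
    AnyOneOrUndef |= (Mask[i] != Zero);
    if (Mask[i] != Zero)             // only a known-zero lane is dropped
      PossiblyDemanded |= 1u << i;
  }
  assert(!AllZeroOrUndef && !AllOneOrUndef && AnyOneOrUndef);
  assert(PossiblyDemanded == 0b1101);
  return 0;
}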
1350 bool InterleavedAccessInfo::isStrided(int Stride) {
1351   unsigned Factor = std::abs(Stride);
1355 void InterleavedAccessInfo::collectConstStrideAccesses(
1369   for (auto &I : *BB) {
1378     if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1390                                      true, false).value_or(0);
1393     AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1435 bool EnablePredicatedInterleavedMemAccesses) {
1441 collectConstStrideAccesses(AccessStrideInfo, Strides);
1443   if (AccessStrideInfo.empty())
1447 collectDependences();
1468   for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1471     StrideDescriptor DesB = BI->second;
1477     if (isStrided(DesB.Stride) &&
1478         (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1483       GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1484       if (B->mayWriteToMemory())
1485         StoreGroups.insert(GroupB);
1487         LoadGroups.insert(GroupB);
1491     for (auto AI = std::next(BI); AI != E; ++AI) {
1493 StrideDescriptor DesA = AI->second;
1518           if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1519                                     A, &*AccessStrideInfo.find(MemberOfGroupB)))
1520             return MemberOfGroupB;
1530       if (A->mayWriteToMemory() && GroupA != GroupB) {
1538         if (GroupB && LoadGroups.contains(GroupB))
1539           DependentInst = DependentMember(GroupB, &*AI);
1540         else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1543         if (DependentInst) {
1548           if (GroupA && StoreGroups.contains(GroupA)) {
1550                               "dependence between "
1551                        << *A << " and " << *DependentInst << '\n');
1552             StoreGroups.remove(GroupA);
1553             releaseGroup(GroupA);
1559           if (GroupB && LoadGroups.contains(GroupB)) {
1561                               << " as complete.\n");
1562             CompletedLoadGroups.insert(GroupB);
1566       if (CompletedLoadGroups.contains(GroupB)) {
1574 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1584         (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1585         (A->mayWriteToMemory() != B->mayWriteToMemory()))
1590       if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1607       if (DistanceToB % static_cast<int64_t>(DesB.Size))
1614       if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1615           (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1621           GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1626                           << " into the interleave group with" << *B
1628         InterleaveGroupMap[A] = GroupB;
1631         if (A->mayReadFromMemory())
1639                                                const char *FirstOrLast) -> bool {
1641     assert(Member && "Group member does not exist");
1644     if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
1645                      false, true).value_or(0))
1647       LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1649                         << " group member potentially pointer-wrapping.\n");
1650 releaseGroup(Group);
1668   for (auto *Group : LoadGroups) {
1680     if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1683     InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
1692           dbgs() << "LV: Invalidate candidate interleaved group due to "
1693                     "a reverse access with gaps.\n");
1694       releaseGroup(Group);
1698           dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1699       RequiresScalarEpilogue = true;
1703   for (auto *Group : StoreGroups) {
1713     if (!EnablePredicatedInterleavedMemAccesses) {
1715           dbgs() << "LV: Invalidate candidate interleaved store group due "
1717       releaseGroup(Group);
1727     if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1729     for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1731       InvalidateGroupIfMemberMayWrap(Group, Index, "last");
1745   bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
1746     if (!Group->requiresScalarEpilogue())
1750         << "LV: Invalidate candidate interleaved group due to gaps that "
1751            "require a scalar epilogue (not allowed under optsize) and cannot "
1752            "be masked (not enabled). \n");
1753     releaseGroupWithoutRemovingFromSet(Group);
1756   assert(ReleasedGroup && "At least one group must be invalidated, as a "
1757                           "scalar epilogue was required");
1758   (void)ReleasedGroup;
1759   RequiresScalarEpilogue = false;
1762 template <typename InstT>
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Common base class shared among various IRBuilders.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
This instruction inserts a single (scalar) element into a VectorType value.
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
void setInsertPos(InstTy *Inst)
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
BlockT * getHeader() const
Store the result of a depth first search within basic blocks contained by a single loop.
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
unsigned getNumOperands() const
Return number of MDNode operands.
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
LLVMContext & getContext() const
Tracking metadata reference owned by Metadata.
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
reverse_iterator rbegin()
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
bool remove(const value_type &X)
Remove an item from the set vector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
ArrayRef< Type * > subtypes() const
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPCast(Intrinsic::ID ID)
static LLVM_ABI std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
Type * getElementType() const
An efficient, type-erasing, non-owning reference to a callable.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
constexpr int PoisonMaskElem
LLVM_ABI bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
LLVM_ABI Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.