#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;
  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);

    SmallPtrSet<Value *, 4> Visited;
        OpI, nullptr, nullptr, [&](Value *V) {
        NextInst = NextInst->getNextNode();
      Worklist.pushUsersToWorkList(*OpI);
      Worklist.pushValue(OpI);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||

  Type *ScalarTy = Load->getType()->getScalarType();

  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)
  unsigned AS = Load->getPointerAddressSpace();
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
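// Illustrative sketch of the fold above (hypothetical IR, not from the
// original source): a scalar load feeding an insert into lane 0 becomes a
// wider vector load plus a shuffle, when the wider load is safe and cheap:
//   %s = load float, ptr %p
//   %r = insertelement <4 x float> poison, float %s, i64 0
// becomes:
//   %v = load <4 x float>, ptr %p
//   %r = shufflevector <4 x float> %v, <4 x float> poison,
//        <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>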
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  if (!Shuf->isIdentityWithPadding())
  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())
  replaceValue(I, *VecLd);
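// Illustrative sketch (hypothetical IR): an identity-with-padding shuffle of
// a loaded subvector can become a single wider load when that is legal:
//   %v2 = load <2 x float>, ptr %p
//   %r  = shufflevector <2 x float> %v2, <2 x float> poison,
//         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// becomes:
//   %r = load <4 x float>, ptr %p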
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
  assert(Index0C && Index1C && "Expected constant extract indexes");
  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)
  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)
  return Index0 > Index1 ? Ext0 : Ext1;
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");
  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();
  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +
  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
    ShuffleMask[BestInsIndex] = BestExtIndex;
        VecTy, VecTy, ShuffleMask, CostKind, 0, nullptr, {ConvertToShuffle});
        VecTy, VecTy, {}, CostKind, 0, nullptr,
  return OldCost < NewCost;
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");

                                    V1, "foldExtExtBinop");
    VecBOInst->copyIRFlags(&I);
bool VectorCombine::foldExtractExtract(Instruction &I) {
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)
                    ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
                    : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
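// Illustrative sketch (hypothetical IR): a scalar op of two extracts from
// the same lane becomes one vector op followed by a single extract:
//   %e0 = extractelement <4 x i32> %x, i64 1
//   %e1 = extractelement <4 x i32> %y, i64 1
//   %r  = add i32 %e0, %e1
// becomes:
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i64 1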
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)
  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);
  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
  SmallVector<int> SrcMask;
      VecTy, SrcVecTy, SrcMask, CostKind);
  if (NewCost > OldCost)
  replaceValue(I, *NewShuf);
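// Illustrative sketch (hypothetical IR): inserting an extracted-then-negated
// lane becomes a shuffle with a vector fneg (lane 2 taken from %nv):
//   %e = extractelement <4 x float> %y, i64 2
//   %n = fneg float %e
//   %r = insertelement <4 x float> %x, float %n, i64 2
// becomes:
//   %nv = fneg <4 x float> %y
//   %r  = shufflevector <4 x float> %x, <4 x float> %nv,
//         <4 x i32> <i32 0, i32 1, i32 6, i32 3>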
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewInst->copyIRFlags(VecBinOp);
    NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  if (!BinOp || !BinOp->isBitwiseLogicOp())
  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");
  if (CastOpcode != RHSCast->getOpcode())
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:
  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
      LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      BinOp->getName() + ".inner");
  NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
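// Illustrative sketch (hypothetical IR): a bitwise op of two identical casts
// is performed in the narrower source type, leaving one cast of the result:
//   %za = zext <4 x i16> %a to <4 x i32>
//   %zb = zext <4 x i16> %b to <4 x i32>
//   %r  = and <4 x i32> %za, %zb
// becomes:
//   %inner = and <4 x i16> %a, %b
//   %r     = zext <4 x i16> %inner to <4 x i32>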
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::Trunc:
  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())
  PreservedCastFlags RHSFlags;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)
      LHSSrc, InvC, I.getName() + ".inner");
  NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  if (!DestTy || !SrcTy)
  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;
  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;
                              TargetTransformInfo::CastContextHint::None,
                              TargetTransformInfo::CastContextHint::None,
  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n  OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())
  replaceValue(I, *Shuf);
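// Illustrative sketch (hypothetical IR): the bitcast is hoisted above the
// shuffle, scaling the mask by the element-size ratio (here 2x narrowing):
//   %s = shufflevector <4 x i32> %v0, <4 x i32> %v1,
//        <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %r = bitcast <4 x i32> %s to <8 x i16>
// becomes:
//   %b0 = bitcast <4 x i32> %v0 to <8 x i16>
//   %b1 = bitcast <4 x i32> %v1 to <8 x i16>
//   %r  = shufflevector <8 x i16> %b0, <8 x i16> %b1,
//         <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>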
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!ScalarOp0 || !ScalarOp1)
  auto IsAllTrueMask = [](Value *MaskVal) {
      return ConstValue->isAllOnesValue();
  SmallVector<int> Mask;
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);
  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())
  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
                                  {ScalarOp0, ScalarOp1})
                                  ScalarOp0, ScalarOp1);
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  if (!UO && !BO && !CI && !II)
    if (Arg->getType() != II->getType() &&
  for (User *U : I.users())
  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
    uint64_t InsIdx = 0;
      if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    else if (InsIdx != *Index)
  if (!Index.has_value())
  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");
  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,
  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
                                II->getIntrinsicID(), Idx, &TTI)))
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;
  if (OldCost < NewCost || !NewCost.isValid())
    ++NumScalarIntrinsic;
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
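// Illustrative sketch (hypothetical IR): an op of inserts into constant base
// vectors is done on the scalars, with a single insert of the result:
//   %i0 = insertelement <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i32 %a, i64 0
//   %i1 = insertelement <4 x i32> zeroinitializer, i32 %b, i64 0
//   %r  = add <4 x i32> %i0, %i1
// becomes (the constant bases fold to <i32 1, i32 1, i32 1, i32 1>):
//   %s = add i32 %a, %b
//   %r = insertelement <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i32 %s, i64 0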
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  if (!BI || !I.getType()->isIntegerTy(1))
  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  CmpPredicate P0, P1;
  uint64_t Index0, Index1;
  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");
  unsigned CmpOpcode =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())
  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  unsigned ReductionOpc =
  CostBeforeReduction =
      TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
  CostAfterReduction =
      TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),
  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);
  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;
  auto checkIntrinsicAndGetItsArgument = [](Value *V,
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);
  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);
  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  replaceValue(I, *Rdx);
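// Illustrative sketch (hypothetical IR): a binop of two like reductions
// becomes one reduction of a vector binop:
//   %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
//   %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %b)
//   %r  = add i32 %r0, %r1
// becomes:
//   %v = add <4 x i32> %a, %b
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)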
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;

  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
      if (U.get() == ToFreeze)

  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();
    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                 true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();
  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();
      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  if (!match(SI->getValueOperand(),
    Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
    if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
        !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
        SrcAddr != SI->getPointerOperand()->stripPointerCasts())
    if (ScalarizableIdx.isUnsafe() ||
    Worklist.push(Load);
    if (ScalarizableIdx.isSafeWithFreeze())
        SI->getValueOperand()->getType(), SI->getPointerOperand(),
        {ConstantInt::get(Idx->getType(), 0), Idx});
        std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
    replaceValue(I, *NSI);
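// Illustrative sketch (hypothetical IR): a load/insert/store round trip that
// changes only one lane becomes a scalar store through a GEP:
//   %v = load <4 x i32>, ptr %p
//   %i = insertelement <4 x i32> %v, i32 %s, i64 1
//   store <4 x i32> %i, ptr %p
// becomes:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 1
//   store i32 %s, ptr %gep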
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
                                            LI->getPointerAddressSpace(), CostKind);
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();
  for (User *U : LI->users()) {
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
        Index ? Index->getZExtValue() : -1);
             << "\n  LoadExtractCost: " << OriginalCost
             << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)
  Type *ElemType = VecTy->getElementType();
  for (User *U : LI->users()) {
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
      AAMDNodes OldAAMD = LI->getAAMetadata();
    replaceValue(*EI, *NewLoad, false);
  FailureGuard.release();
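// Illustrative sketch (hypothetical IR): extracting one lane of a loaded
// vector becomes a scalar load through a GEP when scalarization is safe:
//   %v = load <4 x i32>, ptr %p
//   %e = extractelement <4 x i32> %v, i64 2
// becomes:
//   %gep = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i32 2
//   %e   = load i32, ptr %gep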
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {
          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
          Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)
  Value *ScalarV = Ext->getOperand(0);
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
  for (User *U : Ext->users()) {
            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
            : (Idx * SrcEltSizeInBits);
    U->replaceAllUsesWith(And);
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())
  uint64_t ShAmtX = 0;
  uint64_t ShAmtY = 0;
  if (ShAmtX > ShAmtY) {
  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))
      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)
  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
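// Illustrative sketch (hypothetical IR, little-endian only per the check
// above): a zext/shl/or concatenation of two bitcast bool masks becomes one
// concatenating shuffle plus a single bitcast:
//   %b0 = bitcast <8 x i1> %m0 to i8
//   %b1 = bitcast <8 x i1> %m1 to i8
//   %z0 = zext i8 %b0 to i16
//   %z1 = zext i8 %b1 to i16
//   %s1 = shl i16 %z1, 8
//   %r  = or i16 %s1, %z0
// becomes:
//   %c = shufflevector <8 x i1> %m0, <8 x i1> %m1,
//        <16 x i32> <i32 0, i32 1, ..., i32 15>   ; identity concat mask
//   %r = bitcast <16 x i1> %c to i16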
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  if (!Match0 && !Match1)
  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
  unsigned NumSrcElts = BinOpTy->getNumElements();
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);
  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 =
      ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  bool IsIdentity1 =
      ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
      BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
        Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
        Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});
  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewInst->copyIRFlags(BinOp);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (LHS->getOpcode() != RHS->getOpcode())
  bool IsCommutative = false;
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())
  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))
  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)
  SmallVector<int> NewMask0(OldMask);
  SmallVector<int> NewMask1(OldMask);
  ArrayRef<int> InnerMask;
                        m_Mask(InnerMask)))) &&
               [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {
  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);
  auto *ShuffleCmpTy =
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
                      : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
  auto SelOp = Instruction::Select;
      {I.getOperand(0), I.getOperand(1)}, &I);
      Mask, CostKind, 0, nullptr, {C1, C2});
      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  replaceValue(I, *NewSel);
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (C0->getSrcTy() != C1->getSrcTy())
  if (Opcode != C1->getOpcode()) {
      Opcode = Instruction::SExt;
  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");
  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)
  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;
  auto *NewShuffleDstTy =
      CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
                                          C1->getOperand(0), NewMask);
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
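// Illustrative sketch (hypothetical IR): the casts sink below the shuffle,
// so only one cast of the shuffled source remains:
//   %x = sext <4 x i16> %a to <4 x i32>
//   %y = sext <4 x i16> %b to <4 x i32>
//   %r = shufflevector <4 x i32> %x, <4 x i32> %y,
//        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes:
//   %s = shufflevector <4 x i16> %a, <4 x i16> %b,
//        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %r = sext <4 x i16> %s to <4 x i32>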
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  if (!Match0 && !Match1)
  SmallVector<int, 16> PoisonMask1;
    InnerMask1 = PoisonMask1;
  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();
  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
    if (!NewX || NewX == Src) {
    if (!NewY || NewY == Src) {
    replaceValue(I, *NewX);
  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
      nullptr, {NewX, NewY});
  NewCost += InnerCost0;
  NewCost += InnerCost1;
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  replaceValue(I, *Shuf);
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  ArrayRef<int> OldMask;
  if (IID != II1->getIntrinsicID())
  if (!ShuffleDstTy || !II0Ty)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
        II0->getArgOperand(I) != II1->getArgOperand(I))
      II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
                                         ShuffleDstTy->getNumElements());
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
          II1->getArgOperand(I), OldMask);
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  replaceValue(I, *NewIntrinsic);
  int M = SV->getMaskValue(Lane);
  if (static_cast<unsigned>(M) < NumElts) {
    U = &SV->getOperandUse(0);
    U = &SV->getOperandUse(1);

  auto [U, Lane] = IL;

  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())

  auto [FrontU, FrontLane] = Item.front();
  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  if (ConcatLeafs.contains(FrontU)) {
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);

  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
      Ops[Idx] = II->getOperand(Idx);
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  for (const auto &Lane : Item)
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  if (!Ty || I.use_empty())
  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;
  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
      return X->getType() == Y->getType() &&
    if (FrontLane == 0 &&
            Ty->getNumElements() &&
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
        C && C->getSplatValue() &&
      SplatLeafs.insert(FrontU);
      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);
    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *V = IL.first->get();
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
        if (CI->getSrcTy()->getScalarType() !=
          SI->getOperand(0)->getType() !=
          II->getIntrinsicID() ==
          !II->hasOperandBundles());
        BO && BO->isIntDivRem())
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
      ConcatLeafs.insert(FrontU);
  if (NumVisited <= 1)
  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:
  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;
  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
      if (CI->isBinaryOp()) {
        for (auto *Op : CI->operand_values())
        if (Shuffle && Shuffle != SV)
  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)
  FixedVectorType *VecType =
  FixedVectorType *ShuffleInputType =
  if (!ShuffleInputType)
  SmallVector<int> ConcatMask;
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
      ShuffleInputType, ConcatMask, CostKind);
  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << "  OldCost: " << OldCost << " vs NewCost: " << NewCost
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;
  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;
  SmallVector<Value *, 2> PrevVecV(2, nullptr);
  int64_t VecSize = FVT->getNumElements();
  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
      ExpectedParityMask |= (1ll << Mask);
  InstWorklist.push(VecOpEE);
  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      ShouldBeCallOrBinInst ^= 1;
      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      ShouldBeCallOrBinInst ^= 1;
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);
      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (SVInst != PrevVecV[1])
      ArrayRef<int> CurMask;
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
          SVInst->getType(), SVInst->getType(),
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      ShouldBeCallOrBinInst ^= 1;
  if (ShouldBeCallOrBinInst)
  assert(VecSize != -1 && "Expected Match for Vector Size");
  Value *FinalVecV = PrevVecV[0];
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  if (NewCost >= OrigCost)
  auto *ReducedResult =
  replaceValue(I, *ReducedResult);
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  bool TruncOnly = false;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  Value *ReductionSrc = I.getOperand(0);
  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  if (OldCost <= NewCost || !NewCost.isValid())
      II->getIntrinsicID(), {Src});
  replaceValue(I, *NewCast);
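// Illustrative sketch (hypothetical IR): bitwise reductions commute with
// per-lane extensions, so the cast is done once on the reduced scalar:
//   %e = zext <8 x i8> %x to <8 x i32>
//   %r = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %e)
// becomes:
//   %n = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
//   %r = zext i8 %n to i32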
  constexpr unsigned MaxVisited = 32;
  bool FoundReduction = false;
  while (!WorkList.empty()) {
    for (User *U : I->users()) {
      if (!UI || !Visited.insert(UI).second)
      if (Visited.size() > MaxVisited)
      switch (II->getIntrinsicID()) {
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_umax:
        FoundReduction = true;
  return FoundReduction;
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
    if (!I || I->getOperand(0)->getType() != VT)
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
    for (auto *U : I->users()) {
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))
  if (FromReduction && Shuffles.size() > 1)
  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);
  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
      for (int &Elem : Mask) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)
    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));
      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))
    if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);
  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      } else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));
  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {
        VT, VT, SV->getShuffleMask(), CostKind);
  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)
  std::set<SmallVector<int, 4>> UniqueShuffles;
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    return C + ShuffleCost * NumUniqueGroups / NumGroups;
    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);
  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
        ShuffleVectorInst *FirstSV =
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
  FixedVectorType *Op0SmallVT =
  FixedVectorType *Op1SmallVT =
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
             << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||
    if (InputShuffles.contains(SSV))
    return SV->getOperand(Op);
                                 GetShuffleOperand(SVI0A, 1), V1A);
                                 GetShuffleOperand(SVI0B, 1), V1B);
                                 GetShuffleOperand(SVI1A, 1), V2A);
                                 GetShuffleOperand(SVI1B, 1), V2B);
    I->copyIRFlags(Op0, true);
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV, false);
  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);
  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  if (I.getOpcode() == Instruction::LShr) {
      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  for (User *U : ZExtOperand->users()) {
      ShrinkCost += ZExtCost;
    ShrinkCost += ZExtCost;
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  if (ShrinkCost > CurrentCost)
  Value *Op0 = ZExted;
  if (I.getOperand(0) == OtherOperand)
  replaceValue(I, *NewZExtr);
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())
  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  if (NeedDstSrcSwap) {
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = ExtIdx;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
      Mask[InsIdx] = ExtIdx + NumDstElts;
  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
        nullptr, {DstVec, SrcVec});
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
      ExtToVecMask[0] = ExtIdx;
        DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  if (!Ext->hasOneUse())
  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (OldCost < NewCost)
  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
  replaceValue(I, *Shuf);
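// Illustrative sketch (hypothetical IR): an extract feeding an insert into
// another vector of the same element type becomes a two-source shuffle:
//   %e = extractelement <4 x i32> %src, i64 3
//   %r = insertelement <4 x i32> %dst, i32 %e, i64 1
// becomes (lane 1 taken from %src lane 3, i.e. mask index 4 + 3):
//   %r = shufflevector <4 x i32> %dst, <4 x i32> %src,
//        <4 x i32> <i32 0, i32 7, i32 2, i32 3>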
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();
  LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                    << *I.getType() << " is too high.\n");
  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  if (!OldLoad || !OldLoad->isSimple())
  unsigned const OldNumElements = OldLoadTy->getNumElements();
  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      User *Shuffle = Use.getUser();
        return std::nullopt;
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;
  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    if (NewNumElements < OldNumElements) {
      Type *ElemTy = OldLoadTy->getElementType();
      Value *PtrOp = OldLoad->getPointerOperand();
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
          OldLoad->getPointerAddressSpace(), CostKind);
      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      unsigned const MaxIndex = NewNumElements * 2u;
      for (llvm::Use &Use : I.uses()) {
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n  OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");
      if (OldCost < NewCost || !NewCost.isValid())
      NewLoad->copyMetadata(I);
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        replaceValue(*Shuffle, *NewShuffle, false);
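// Illustrative sketch (hypothetical IR): when every shuffle user reads only
// a low prefix of the loaded vector, the load is narrowed to that prefix:
//   %v = load <8 x i32>, ptr %p
//   %s = shufflevector <8 x i32> %v, <8 x i32> poison,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// becomes:
//   %v4 = load <4 x i32>, ptr %p
//   %s  = shufflevector <4 x i32> %v4, <4 x i32> poison,
//         <4 x i32> <i32 0, i32 1, i32 2, i32 3>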
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  if (!Phi || Phi->getNumIncomingValues() != 2u)
  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  auto const InputNumElements = InputVT->getNumElements();
  if (InputNumElements >= ResultVT->getNumElements())
  SmallVector<int, 16> NewMask;
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
    else if (M0 == -1 && M1 == -1)
  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
  for (unsigned I = 0u; I < InputNumElements; ++I) {
             << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)
  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  replaceValue(*Phi, *NewShuf1);
bool VectorCombine::run() {
    auto Opcode = I.getOpcode();
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))
    if (scalarizeOpOrCmp(I))
    if (scalarizeLoadExtract(I))
    if (scalarizeExtExtract(I))
    if (scalarizeVPIntrinsic(I))
    if (foldInterleaveIntrinsics(I))
    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))
    if (TryEarlyFoldsOnly)
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
        if (foldInsExtBinop(I))
        if (foldInsExtVectorToShuffle(I))
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
        if (foldShuffleOfBinops(I))
        if (foldShuffleOfSelects(I))
        if (foldShuffleOfCastops(I))
        if (foldShuffleOfShuffles(I))
        if (foldShuffleOfIntrinsics(I))
        if (foldSelectShuffle(I))
        if (foldShuffleToIdentity(I))
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
        if (foldBitOpOfCastConstant(I))
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))
      case Instruction::Call:
        if (foldShuffleFromReductions(I))
        if (foldCastFromReductions(I))
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
      if (foldExtractExtract(I))
      if (foldExtractedCmps(I))
      if (foldBinopOfReductions(I))
  bool MadeChange = false;
  for (BasicBlock &BB : F) {
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);
  while (!Worklist.isEmpty()) {
    MadeChange |= FoldInst(*I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
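In essence this is a linear Mod/Ref scan over the instruction range; a minimal sketch matching the declared signature:

static bool isMemModifiedBetweenSketch(BasicBlock::iterator Begin,
                                       BasicBlock::iterator End,
                                       const MemoryLocation &Loc,
                                       AAResults &AA) {
  // Conservatively treat any instruction that may write Loc as a blocker.
  return any_of(make_range(Begin, End), [&](const Instruction &I) {
    return isModSet(AA.getModRefInfo(&I, Loc));
  });
}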
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
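Illustrative usage of the APInt helpers listed above (the values are arbitrary):

APInt Idx(/*numBits=*/32, /*val=*/5);
APInt Wide = Idx.zext(64);                // zero-extend 5 to 64 bits
APInt Bit5 = APInt::getOneBitSet(64, 5);  // 64-bit value 32 (only bit 5 set)
bool OutOfRange = Wide.uge(APInt(64, 4)); // true: 5 >= 4, unsigned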
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if they match.
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this range by a value in Other.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string, with methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
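Taken together, these IRBuilder helpers create the scalarized IR this pass aims for. A hedged sketch (the function name is illustrative) of one lane of a vector add being rewritten as extract, scalar add, reinsert:

static Value *scalarizeAddLaneSketch(IRBuilderBase &B, Value *VecA,
                                     Value *VecB, uint64_t Lane) {
  Value *A = B.CreateExtractElement(VecA, B.getInt64(Lane));
  Value *C = B.CreateExtractElement(VecB, B.getInt64(Lane));
  Value *Sum = B.CreateBinOp(Instruction::Add, A, C);
  return B.CreateInsertElement(VecA, Sum, B.getInt64(Lane));
}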
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have swapped position.
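Illustrative usage of the mask utilities above, assuming SVI points at a ShuffleVectorInst whose operands are <4 x i32>:

SmallVector<int> Mask;
SVI->getShuffleMask(Mask);             // e.g. Mask == {0, 1, 2, 3}
bool IsNoOp =
    ShuffleVectorInst::isIdentityMask(Mask, /*NumSrcElts=*/4);
int SrcOfLane2 = SVI->getMaskValue(2); // -1 encodes a poison lane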
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the AllowNonInbounds flag set to false, so only in-bounds offsets are accumulated.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
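These matchers compose into declarative pattern checks. An illustrative example, assuming I is the instruction being visited (X and IdxC are bound on a successful match):

// Is I an fneg of a single extracted lane?
//   %e = extractelement <N x float> %X, i64 IdxC
//   %f = fneg float %e
Value *X;
ConstantInt *IdxC;
bool Matched =
    match(&I, m_FNeg(m_ExtractElt(m_Value(X), m_ConstantInt(IdxC))));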
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a BasicBlock.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
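Typical usage, and the reason the combiner can delete instructions mid-walk (BB is assumed to be a BasicBlock reference):

// The iterator is advanced before the body runs, so erasing I is safe.
for (Instruction &I : make_early_inc_range(BB))
  if (isInstructionTriviallyDead(&I))
    I.eraseFromParent();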
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with an active sanitizer.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
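A worked example of the widening direction with Scale = 2: the mask must move whole adjacent pairs for an equivalent wide mask to exist.

SmallVector<int> Scaled;
// {2,3,0,1} swaps two adjacent pairs: widens to {1,0}, returns true.
bool OkSwap = widenShuffleMaskElts(2, {2, 3, 0, 1}, Scaled);
// {1,2,3,0} splits the pairs across wide lanes: returns false.
bool OkRot = widenShuffleMaskElts(2, {1, 2, 3, 0}, Scaled);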
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side effects.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) that will be vectorized.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed elements.
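The inverse direction always succeeds, since each wide index simply expands to Scale sequential narrow indices:

SmallVector<int> Narrow;
narrowShuffleMaskElts(2, {1, 0}, Narrow); // Narrow == {2, 3, 0, 1}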
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined value of C.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
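These KnownBits queries are how a variable extract index can still be proven in bounds. An illustrative sketch, assuming Idx, DL, and NumVecElts are in scope:

// If every value the index may take is below the element count, the
// scalarized access cannot read out of bounds.
KnownBits Known = computeKnownBits(Idx, DL);
bool ProvablyInBounds = Known.getMaxValue().ult(NumVecElts);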