#define DEBUG_TYPE "vector-combine"

STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed");
STATISTIC(NumScalarCmp, "Number of scalar compares formed");
STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");

static cl::opt<bool> DisableVectorCombine(
    "disable-vector-combine", cl::init(false), cl::Hidden,
    cl::desc("Disable all vector combine transforms"));

static cl::opt<bool> DisableBinopExtractShuffle(
    "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
    cl::desc("Disable binop extract to shuffle transforms"));

static cl::opt<unsigned> MaxInstrsToScan(
    "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
    cl::desc("Max number of instructions to scan for vector combining."));

static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
                bool TryEarlyFoldsOnly)
        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}

  const TargetTransformInfo &TTI;
  const DominatorTree &DT;
  const SimplifyQuery SQ;
  bool TryEarlyFoldsOnly;

  InstructionWorklist Worklist;
  bool vectorizeLoadInsert(Instruction &I);
  bool widenSubvectorLoad(Instruction &I);
  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                        ExtractElementInst *Ext1,
                                        unsigned PreferredExtractIndex) const;
  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             const Instruction &I,
                             ExtractElementInst *&ConvertToShuffle,
                             unsigned PreferredExtractIndex);
  bool foldExtractExtract(Instruction &I);
  bool foldInsExtFNeg(Instruction &I);
  bool foldInsExtBinop(Instruction &I);
  bool foldInsExtVectorToShuffle(Instruction &I);
  bool foldBitOpOfCastops(Instruction &I);
  bool foldBitOpOfCastConstant(Instruction &I);
  bool foldBitcastShuffle(Instruction &I);
  bool scalarizeOpOrCmp(Instruction &I);
  bool scalarizeVPIntrinsic(Instruction &I);
  bool foldExtractedCmps(Instruction &I);
  bool foldBinopOfReductions(Instruction &I);
  bool foldSingleElementStore(Instruction &I);
  bool scalarizeLoadExtract(Instruction &I);
  bool scalarizeExtExtract(Instruction &I);
  bool foldConcatOfBoolMasks(Instruction &I);
  bool foldPermuteOfBinops(Instruction &I);
  bool foldShuffleOfBinops(Instruction &I);
  bool foldShuffleOfSelects(Instruction &I);
  bool foldShuffleOfCastops(Instruction &I);
  bool foldShuffleOfShuffles(Instruction &I);
  bool foldShuffleOfIntrinsics(Instruction &I);
  bool foldShuffleToIdentity(Instruction &I);
  bool foldShuffleFromReductions(Instruction &I);
  bool foldShuffleChainsToReduce(Instruction &I);
  bool foldCastFromReductions(Instruction &I);
  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
  bool foldInterleaveIntrinsics(Instruction &I);
  bool shrinkType(Instruction &I);
  bool shrinkLoadForShuffles(Instruction &I);
  bool shrinkPhiOfShuffles(Instruction &I);
  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
      Worklist.pushUsersToWorkList(*NewI);
      Worklist.pushValue(NewI);

    SmallPtrSet<Value *, 4> Visited;
        OpI, nullptr, nullptr, [&](Value *V) {
          NextInst = NextInst->getNextNode();
      Worklist.pushUsersToWorkList(*OpI);
      Worklist.pushValue(OpI);
  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
      Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||

  Type *ScalarTy = Load->getType()->getScalarType();
  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
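// Match a scalar load feeding an insertelement into lane 0 of a wider vector
// and, when the wider load is legal and the cost model agrees, replace the
// pair with a full vector load plus a shuffle. Illustrative IR (not taken
// from a specific test):
//   %s = load float, ptr %p
//   %r = insertelement <4 x float> poison, float %s, i64 0
// -->
//   %wide = load <4 x float>, ptr %p
//   %r = shufflevector <4 x float> %wide, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>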
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
  unsigned OffsetEltIndex = 0;
  unsigned OffsetBitWidth = DL->getIndexTypeSizeInBits(SrcPtr->getType());
  APInt Offset(OffsetBitWidth, 0);
  uint64_t ScalarSizeInBytes = ScalarSize / 8;
  if (Offset.urem(ScalarSizeInBytes) != 0)
  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
  if (OffsetEltIndex >= MinVecNumElts)

  unsigned AS = Load->getPointerAddressSpace();
  unsigned OutputNumElts = Ty->getNumElements();
  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
  Mask[0] = OffsetEltIndex;
  if (OldCost < NewCost || !NewCost.isValid())

  replaceValue(I, *VecLd);
bool VectorCombine::widenSubvectorLoad(Instruction &I) {
  if (!Shuf->isIdentityWithPadding())

  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
    return M >= (int)(NumOpElts);

  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
  unsigned AS = Load->getPointerAddressSpace();
  if (OldCost < NewCost || !NewCost.isValid())

  replaceValue(I, *VecLd);
ExtractElementInst *VectorCombine::getShuffleExtract(
    ExtractElementInst *Ext0, ExtractElementInst *Ext1,
  assert(Index0C && Index1C && "Expected constant extract indexes");

  unsigned Index0 = Index0C->getZExtValue();
  unsigned Index1 = Index1C->getZExtValue();
  if (Index0 == Index1)

  if (PreferredExtractIndex == Index0)
  if (PreferredExtractIndex == Index1)

  return Index0 > Index1 ? Ext0 : Ext1;
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                          ExtractElementInst *Ext1,
                                          const Instruction &I,
                                          ExtractElementInst *&ConvertToShuffle,
                                          unsigned PreferredExtractIndex) {
  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");

  unsigned Opcode = I.getOpcode();
  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
         "Expected a compare");

  unsigned Ext0Index = Ext0IndexC->getZExtValue();
  unsigned Ext1Index = Ext1IndexC->getZExtValue();

  unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
  unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);

  if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
    bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
    OldCost = CheapExtractCost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
    OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
    NewCost = VectorOpCost + CheapExtractCost +

  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
  if (ConvertToShuffle) {
    SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
    ShuffleMask[BestInsIndex] = BestExtIndex;
          VecTy, VecTy, ShuffleMask, CostKind, 0,
          nullptr, {ConvertToShuffle});
          VecTy, VecTy, {}, CostKind, 0, nullptr,

  return OldCost < NewCost;
  ShufMask[NewIndex] = OldIndex;
  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");

                                       V1, "foldExtExtBinop");
    VecBOInst->copyIRFlags(&I);
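// Rewrite "binop/cmp (extractelement X, C0), (extractelement Y, C1)" as an
// extract of a vector binop/cmp, shuffling one source first when needed so
// both extracts read the same lane. Profitability is decided by
// isExtractExtractCheap() above; the IR below is illustrative only:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %r  = add i32 %e0, %e1
// -->
//   %s  = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, ...>
//   %v  = add <4 x i32> %x, %s
//   %r  = extractelement <4 x i32> %v, i32 0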
bool VectorCombine::foldExtractExtract(Instruction &I) {
  ExtractElementInst *ExtractToChange;
  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))

  if (ExtractToChange) {
    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
    if (ExtractToChange == Ext0)

                     ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
                     : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
  replaceValue(I, *NewExt);
bool VectorCombine::foldInsExtFNeg(Instruction &I) {
  if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())

  unsigned NumElts = VecTy->getNumElements();
  if (Index >= NumElts)

  SmallVector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0);

  bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
  SmallVector<int> SrcMask;
                              VecTy, SrcVecTy, SrcMask, CostKind);
  if (NewCost > OldCost)

  replaceValue(I, *NewShuf);
bool VectorCombine::foldInsExtBinop(Instruction &I) {
  BinaryOperator *VecBinOp, *SclBinOp;

                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

    NewInst->copyIRFlags(VecBinOp);
    NewInst->andIRFlags(SclBinOp);
  replaceValue(I, *NewBO);
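// Fold a bitwise logic op of two identically-cast values into a cast of the
// logic op when the cast kind is bitcast/trunc/sext/zext and the cost model
// agrees. Illustrative: and (zext X), (zext Y) --> zext (and X, Y).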
bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
  if (!BinOp || !BinOp->isBitwiseLogicOp())

  if (!LHSCast || !RHSCast) {
    LLVM_DEBUG(dbgs() << " One or both operands are not cast instructions\n");

  if (CastOpcode != RHSCast->getOpcode())

  switch (CastOpcode) {
  case Instruction::BitCast:
  case Instruction::Trunc:
  case Instruction::SExt:
  case Instruction::ZExt:

  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);

  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())

      LHSCastCost + RHSCastCost;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;
  if (!RHSCast->hasOneUse())
    NewCost += RHSCastCost;

             << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)

                                     BinOp->getName() + ".inner");
  NewBinOp->copyIRFlags(BinOp);
  replaceValue(I, *Result);
bool VectorCombine::foldBitOpOfCastConstant(Instruction &I) {
  switch (CastOpcode) {
  case Instruction::BitCast:

  Value *LHSSrc = LHSCast->getOperand(0);
  auto *SrcTy = LHSSrc->getType();
  auto *DstTy = I.getType();
  if (CastOpcode != Instruction::BitCast &&
  if (!SrcTy->getScalarType()->isIntegerTy() ||
      !DstTy->getScalarType()->isIntegerTy())

  PreservedCastFlags RHSFlags;
  if (!LHSCast->hasOneUse())
    NewCost += LHSCastCost;

  LLVM_DEBUG(dbgs() << "foldBitOpOfCastConstant: OldCost=" << OldCost
                    << " NewCost=" << NewCost << "\n");
  if (NewCost > OldCost)

                                     LHSSrc, InvC, I.getName() + ".inner");
  NewBinOp->copyIRFlags(&I);
  replaceValue(I, *Result);
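// Move a bitcast ahead of a shufflevector so the shuffle operates on the
// destination element type, rescaling the mask when the source and
// destination element sizes differ. Illustrative:
// bitcast (shuffle V0, V1, M) --> shuffle (bitcast V0), (bitcast V1), M'.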
bool VectorCombine::foldBitcastShuffle(Instruction &I) {
  if (!DestTy || !SrcTy)

  unsigned DestEltSize = DestTy->getScalarSizeInBits();
  unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
  if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)

  if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
      !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))

  SmallVector<int, 16> NewMask;
  if (DestEltSize <= SrcEltSize) {
    assert(SrcEltSize % DestEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = SrcEltSize / DestEltSize;
    assert(DestEltSize % SrcEltSize == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = DestEltSize / SrcEltSize;

  unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
  auto *NewShuffleTy =
  auto *OldShuffleTy =
  unsigned NumOps = IsUnary ? 1 : 2;

                            TargetTransformInfo::CastContextHint::None,
                            TargetTransformInfo::CastContextHint::None,

  LLVM_DEBUG(dbgs() << "Found a bitcasted shuffle: " << I << "\n OldCost: "
                    << OldCost << " vs NewCost: " << NewCost << "\n");
  if (NewCost > OldCost || !NewCost.isValid())

  replaceValue(I, *Shuf);
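// If both vector operands of a VP intrinsic are splats, perform the operation
// on the scalar values and splat the result, provided the mask is all-true or
// the scalar operation is safe to speculate.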
bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
  if (!ScalarOp0 || !ScalarOp1)

  auto IsAllTrueMask = [](Value *MaskVal) {
      return ConstValue->isAllOnesValue();

  SmallVector<int> Mask;
    Mask.resize(FVTy->getNumElements(), 0);
    Args.push_back(V->getType());
  IntrinsicCostAttributes Attrs(IntrID, VecTy, Args);

  std::optional<unsigned> FunctionalOpcode =
  std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
  if (!FunctionalOpcode) {
    IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);

  InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;

  LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI
                    << ", Cost of scalarizing:" << NewCost << "\n");
  if (OldCost < NewCost || !NewCost.isValid())

  bool SafeToSpeculate;
        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
  if (!SafeToSpeculate &&
                                      {ScalarOp0, ScalarOp1})
                                  ScalarOp0, ScalarOp1);
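// Scalarize a vector unary/binary op, compare, or intrinsic whose operands
// are insertelements of scalars into the same lane (or constants): perform
// the operation on the scalars and insert the result back into the vector of
// the folded constant operands. Illustrative IR:
//   %v0 = insertelement <4 x i32> %c0, i32 %a, i32 1
//   %v1 = insertelement <4 x i32> %c1, i32 %b, i32 1
//   %r  = add <4 x i32> %v0, %v1
// -->
//   %s  = add i32 %a, %b
//   %r  = insertelement <4 x i32> %cr, i32 %s, i32 1   ; %cr = add %c0, %c1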
bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
  if (!UO && !BO && !CI && !II)

    if (Arg->getType() != II->getType() &&

  for (User *U : I.users())

  std::optional<uint64_t> Index;
  auto Ops = II ? II->args() : I.operands();
    uint64_t InsIdx = 0;
    if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
    else if (InsIdx != *Index)
  if (!Index.has_value())

  Type *ScalarTy = VecTy->getScalarType();
  assert(VecTy->isVectorTy() &&
         "Unexpected types for insert element into binop or cmp");

  unsigned Opcode = I.getOpcode();
  } else if (UO || BO) {
    IntrinsicCostAttributes ScalarICA(
        II->getIntrinsicID(), ScalarTy,
    IntrinsicCostAttributes VectorICA(
        II->getIntrinsicID(), VecTy,

  Value *NewVecC = nullptr;
    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);

  for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
            II->getIntrinsicID(), Idx, &TTI)))
        Instruction::InsertElement, VecTy, CostKind, *Index, VecC, Scalar);
    OldCost += InsertCost;
    NewCost += !Op->hasOneUse() * InsertCost;

  if (OldCost < NewCost || !NewCost.isValid())

    ++NumScalarIntrinsic;
    Scalar = Builder.CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
  Scalar->setName(I.getName() + ".scalar");
    ScalarInst->copyIRFlags(&I);
  replaceValue(I, *Insert);
bool VectorCombine::foldExtractedCmps(Instruction &I) {
  if (!BI || !I.getType()->isIntegerTy(1))

  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
  CmpPredicate P0, P1;
  uint64_t Index0, Index1;

  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
  assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
         "Unknown ExtractElementInst");

  unsigned CmpOpcode =
      Ext0Cost + Ext1Cost + CmpCost * 2 +
  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
  ShufMask[CheapIndex] = ExpensiveIndex;
  NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
  NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
  if (OldCost < NewCost || !NewCost.isValid())

  Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
  Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
  replaceValue(I, *NewExt);
  unsigned ReductionOpc =
  CostBeforeReduction =
      TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
  CostAfterReduction =
      TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned, II.getType(),

  if (RedOp && II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
      (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
        TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
        TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
        TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
    CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
    CostAfterReduction = TTI.getMulAccReductionCost(
        IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind);

  CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
bool VectorCombine::foldBinopOfReductions(Instruction &I) {
  if (BinOpOpc == Instruction::Sub)
    ReductionIID = Intrinsic::vector_reduce_add;

  auto checkIntrinsicAndGetItsArgument = [](Value *V,
    if (II->getIntrinsicID() == IID && II->hasOneUse())
      return II->getArgOperand(0);

  Value *V0 = checkIntrinsicAndGetItsArgument(I.getOperand(0), ReductionIID);
  Value *V1 = checkIntrinsicAndGetItsArgument(I.getOperand(1), ReductionIID);

  unsigned ReductionOpc =
      CostOfRedOperand0 + CostOfRedOperand1 +
  if (NewCost >= OldCost || !NewCost.isValid())

             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (BinOpOpc == Instruction::Or)
    VectorBO = Builder.CreateOr(V0, V1, "",
  replaceValue(I, *Rdx);
  unsigned NumScanned = 0;
  return std::any_of(Begin, End, [&](const Instruction &Instr) {
class ScalarizationResult {
  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };

  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
      : Status(Status), ToFreeze(ToFreeze) {}

  ScalarizationResult(const ScalarizationResult &Other) = default;
  ~ScalarizationResult() {
    assert(!ToFreeze && "freeze() not called with ToFreeze being set");

  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
  static ScalarizationResult safe() { return {StatusTy::Safe}; }
  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
    return {StatusTy::SafeWithFreeze, ToFreeze};

  bool isSafe() const { return Status == StatusTy::Safe; }
  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }

    Status = StatusTy::Unsafe;

  void freeze(IRBuilderBase &Builder, Instruction &UserI) {
    assert(isSafeWithFreeze() &&
           "should only be used when freezing is required");
           "UserI must be a user of ToFreeze");
    IRBuilder<>::InsertPointGuard Guard(Builder);
      if (U.get() == ToFreeze)

  uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
    if (C->getValue().ult(NumElements))
      return ScalarizationResult::safe();
    return ScalarizationResult::unsafe();

    return ScalarizationResult::unsafe();
  APInt Zero(IntWidth, 0);
  APInt MaxElts(IntWidth, NumElements);
                    true, &AC, CtxI, &DT)))
    return ScalarizationResult::safe();
  return ScalarizationResult::unsafe();

  if (ValidIndices.contains(IdxRange))
    return ScalarizationResult::safeWithFreeze(IdxBase);
  return ScalarizationResult::unsafe();

      C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
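// Rewrite a store of a vector produced by inserting one scalar into a vector
// loaded from the same address as a single scalar store, when the lane index
// can be proven (or frozen to be) in bounds. Illustrative IR:
//   %v = load <4 x i32>, ptr %p
//   %w = insertelement <4 x i32> %v, i32 %s, i32 1
//   store <4 x i32> %w, ptr %p
// -->
//   %gep = getelementptr <4 x i32>, ptr %p, i32 0, i32 1
//   store i32 %s, ptr %gep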
bool VectorCombine::foldSingleElementStore(Instruction &I) {
  if (!match(SI->getValueOperand(),

  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
      !DL->typeSizeEqualsStoreSize(Load->getType()->getScalarType()) ||
      SrcAddr != SI->getPointerOperand()->stripPointerCasts())

  if (ScalarizableIdx.isUnsafe() ||

  Worklist.push(Load);
  if (ScalarizableIdx.isSafeWithFreeze())
      SI->getValueOperand()->getType(), SI->getPointerOperand(),
      {ConstantInt::get(Idx->getType(), 0), Idx});
      std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
  replaceValue(I, *NSI);
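// When the extracts of a loaded vector can be proven safe and are cheaper as
// individual scalar loads, replace each extractelement user with a load of
// just that element. Illustrative:
//   extractelement (load <4 x i32>, ptr %p), i64 1
// -->
//   load i32, ptr (getelementptr <4 x i32>, ptr %p, i32 0, i32 1)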
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
  if (LI->isVolatile() || !DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))

      LI->getPointerAddressSpace(), CostKind);
  unsigned NumInstChecked = 0;
  DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
    for (auto &Pair : NeedFreeze)
      Pair.second.discard();

  for (User *U : LI->users()) {
    if (!UI || UI->getParent() != LI->getParent())
    if (UI->use_empty())
    for (Instruction &I :
         make_range(std::next(LI->getIterator()), UI->getIterator())) {
    LastCheckedInst = UI;
    if (ScalarIdx.isUnsafe())
    if (ScalarIdx.isSafeWithFreeze()) {
      NeedFreeze.try_emplace(UI, ScalarIdx);
      ScalarIdx.discard();
        Index ? Index->getZExtValue() : -1);

             << "\n LoadExtractCost: " << OriginalCost
             << " vs ScalarizedCost: " << ScalarizedCost << "\n");
  if (ScalarizedCost >= OriginalCost)

  Type *ElemType = VecTy->getElementType();
  for (User *U : LI->users()) {
    Value *Idx = EI->getIndexOperand();
    auto It = NeedFreeze.find(EI);
    if (It != NeedFreeze.end())
        Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar"));
    Align ScalarOpAlignment =
    NewLoad->setAlignment(ScalarOpAlignment);
      size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType);
      AAMDNodes OldAAMD = LI->getAAMetadata();
    replaceValue(*EI, *NewLoad, false);

  FailureGuard.release();
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))

  unsigned ExtCnt = 0;
  bool ExtLane0 = false;
  for (User *U : Ext->users()) {

          Instruction::And, ScalarDstTy, CostKind,
      (ExtCnt - ExtLane0) *
          Instruction::LShr, ScalarDstTy, CostKind,
  if (ScalarCost > VectorCost)

  Value *ScalarV = Ext->getOperand(0);
  uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
  uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
  for (User *U : Ext->users()) {
    U->replaceAllUsesWith(And);
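// Recognize two bool-mask vectors that are bitcast to integers, zero-extended,
// shifted and or'ed together to form a wider mask, and build the wider mask
// with a single concatenating shuffle plus one bitcast instead
// (little-endian layouts only).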
bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
  Type *Ty = I.getType();
  if (DL->isBigEndian())

  uint64_t ShAmtX = 0;
  uint64_t ShAmtY = 0;
  if (ShAmtX > ShAmtY) {

  uint64_t ShAmtDiff = ShAmtY - ShAmtX;
  unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
      MaskTy->getNumElements() != ShAmtDiff ||
      MaskTy->getNumElements() > (BitWidth / 2))

      Type::getIntNTy(Ty->getContext(), ConcatTy->getNumElements());
  auto *MaskIntTy = Type::getIntNTy(Ty->getContext(), ShAmtDiff);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  if (Ty != ConcatIntTy)

  LLVM_DEBUG(dbgs() << "Found a concatenation of bitcasted bool masks: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

  if (Ty != ConcatIntTy) {
  replaceValue(I, *Result);
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
  BinaryOperator *BinOp;
  ArrayRef<int> OuterMask;
  Value *Op00, *Op01, *Op10, *Op11;
  ArrayRef<int> Mask0, Mask1;
  if (!Match0 && !Match1)

  if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)

  unsigned NumSrcElts = BinOpTy->getNumElements();
      any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))

  SmallVector<int> NewMask0, NewMask1;
  for (int M : OuterMask) {
    if (M < 0 || M >= (int)NumSrcElts) {
      NewMask0.push_back(Match0 ? Mask0[M] : M);
      NewMask1.push_back(Match1 ? Mask1[M] : M);

  unsigned NumOpElts = Op0Ty->getNumElements();
  bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
      all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&
  bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
      all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) &&

                         BinOpTy, OuterMask, CostKind, 0, nullptr, {BinOp}, &I);
                         Op0Ty, NewMask0, CostKind, 0, nullptr, {Op00, Op01});
                         Op1Ty, NewMask1, CostKind, 0, nullptr, {Op10, Op11});

  LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

    NewInst->copyIRFlags(BinOp);
  replaceValue(I, *NewBO);
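// Turn "shuffle (binop X, Y), (binop Z, W)" into a binop of two shuffles,
// "binop (shuffle X, Z), (shuffle Y, W)", when that removes or cheapens the
// shuffles; the analogous compare case is handled the same way.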
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (LHS->getOpcode() != RHS->getOpcode())

  bool IsCommutative = false;
    IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
  if (!ShuffleDstTy || !BinResTy || !BinOpTy || X->getType() != Z->getType())

  unsigned NumSrcElts = BinOpTy->getNumElements();
  if (IsCommutative && X != Z && Y != W && (X == W || Y == Z))

  auto ConvertToUnary = [NumSrcElts](int &M) {
    if (M >= (int)NumSrcElts)

  SmallVector<int> NewMask0(OldMask);
  SmallVector<int> NewMask1(OldMask);

    ArrayRef<int> InnerMask;
                      m_Mask(InnerMask)))) &&
              [NumSrcElts](int M) { return M < (int)NumSrcElts; })) {

  bool ReducedInstCount = false;
  ReducedInstCount |= MergeInner(X, 0, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(Y, 0, NewMask1, CostKind);
  ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0, CostKind);
  ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1, CostKind);

  auto *ShuffleCmpTy =
             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))

          : Builder.CreateCmp(PredLHS, Shuf0, Shuf1);
    NewInst->copyIRFlags(LHS);
    NewInst->andIRFlags(RHS);
  replaceValue(I, *NewBO);
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
  Value *C1, *T1, *F1, *C2, *T2, *F2;
  if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)

  if (((SI0FOp == nullptr) != (SI1FOp == nullptr)) ||
      ((SI0FOp != nullptr) &&
       (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))

  auto SelOp = Instruction::Select;
                      {I.getOperand(0), I.getOperand(1)}, &I);
                      Mask, CostKind, 0, nullptr, {C1, C2});
      toVectorTy(Type::getInt1Ty(I.getContext()), DstVecTy->getNumElements()));

             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

    NewSel = Builder.CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
                                     SI0FOp->getFastMathFlags());
    NewSel = Builder.CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
  replaceValue(I, *NewSel);
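// Hoist a shuffle above two casts of the same kind: "shuffle (cast X),
// (cast Y)" becomes "cast (shuffle X, Y)", rescaling the mask for bitcasts
// that change the element count.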
bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
  ArrayRef<int> OldMask;
  if (C0->getSrcTy() != C1->getSrcTy())

  if (Opcode != C1->getOpcode()) {
      Opcode = Instruction::SExt;

  if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)

  unsigned NumSrcElts = CastSrcTy->getNumElements();
  unsigned NumDstElts = CastDstTy->getNumElements();
  assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
         "Only bitcasts expected to alter src/dst element counts");

  if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
      (NumDstElts % NumSrcElts) != 0)

  SmallVector<int, 16> NewMask;
  if (NumSrcElts >= NumDstElts) {
    assert(NumSrcElts % NumDstElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumSrcElts / NumDstElts;
    assert(NumDstElts % NumSrcElts == 0 && "Unexpected shuffle mask");
    unsigned ScaleFactor = NumDstElts / NumSrcElts;

  auto *NewShuffleDstTy =
                         CastDstTy, OldMask, CostKind, 0, nullptr, {}, &I);

             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

                                          C1->getOperand(0), NewMask);
    NewInst->copyIRFlags(C0);
    NewInst->andIRFlags(C1);
  replaceValue(I, *Cast);
bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
  ArrayRef<int> OuterMask;
  Value *OuterV0, *OuterV1;
  ArrayRef<int> InnerMask0, InnerMask1;
  Value *X0, *X1, *Y0, *Y1;
  if (!Match0 && !Match1)

  SmallVector<int, 16> PoisonMask1;
    InnerMask1 = PoisonMask1;

  X0 = Match0 ? X0 : OuterV0;
  Y0 = Match0 ? Y0 : OuterV0;
  X1 = Match1 ? X1 : OuterV1;
  Y1 = Match1 ? Y1 : OuterV1;
  if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||

  unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
  unsigned NumImmElts = ShuffleImmTy->getNumElements();

  SmallVector<int, 16> NewMask(OuterMask);
  Value *NewX = nullptr, *NewY = nullptr;
  for (int &M : NewMask) {
    Value *Src = nullptr;
    if (0 <= M && M < (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y0 : X0;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
    } else if (M >= (int)NumImmElts) {
      Src = M >= (int)NumSrcElts ? Y1 : X1;
      M = M >= (int)NumSrcElts ? (M - NumSrcElts) : M;
      assert(0 <= M && M < (int)NumSrcElts && "Unexpected shuffle mask index");
      if (!NewX || NewX == Src) {
      if (!NewY || NewY == Src) {

    replaceValue(I, *NewX);

  bool IsUnary = all_of(NewMask, [&](int M) { return M < (int)NumSrcElts; });
                              nullptr, {NewX, NewY});
    NewCost += InnerCost0;
    NewCost += InnerCost1;

             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

  replaceValue(I, *Shuf);
bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
  ArrayRef<int> OldMask;
  if (IID != II1->getIntrinsicID())

  if (!ShuffleDstTy || !II0Ty)

  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
        II0->getArgOperand(I) != II1->getArgOperand(I))

                         II0Ty, OldMask, CostKind, 0, nullptr, {II0, II1}, &I);
  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
      NewArgsTy.push_back(II0->getArgOperand(I)->getType());
                                          ShuffleDstTy->getNumElements());
  IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);

             << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

  for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
                                          II1->getArgOperand(I), OldMask);
    NewInst->copyIRFlags(II0);
    NewInst->andIRFlags(II1);
  replaceValue(I, *NewIntrinsic);
  int M = SV->getMaskValue(Lane);
  if (static_cast<unsigned>(M) < NumElts) {
    U = &SV->getOperandUse(0);
    U = &SV->getOperandUse(1);

    auto [U, Lane] = IL;

  unsigned NumElts = Ty->getNumElements();
  if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)

  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);

  unsigned NumSlices = Item.size() / NumElts;
  for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
    Use *SliceV = Item[Slice * NumElts].first;
    if (!SliceV || SliceV->get()->getType() != Ty)
    for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
      auto [V, Lane] = Item[Slice * NumElts + Elt];
      if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())

  auto [FrontU, FrontLane] = Item.front();

  if (IdentityLeafs.contains(FrontU)) {
    return FrontU->get();
    return Builder.CreateShuffleVector(FrontU->get(), Mask);
  if (ConcatLeafs.contains(FrontU)) {
    for (unsigned S = 0; S < Values.size(); ++S)
      Values[S] = Item[S * NumElts].first->get();
    while (Values.size() > 1) {
      std::iota(Mask.begin(), Mask.end(), 0);
      for (unsigned S = 0; S < NewValues.size(); ++S)
            Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);

  unsigned NumOps = I->getNumOperands() - (II ? 1 : 0);
  for (unsigned Idx = 0; Idx < NumOps; Idx++) {
      Ops[Idx] = II->getOperand(Idx);
        Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
  for (const auto &Lane : Item)
    auto *Value = Builder.CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
    auto *Value = Builder.CreateCast(CI->getOpcode(), Ops[0], DstTy);
    auto *Value = Builder.CreateIntrinsic(DstTy, II->getIntrinsicID(), Ops);
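// Walk a tree of shuffles and lane-preserving operations (binops, casts,
// selected intrinsics) lane by lane; if every output lane ultimately comes
// from the corresponding lane of the leaf values, the shuffles are
// superfluous and the tree is rebuilt directly on the leaves via
// generateNewInstTree() above.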
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
  if (!Ty || I.use_empty())

  for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)

  SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
  unsigned NumVisited = 0;

  while (!Worklist.empty()) {
    auto [FrontU, FrontLane] = Item.front();
      return X->getType() == Y->getType() &&
    if (FrontLane == 0 &&
            Ty->getNumElements() &&
          return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
                                      E.value().second == (int)E.index());
      IdentityLeafs.insert(FrontU);
        C && C->getSplatValue() &&
      SplatLeafs.insert(FrontU);

      auto [FrontU, FrontLane] = Item.front();
      auto [U, Lane] = IL;
      return !U || (U->get() == FrontU->get() && Lane == FrontLane);
      SplatLeafs.insert(FrontU);

    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
      Value *V = IL.first->get();
        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
        if (CI->getSrcTy()->getScalarType() !=
            SI->getOperand(0)->getType() !=
            II->getIntrinsicID() ==
            !II->hasOperandBundles());
        BO && BO->isIntDivRem())
    } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
                   FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
      if (DstTy && SrcTy &&
          SrcTy->getNumElements() == DstTy->getNumElements()) {
        !II->hasOperandBundles()) {
      for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
      ConcatLeafs.insert(FrontU);

  if (NumVisited <= 1)

  LLVM_DEBUG(dbgs() << "Found a superfluous identity shuffle: " << I << "\n");
                                 ConcatLeafs, Builder, &TTI);
  replaceValue(I, *V);
bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_umin:
  case Intrinsic::vector_reduce_umax:

  std::queue<Value *> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  ShuffleVectorInst *Shuffle = nullptr;

  while (!Worklist.empty()) {
    Value *CV = Worklist.front();
    if (CI->isBinaryOp()) {
      for (auto *Op : CI->operand_values())
      if (Shuffle && Shuffle != SV)

  for (auto *V : Visited)
    for (auto *U : V->users())
      if (!Visited.contains(U) && U != &I)

  FixedVectorType *VecType =
  FixedVectorType *ShuffleInputType =
  if (!ShuffleInputType)

  SmallVector<int> ConcatMask;
  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
  bool UsesSecondVec =
      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
                     ShuffleInputType, ConcatMask, CostKind);

  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
  bool MadeChanges = false;
  if (NewCost < OldCost) {
    LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
    replaceValue(*Shuffle, *NewShuffle);
  MadeChanges |= foldSelectShuffle(*Shuffle, true);
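// Recognize the log2 "halve and combine" reduction ladder built from
// shufflevector plus min/max intrinsics or binary ops that ends in an extract
// of lane 0, and replace the whole chain with a single vector.reduce.*
// intrinsic when the target finds that cheaper.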
bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
  std::queue<Value *> InstWorklist;

  std::optional<unsigned int> CommonCallOp = std::nullopt;
  std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;

  bool IsFirstCallOrBinInst = true;
  bool ShouldBeCallOrBinInst = true;

  SmallVector<Value *, 2> PrevVecV(2, nullptr);

  int64_t VecSize = FVT->getNumElements();

  unsigned int NumLevels = Log2_64_Ceil(VecSize), VisitedCnt = 0;
  int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
  for (int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
       Cur = (Cur + 1) / 2, --Mask) {
    ExpectedParityMask |= (1ll << Mask);

  InstWorklist.push(VecOpEE);

  while (!InstWorklist.empty()) {
    Value *CI = InstWorklist.front();
      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
        CommonCallOp = II->getIntrinsicID();
      if (II->getIntrinsicID() != *CommonCallOp)
      switch (II->getIntrinsicID()) {
      case Intrinsic::umin:
      case Intrinsic::umax:
      case Intrinsic::smin:
      case Intrinsic::smax: {
        auto *Op0 = II->getOperand(0);
        auto *Op1 = II->getOperand(1);
      ShouldBeCallOrBinInst ^= 1;
      IntrinsicCostAttributes ICA(
          *CommonCallOp, II->getType(),
          {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);

      if (!ShouldBeCallOrBinInst)
      if (!IsFirstCallOrBinInst &&
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
      IsFirstCallOrBinInst = false;
      switch (*CommonBinOp) {
      case BinaryOperator::Add:
      case BinaryOperator::Mul:
      case BinaryOperator::Or:
      case BinaryOperator::And:
      case BinaryOperator::Xor: {
      ShouldBeCallOrBinInst ^= 1;
      InstWorklist.push(PrevVecV[1]);
      InstWorklist.push(PrevVecV[0]);

      if (ShouldBeCallOrBinInst ||
          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
      if (SVInst != PrevVecV[1])
      ArrayRef<int> CurMask;
      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
        if (Mask < ShuffleMaskHalf &&
            CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
      ShuffleMaskHalf *= 2;
      ShuffleMaskHalf -= (ExpectedParityMask & 1);
      ExpectedParityMask >>= 1;
          SVInst->getType(), SVInst->getType(),
      if (!ExpectedParityMask && VisitedCnt == NumLevels)
      ShouldBeCallOrBinInst ^= 1;

  if (ShouldBeCallOrBinInst)

  assert(VecSize != -1 && "Expected Match for Vector Size");

  Value *FinalVecV = PrevVecV[0];
  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
  if (NewCost >= OrigCost)
  auto *ReducedResult =
  replaceValue(I, *ReducedResult);
bool VectorCombine::foldCastFromReductions(Instruction &I) {
  bool TruncOnly = false;
  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_and:
  case Intrinsic::vector_reduce_or:
  case Intrinsic::vector_reduce_xor:

  Value *ReductionSrc = I.getOperand(0);

  Type *ResultTy = I.getType();
      ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
  if (OldCost <= NewCost || !NewCost.isValid())
      II->getIntrinsicID(), {Src});
  replaceValue(I, *NewCast);
  constexpr unsigned MaxVisited = 32;
  bool FoundReduction = false;
  while (!WorkList.empty()) {
    for (User *U : I->users()) {
      if (!UI || !Visited.insert(UI).second)
      if (Visited.size() > MaxVisited)
      switch (II->getIntrinsicID()) {
      case Intrinsic::vector_reduce_add:
      case Intrinsic::vector_reduce_mul:
      case Intrinsic::vector_reduce_and:
      case Intrinsic::vector_reduce_or:
      case Intrinsic::vector_reduce_xor:
      case Intrinsic::vector_reduce_smin:
      case Intrinsic::vector_reduce_smax:
      case Intrinsic::vector_reduce_umin:
      case Intrinsic::vector_reduce_umax:
        FoundReduction = true;

  return FoundReduction;
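// Optimize the "binop select shuffle" pattern: two binary ops whose operands
// are interleaving shuffles of the same two vectors, with shuffles on their
// results as well. The fold re-sorts the lanes so the inner shuffles use
// cheaper (ideally identity-like) masks and rebuilds the outputs with
// adjusted masks, guided by the before/after cost sums computed below.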
bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||

  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
    if (!I || I->getOperand(0)->getType() != VT)
    return any_of(I->users(), [&](User *U) {
      return U != Op0 && U != Op1 &&
             !(isa<ShuffleVectorInst>(U) &&
               (InputShuffles.contains(cast<Instruction>(U)) ||
                isInstructionTriviallyDead(cast<Instruction>(U))));
  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
      checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))

    for (auto *U : I->users()) {
      if (!SV || SV->getType() != VT)
      if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
          (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
  if (!collectShuffles(Op0) || !collectShuffles(Op1))

  if (FromReduction && Shuffles.size() > 1)

  if (!FromReduction) {
    for (ShuffleVectorInst *SV : Shuffles) {
      for (auto *U : SV->users()) {
          Shuffles.push_back(SSV);

  int MaxV1Elt = 0, MaxV2Elt = 0;
  unsigned NumElts = VT->getNumElements();
  for (ShuffleVectorInst *SVN : Shuffles) {
    SmallVector<int> Mask;
    SVN->getShuffleMask(Mask);
    Value *SVOp0 = SVN->getOperand(0);
    Value *SVOp1 = SVN->getOperand(1);
      for (int &Elem : Mask) {
    if (SVOp0 == Op1 && SVOp1 == Op0) {
    if (SVOp0 != Op0 || SVOp1 != Op1)

    SmallVector<int> ReconstructMask;
    for (unsigned I = 0; I < Mask.size(); I++) {
      } else if (Mask[I] < static_cast<int>(NumElts)) {
        MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
        auto It = find_if(V1, [&](const std::pair<int, int> &A) {
          return Mask[I] == A.first;
        MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
        auto It = find_if(V2, [&](const std::pair<int, int> &A) {
          return Mask[I] - static_cast<int>(NumElts) == A.first;
      sort(ReconstructMask);
    OrigReconstructMasks.push_back(std::move(ReconstructMask));

      (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
       MaxV2Elt == static_cast<int>(V2.size()) - 1))

    if (InputShuffles.contains(SSV))
    return SV->getMaskValue(M);
                      std::pair<int, int> Y) {
    int MXA = GetBaseMaskValue(A, X.first);
    int MYA = GetBaseMaskValue(A, Y.first);
  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI0A, A, B);
  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
    return SortBase(SVI1A, A, B);

  for (const auto &Mask : OrigReconstructMasks) {
    SmallVector<int> ReconstructMask;
    for (int M : Mask) {
        auto It = find_if(V, [M](auto A) { return A.second == M; });
        assert(It != V.end() && "Expected all entries in Mask");
        return std::distance(V.begin(), It);
      else if (M < static_cast<int>(NumElts)) {
        ReconstructMask.push_back(FindIndex(V1, M));
        ReconstructMask.push_back(NumElts + FindIndex(V2, M));
    ReconstructMasks.push_back(std::move(ReconstructMask));

  SmallVector<int> V1A, V1B, V2A, V2B;
  for (unsigned I = 0; I < V1.size(); I++) {
    V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
    V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
  for (unsigned I = 0; I < V2.size(); I++) {
    V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
    V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
  while (V1A.size() < NumElts) {
  while (V2A.size() < NumElts) {

                                 VT, VT, SV->getShuffleMask(), CostKind);

  unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
  unsigned MaxVectorSize =
  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
  if (MaxElementsInVector == 0)

  std::set<SmallVector<int, 4>> UniqueShuffles;
    unsigned NumFullVectors = Mask.size() / MaxElementsInVector;
    if (NumFullVectors < 2)
      return C + ShuffleCost;
    SmallVector<int, 4> SubShuffle(MaxElementsInVector);
    unsigned NumUniqueGroups = 0;
    unsigned NumGroups = Mask.size() / MaxElementsInVector;
    for (unsigned I = 0; I < NumFullVectors; ++I) {
      for (unsigned J = 0; J < MaxElementsInVector; ++J)
        SubShuffle[J] = Mask[MaxElementsInVector * I + J];
      if (UniqueShuffles.insert(SubShuffle).second)
        NumUniqueGroups += 1;
    return C + ShuffleCost * NumUniqueGroups / NumGroups;

    SmallVector<int, 16> Mask;
    SV->getShuffleMask(Mask);
    return AddShuffleMaskAdjustedCost(C, Mask);

  auto AllShufflesHaveSameOperands =
      [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
        if (InputShuffles.size() < 2)
        ShuffleVectorInst *FirstSV =
            std::next(InputShuffles.begin()), InputShuffles.end(),
            [&](Instruction *I) {
              ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
              return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;

  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
  if (AllShufflesHaveSameOperands(InputShuffles)) {
    UniqueShuffles.clear();
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
    CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),

  FixedVectorType *Op0SmallVT =
  FixedVectorType *Op1SmallVT =
  UniqueShuffles.clear();
  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
      std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),

  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
             << " vs CostAfter: " << CostAfter << "\n");
  if (CostBefore < CostAfter ||

    if (InputShuffles.contains(SSV))
    return SV->getOperand(Op);
                              GetShuffleOperand(SVI0A, 1), V1A);
                              GetShuffleOperand(SVI0B, 1), V1B);
                              GetShuffleOperand(SVI1A, 1), V2A);
                              GetShuffleOperand(SVI1B, 1), V2B);
    I->copyIRFlags(Op0, true);
    I->copyIRFlags(Op1, true);
  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
    replaceValue(*Shuffles[S], *NSV, false);

  Worklist.pushValue(NSV0A);
  Worklist.pushValue(NSV0B);
  Worklist.pushValue(NSV1A);
  Worklist.pushValue(NSV1B);
bool VectorCombine::shrinkType(Instruction &I) {
  Value *ZExted, *OtherOperand;
  Value *ZExtOperand = I.getOperand(I.getOperand(0) == OtherOperand ? 1 : 0);

  unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
  if (I.getOpcode() == Instruction::LShr) {

      Instruction::ZExt, BigTy, SmallTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  for (User *U : ZExtOperand->users()) {
      ShrinkCost += ZExtCost;
      ShrinkCost += ZExtCost;
      Instruction::Trunc, SmallTy, BigTy,
      TargetTransformInfo::CastContextHint::None, CostKind);
  if (ShrinkCost > CurrentCost)

  Value *Op0 = ZExted;
  if (I.getOperand(0) == OtherOperand)
  replaceValue(I, *NewZExtr);
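// Fold "insertelement DstVec, (extractelement SrcVec, ExtIdx), InsIdx" into a
// single shufflevector of DstVec and SrcVec, adding a length-changing shuffle
// of SrcVec first when the two vector lengths differ.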
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
  Value *DstVec, *SrcVec;
  uint64_t ExtIdx, InsIdx;
  if (!DstVecTy || !SrcVecTy ||
      SrcVecTy->getElementType() != DstVecTy->getElementType())

  unsigned NumDstElts = DstVecTy->getNumElements();
  unsigned NumSrcElts = SrcVecTy->getNumElements();
  if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)

  bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
  if (NeedDstSrcSwap) {
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
    Mask[InsIdx] = ExtIdx;
    std::iota(Mask.begin(), Mask.end(), 0);
    if (!IsExtIdxInBounds && NeedExpOrNarrow)
      Mask[InsIdx] = NumDstElts;
      Mask[InsIdx] = ExtIdx + NumDstElts;

  SmallVector<int> ExtToVecMask;
  if (!NeedExpOrNarrow) {
                              nullptr, {DstVec, SrcVec});
    if (IsExtIdxInBounds)
      ExtToVecMask[ExtIdx] = ExtIdx;
      ExtToVecMask[0] = ExtIdx;
                              DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
  if (!Ext->hasOneUse())

  LLVM_DEBUG(dbgs() << "Found an insert/extract shuffle-like pair: " << I
                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (OldCost < NewCost)

  if (NeedExpOrNarrow) {
    if (!NeedDstSrcSwap)
  replaceValue(I, *Shuf);
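// Fold a vector.interleave2 of two constant splats into one splat of the
// combined (widened) constant, avoiding the interleave entirely when the
// extended cast is not too expensive.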
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
  const APInt *SplatVal0, *SplatVal1;
  auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
  unsigned Width = VTy->getElementType()->getIntegerBitWidth();

    LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
                      << *I.getType() << " is too high.\n");

  APInt NewSplatVal = SplatVal1->zext(Width * 2);
  NewSplatVal <<= Width;
  NewSplatVal |= SplatVal0->zext(Width * 2);
      ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
  if (!OldLoad || !OldLoad->isSimple())

  unsigned const OldNumElements = OldLoadTy->getNumElements();

  using IndexRange = std::pair<int, int>;
  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
    IndexRange OutputRange = IndexRange(OldNumElements, -1);
    for (llvm::Use &Use : I.uses()) {
      User *Shuffle = Use.getUser();
        return std::nullopt;
      for (int Index : Mask) {
        if (Index >= 0 && Index < static_cast<int>(OldNumElements)) {
          OutputRange.first = std::min(Index, OutputRange.first);
          OutputRange.second = std::max(Index, OutputRange.second);
    if (OutputRange.second < OutputRange.first)
      return std::nullopt;

  if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
    unsigned const NewNumElements = Indices->second + 1u;
    if (NewNumElements < OldNumElements) {
      Type *ElemTy = OldLoadTy->getElementType();
      Value *PtrOp = OldLoad->getPointerOperand();
          Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
          OldLoad->getPointerAddressSpace(), CostKind);
          OldLoad->getPointerAddressSpace(), CostKind);

      using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
      unsigned const MaxIndex = NewNumElements * 2u;
      for (llvm::Use &Use : I.uses()) {
        ArrayRef<int> OldMask = Shuffle->getShuffleMask();
        for (int Index : OldMask) {
          if (Index >= static_cast<int>(MaxIndex))

      LLVM_DEBUG(
          dbgs() << "Found a load used only by shufflevector instructions: "
                 << I << "\n OldCost: " << OldCost
                 << " vs NewCost: " << NewCost << "\n");

      if (OldCost < NewCost || !NewCost.isValid())

      NewLoad->copyMetadata(I);
      for (UseEntry &Use : NewUses) {
        ShuffleVectorInst *Shuffle = Use.first;
        std::vector<int> &NewMask = Use.second;
        replaceValue(*Shuffle, *NewShuffle, false);
bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
  if (!Phi || Phi->getNumIncomingValues() != 2u)

  ArrayRef<int> Mask0;
  ArrayRef<int> Mask1;
  auto const InputNumElements = InputVT->getNumElements();
  if (InputNumElements >= ResultVT->getNumElements())

  SmallVector<int, 16> NewMask;
  for (auto [M0, M1] : zip(Mask0, Mask1)) {
    if (M0 >= 0 && M1 >= 0)
    else if (M0 == -1 && M1 == -1)

  int MaskOffset = NewMask[0u];
  unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
  for (unsigned I = 0u; I < InputNumElements; ++I) {

                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
  if (NewCost > OldCost)

  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
  replaceValue(*Phi, *NewShuf1);
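// Driver: visit every instruction in the function, apply the cheap early
// folds (load/store scalarization and similar) first, and unless
// TryEarlyFoldsOnly is set dispatch the remaining pattern folds by opcode,
// revisiting instructions through the worklist until no more changes occur.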
bool VectorCombine::run() {
    auto Opcode = I.getOpcode();
    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (vectorizeLoadInsert(I))
      case Instruction::ShuffleVector:
        if (widenSubvectorLoad(I))

      if (scalarizeOpOrCmp(I))
      if (scalarizeLoadExtract(I))
      if (scalarizeExtExtract(I))
      if (scalarizeVPIntrinsic(I))
      if (foldInterleaveIntrinsics(I))

    if (Opcode == Instruction::Store)
      if (foldSingleElementStore(I))

    if (TryEarlyFoldsOnly)

    if (IsFixedVectorType) {
      case Instruction::InsertElement:
        if (foldInsExtFNeg(I))
        if (foldInsExtBinop(I))
        if (foldInsExtVectorToShuffle(I))
      case Instruction::ShuffleVector:
        if (foldPermuteOfBinops(I))
        if (foldShuffleOfBinops(I))
        if (foldShuffleOfSelects(I))
        if (foldShuffleOfCastops(I))
        if (foldShuffleOfShuffles(I))
        if (foldShuffleOfIntrinsics(I))
        if (foldSelectShuffle(I))
        if (foldShuffleToIdentity(I))
      case Instruction::Load:
        if (shrinkLoadForShuffles(I))
      case Instruction::BitCast:
        if (foldBitcastShuffle(I))
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        if (foldBitOpOfCastops(I))
        if (foldBitOpOfCastConstant(I))
      case Instruction::PHI:
        if (shrinkPhiOfShuffles(I))

      case Instruction::Call:
        if (foldShuffleFromReductions(I))
        if (foldCastFromReductions(I))
      case Instruction::ExtractElement:
        if (foldShuffleChainsToReduce(I))
      case Instruction::ICmp:
      case Instruction::FCmp:
        if (foldExtractExtract(I))
      case Instruction::Or:
        if (foldConcatOfBoolMasks(I))
        if (foldExtractExtract(I))
        if (foldExtractedCmps(I))
        if (foldBinopOfReductions(I))

  bool MadeChange = false;
  for (BasicBlock &BB : F) {
      if (!I->isDebugOrPseudoInst())
        MadeChange |= FoldInst(*I);

  while (!Worklist.isEmpty()) {
    MadeChange |= FoldInst(*I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if they match; otherwise returns std::nullopt.
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of a value in this range and a value in Other.
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this range and a value in Other.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instruction comes before Other.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
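A standalone sketch of that mask property (hypothetical isIdentityMaskSketch; simplified relative to the real predicate): each non-poison lane must select its own lane number from a single source, either the first operand (index I) or the second (index I + NumSrcElts).

#include <cstdio>
#include <vector>

// Conceptual sketch (assumption): check that every non-poison (-1) mask
// element selects its own lane from exactly one of the two source vectors.
static bool isIdentityMaskSketch(const std::vector<int> &Mask, int NumSrcElts) {
  bool FromLHS = true, FromRHS = true;
  for (int I = 0, E = (int)Mask.size(); I != E; ++I) {
    if (Mask[I] == -1)
      continue;
    FromLHS &= (Mask[I] == I);
    FromRHS &= (Mask[I] == I + NumSrcElts);
  }
  return FromLHS || FromRHS;
}

int main() {
  std::printf("%d\n", isIdentityMaskSketch({0, 1, -1, 3}, 4)); // identity of LHS
  std::printf("%d\n", isIdentityMaskSketch({4, 5, 6, 7}, 4));  // identity of RHS
  std::printf("%d\n", isIdentityMaskSketch({0, 0, 1, 1}, 4));  // not an identity
}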
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have swapped position.
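A sketch of the commutation (hypothetical commuteMaskSketch, not the library code): when the two operands swap, indices below InVecNumElts move up by InVecNumElts and the rest move down, so each lane still selects the same element.

#include <cstdio>
#include <vector>

// Conceptual sketch (assumption): remap each mask index to the other source
// after the two shuffle operands have been swapped; poison (-1) stays put.
static void commuteMaskSketch(std::vector<int> &Mask, int N) {
  for (int &M : Mask) {
    if (M < 0)
      continue;                    // poison lane stays poison
    M = (M < N) ? M + N : M - N;   // same element, now in the other operand
  }
}

int main() {
  std::vector<int> Mask = {0, 5, -1, 3};
  commuteMaskSketch(Mask, 4);
  for (int M : Mask)
    std::printf("%d ", M); // prints: 4 1 -1 7
  std::printf("\n");
}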
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a BasicBlock.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iterators.
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
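A simplified standalone sketch of the widening rule (hypothetical widenMaskSketch; the real helper also handles all-undef groups): each group of Scale consecutive narrow-element indices must form one aligned, contiguous run, which then collapses to a single wide-element index.

#include <cstdio>
#include <vector>

// Conceptual sketch (assumption): widen a shuffle mask by Scale. Succeeds only
// if every group of Scale consecutive indices is an aligned, contiguous run.
static bool widenMaskSketch(int Scale, const std::vector<int> &Mask,
                            std::vector<int> &Wide) {
  if (Scale <= 0 || Mask.size() % Scale != 0)
    return false;
  Wide.clear();
  for (size_t I = 0; I < Mask.size(); I += Scale) {
    int First = Mask[I];
    if (First < 0 || First % Scale != 0)
      return false;
    for (int J = 1; J < Scale; ++J)
      if (Mask[I + J] != First + J)
        return false;
    Wide.push_back(First / Scale);
  }
  return true;
}

int main() {
  std::vector<int> Wide;
  // {4,5,0,1} over narrow elements is equivalent to {2,0} over elements twice as wide.
  if (widenMaskSketch(2, {4, 5, 0, 1}, Wide))
    std::printf("%d %d\n", Wide[0], Wide[1]); // prints: 2 0
}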
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
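The usual bit trick behind such a predicate, shown as a tiny standalone sketch (hypothetical isPow2, not the library code): a positive value is a power of two exactly when clearing its lowest set bit leaves zero.

#include <cstdint>
#include <cstdio>

// Conceptual sketch: a value is a power of two (> 0) iff it has exactly one
// set bit, i.e. V & (V - 1) clears the lowest set bit and yields zero.
static bool isPow2(uint32_t V) { return V != 0 && (V & (V - 1)) == 0; }

int main() {
  std::printf("%d %d %d\n", isPow2(8), isPow2(12), isPow2(0)); // prints: 1 0 0
}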
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.