43#define DEBUG_TYPE "vector-combine"
// Pass-wide statistics counters. STATISTIC registers a named counter under
// DEBUG_TYPE ("vector-combine"); each one counts how many times the
// corresponding VectorCombine transform fired, for -stats reporting.
49STATISTIC(NumVecLoad,
"Number of vector loads formed");
50STATISTIC(NumVecCmp,
"Number of vector compares formed");
51STATISTIC(NumVecBO,
"Number of vector binops formed");
52STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
53STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
54STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
55STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
56STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
60 cl::desc(
"Disable all vector combine transforms"));
64 cl::desc(
"Disable binop extract to shuffle transforms"));
68 cl::desc(
"Max number of instructions to scan for vector combining."));
// Sentinel element index: unsigned max can never be a valid vector lane, so
// it is used to mean "no / unknown index".
70static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
78 bool TryEarlyFoldsOnly)
80 DT(DT), AA(AA), AC(AC),
DL(
DL), CostKind(CostKind), SQ(*
DL),
81 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
98 bool TryEarlyFoldsOnly;
113 unsigned PreferredExtractIndex)
const;
117 unsigned PreferredExtractIndex);
144 bool foldSelectShuffle(
Instruction &
I,
bool FromReduction =
false);
154 if (
auto *NewI = dyn_cast<Instruction>(&New)) {
177 if (
auto *OpI = dyn_cast<Instruction>(
Op)) {
179 OpI,
nullptr,
nullptr, [&](
Value *V) {
180 if (
auto *
I = dyn_cast<Instruction>(V)) {
201 while (
auto *BitCast = dyn_cast<BitCastInst>(V))
202 V = BitCast->getOperand(0);
210 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
211 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
217 Type *ScalarTy = Load->getType()->getScalarType();
220 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
227bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
241 auto *
Load = dyn_cast<LoadInst>(
X);
245 Type *ScalarTy = Scalar->getType();
253 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
254 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
256 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
257 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
258 unsigned OffsetEltIndex = 0;
266 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
277 uint64_t ScalarSizeInBytes = ScalarSize / 8;
278 if (
Offset.urem(ScalarSizeInBytes) != 0)
282 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
283 if (OffsetEltIndex >= MinVecNumElts)
300 unsigned AS =
Load->getPointerAddressSpace();
318 auto *Ty = cast<FixedVectorType>(
I.getType());
319 unsigned OutputNumElts = Ty->getNumElements();
321 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
322 Mask[0] = OffsetEltIndex;
329 if (OldCost < NewCost || !NewCost.
isValid())
340 replaceValue(
I, *VecLd);
350 auto *Shuf = cast<ShuffleVectorInst>(&
I);
351 if (!Shuf->isIdentityWithPadding())
356 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
357 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
358 return M >= (int)(NumOpElts);
361 auto *
Load = dyn_cast<LoadInst>(Shuf->getOperand(
OpIndex));
368 auto *Ty = cast<FixedVectorType>(
I.getType());
369 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
370 assert(isa<PointerType>(SrcPtr->
getType()) &&
"Expected a pointer type");
377 unsigned AS =
Load->getPointerAddressSpace();
392 if (OldCost < NewCost || !NewCost.
isValid())
399 replaceValue(
I, *VecLd);
411 assert(Index0C && Index1C &&
"Expected constant extract indexes");
413 unsigned Index0 = Index0C->getZExtValue();
414 unsigned Index1 = Index1C->getZExtValue();
417 if (Index0 == Index1)
441 if (PreferredExtractIndex == Index0)
443 if (PreferredExtractIndex == Index1)
447 return Index0 > Index1 ? Ext0 : Ext1;
459 unsigned PreferredExtractIndex) {
462 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
464 unsigned Opcode =
I.getOpcode();
468 auto *VecTy = cast<VectorType>(Ext0Src->
getType());
477 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
478 "Expected a compare");
488 unsigned Ext0Index = Ext0IndexC->getZExtValue();
489 unsigned Ext1Index = Ext1IndexC->getZExtValue();
503 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
504 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
505 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
510 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
515 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
517 OldCost = CheapExtractCost + ScalarOpCost;
518 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
522 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
523 NewCost = VectorOpCost + CheapExtractCost +
528 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
529 if (ConvertToShuffle) {
540 if (
auto *FixedVecTy = dyn_cast<FixedVectorType>(VecTy)) {
543 ShuffleMask[BestInsIndex] = BestExtIndex;
545 VecTy, VecTy, ShuffleMask,
CostKind, 0,
546 nullptr, {ConvertToShuffle});
549 VecTy, VecTy, {},
CostKind, 0,
nullptr,
557 return OldCost < NewCost;
567 auto *VecTy = cast<FixedVectorType>(Vec->
getType());
569 ShufMask[NewIndex] = OldIndex;
581 if (!isa<FixedVectorType>(
X->getType()))
587 assert(isa<ConstantInt>(
C) &&
"Expected a constant index operand");
588 if (isa<Constant>(
X))
601 assert(isa<CmpInst>(&
I) &&
"Expected a compare");
616 assert(isa<BinaryOperator>(&
I) &&
"Expected a binary operator");
622 V1,
"foldExtExtBinop");
626 if (
auto *VecBOInst = dyn_cast<Instruction>(VecBO))
627 VecBOInst->copyIRFlags(&
I);
657 auto *Ext0 = cast<ExtractElementInst>(I0);
658 auto *Ext1 = cast<ExtractElementInst>(I1);
665 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
671 if (ExtractToChange) {
672 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
677 if (ExtractToChange == Ext0)
686 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
687 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
690 replaceValue(
I, *NewExt);
713 auto *VecTy = cast<FixedVectorType>(
I.getType());
715 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
716 if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType())
720 unsigned NumElts = VecTy->getNumElements();
721 if (Index >= NumElts)
728 std::iota(
Mask.begin(),
Mask.end(), 0);
745 bool NeedLenChg = SrcVecTy->getNumElements() != NumElts;
753 VecTy, SrcVecTy, SrcMask,
CostKind);
756 if (NewCost > OldCost)
771 replaceValue(
I, *NewShuf);
790 auto *ResultTy = dyn_cast<FixedVectorType>(
I.getType());
810 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
812 if (NewCost > OldCost)
822 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
823 NewInst->copyIRFlags(VecBinOp);
824 NewInst->andIRFlags(SclBinOp);
829 replaceValue(
I, *NewBO);
837 auto *BinOp = dyn_cast<BinaryOperator>(&
I);
838 if (!BinOp || !BinOp->isBitwiseLogicOp())
842 auto *LHSCast = dyn_cast<CastInst>(BinOp->getOperand(0));
843 auto *RHSCast = dyn_cast<CastInst>(BinOp->getOperand(1));
844 if (!LHSCast || !RHSCast) {
845 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
851 if (CastOpcode != RHSCast->getOpcode())
855 switch (CastOpcode) {
856 case Instruction::BitCast:
857 case Instruction::Trunc:
858 case Instruction::SExt:
859 case Instruction::ZExt:
865 Value *LHSSrc = LHSCast->getOperand(0);
866 Value *RHSSrc = RHSCast->getOperand(0);
873 auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->
getType());
874 auto *DstVecTy = dyn_cast<FixedVectorType>(
I.getType());
875 if (!SrcVecTy || !DstVecTy)
878 if (!SrcVecTy->getScalarType()->isIntegerTy() ||
879 !DstVecTy->getScalarType()->isIntegerTy())
896 LHSCastCost + RHSCastCost;
907 if (!LHSCast->hasOneUse())
908 NewCost += LHSCastCost;
909 if (!RHSCast->hasOneUse())
910 NewCost += RHSCastCost;
913 <<
" NewCost=" << NewCost <<
"\n");
915 if (NewCost > OldCost)
920 BinOp->getName() +
".inner");
921 if (
auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
922 NewBinOp->copyIRFlags(BinOp);
936 replaceValue(
I, *Result);
955 auto *DestTy = dyn_cast<FixedVectorType>(
I.getType());
956 auto *SrcTy = dyn_cast<FixedVectorType>(V0->
getType());
957 if (!DestTy || !SrcTy)
960 unsigned DestEltSize = DestTy->getScalarSizeInBits();
961 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
962 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
965 bool IsUnary = isa<UndefValue>(V1);
972 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
973 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
978 if (DestEltSize <= SrcEltSize) {
981 assert(SrcEltSize % DestEltSize == 0 &&
"Unexpected shuffle mask");
982 unsigned ScaleFactor = SrcEltSize / DestEltSize;
987 assert(DestEltSize % SrcEltSize == 0 &&
"Unexpected shuffle mask");
988 unsigned ScaleFactor = DestEltSize / SrcEltSize;
995 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1000 unsigned NumOps = IsUnary ? 1 : 2;
1010 TargetTransformInfo::CastContextHint::None,
1015 TargetTransformInfo::CastContextHint::None,
1018 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1019 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1021 if (NewCost > OldCost || !NewCost.
isValid())
1029 replaceValue(
I, *Shuf);
1036bool VectorCombine::scalarizeVPIntrinsic(
Instruction &
I) {
1037 if (!isa<VPIntrinsic>(
I))
1050 if (!ScalarOp0 || !ScalarOp1)
1058 auto IsAllTrueMask = [](
Value *MaskVal) {
1060 if (
auto *ConstValue = dyn_cast<Constant>(SplattedVal))
1061 return ConstValue->isAllOnesValue();
1076 if (
auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
1077 Mask.resize(FVTy->getNumElements(), 0);
1086 Args.push_back(
V->getType());
1092 std::optional<unsigned> FunctionalOpcode =
1094 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1095 if (!FunctionalOpcode) {
1114 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1116 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1119 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1122 if (OldCost < NewCost || !NewCost.
isValid())
1133 bool SafeToSpeculate;
1139 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
1140 if (!SafeToSpeculate &&
1147 {ScalarOp0, ScalarOp1})
1149 ScalarOp0, ScalarOp1);
1159 auto *UO = dyn_cast<UnaryOperator>(&
I);
1160 auto *BO = dyn_cast<BinaryOperator>(&
I);
1161 auto *CI = dyn_cast<CmpInst>(&
I);
1162 auto *
II = dyn_cast<IntrinsicInst>(&
I);
1163 if (!UO && !BO && !CI && !
II)
1171 if (Arg->getType() !=
II->getType() &&
1181 for (
User *U :
I.users())
1188 std::optional<uint64_t>
Index;
1190 auto Ops =
II ?
II->args() :
I.operands();
1199 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1205 else if (InsIdx != *Index)
1222 if (!
Index.has_value())
1225 VectorType *VecTy = cast<VectorType>(
I.getType());
1226 Type *ScalarTy = VecTy->getScalarType();
1227 assert(VecTy->isVectorTy() &&
1230 "Unexpected types for insert element into binop or cmp");
1232 unsigned Opcode =
I.getOpcode();
1240 }
else if (UO || BO) {
1245 II->getIntrinsicID(), ScalarTy,
1249 II->getIntrinsicID(), VecTy,
1256 Value *NewVecC =
nullptr;
1258 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1261 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1263 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1277 for (
auto [
Idx,
Op, VecC, Scalar] :
enumerate(Ops, VecCs, ScalarOps)) {
1282 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1283 OldCost += InsertCost;
1284 NewCost += !
Op->hasOneUse() * InsertCost;
1288 if (OldCost < NewCost || !NewCost.
isValid())
1298 ++NumScalarIntrinsic;
1304 cast<Constant>(VecC), Builder.
getInt64(*Index));
1308 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1314 Scalar->setName(
I.getName() +
".scalar");
1318 if (
auto *ScalarInst = dyn_cast<Instruction>(Scalar))
1319 ScalarInst->copyIRFlags(&
I);
1322 replaceValue(
I, *Insert);
1330 auto *BI = dyn_cast<BinaryOperator>(&
I);
1334 if (!BI || !
I.getType()->isIntegerTy(1))
1339 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1359 auto *Ext0 = cast<ExtractElementInst>(I0);
1360 auto *Ext1 = cast<ExtractElementInst>(I1);
1364 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1365 "Unknown ExtractElementInst");
1370 unsigned CmpOpcode =
1372 auto *VecTy = dyn_cast<FixedVectorType>(
X->getType());
1385 Ext0Cost + Ext1Cost + CmpCost * 2 +
1391 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1392 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1397 ShufMask[CheapIndex] = ExpensiveIndex;
1402 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1403 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1408 if (OldCost < NewCost || !NewCost.
isValid())
1418 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1419 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1422 replaceValue(
I, *NewExt);
1433 auto *RedOp = dyn_cast<Instruction>(
II.getOperand(0));
1434 auto *VecRedTy = cast<VectorType>(
II.getOperand(0)->getType());
1435 unsigned ReductionOpc =
1438 bool IsUnsigned = isa<ZExtInst>(RedOp);
1439 auto *ExtType = cast<VectorType>(RedOp->getOperand(0)->getType());
1441 CostBeforeReduction =
1444 CostAfterReduction =
1449 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1455 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1457 bool IsUnsigned = isa<ZExtInst>(Op0);
1470 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1471 CostAfterReduction =
1479bool VectorCombine::foldBinopOfReductions(
Instruction &
I) {
1482 if (BinOpOpc == Instruction::Sub)
1483 ReductionIID = Intrinsic::vector_reduce_add;
1487 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1489 auto *
II = dyn_cast<IntrinsicInst>(V);
1492 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1493 return II->getArgOperand(0);
1497 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1500 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1504 auto *VTy = cast<VectorType>(V0->
getType());
1507 const auto &II0 = *cast<IntrinsicInst>(
I.getOperand(0));
1508 const auto &II1 = *cast<IntrinsicInst>(
I.getOperand(1));
1509 unsigned ReductionOpc =
1522 CostOfRedOperand0 + CostOfRedOperand1 +
1525 if (NewCost >= OldCost || !NewCost.
isValid())
1529 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1532 if (BinOpOpc == Instruction::Or)
1533 VectorBO = Builder.
CreateOr(V0, V1,
"",
1534 cast<PossiblyDisjointInst>(
I).isDisjoint());
1539 replaceValue(
I, *Rdx);
1547 unsigned NumScanned = 0;
1557class ScalarizationResult {
1558 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1563 ScalarizationResult(StatusTy
Status,
Value *ToFreeze =
nullptr)
1567 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1568 ~ScalarizationResult() {
1569 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
// Factory: a result stating the scalarization must not be performed.
1572 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
// Factory: a result stating the scalarization is unconditionally safe.
1573 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1574 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1575 return {StatusTy::SafeWithFreeze, ToFreeze};
// Returns true if the scalarization is safe with no extra work required.
1579 bool isSafe()
const {
return Status == StatusTy::Safe; }
// Returns true if the scalarization was judged unsafe and must be abandoned.
1581 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
// Returns true if the scalarization is safe only after the stored ToFreeze
// value is frozen (see the freeze()/discard() protocol asserted elsewhere
// in this class).
1584 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1589 Status = StatusTy::Unsafe;
1594 assert(isSafeWithFreeze() &&
1595 "should only be used when freezing is required");
1597 "UserI must be a user of ToFreeze");
1603 if (
U.get() == ToFreeze)
1620 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1621 unsigned IntWidth =
Idx->getType()->getScalarSizeInBits();
1623 if (
auto *
C = dyn_cast<ConstantInt>(
Idx)) {
1624 if (
C->getValue().ult(NumElements))
1625 return ScalarizationResult::safe();
1626 return ScalarizationResult::unsafe();
1631 return ScalarizationResult::unsafe();
1633 APInt Zero(IntWidth, 0);
1634 APInt MaxElts(IntWidth, NumElements);
1640 true, &AC, CtxI, &DT)))
1641 return ScalarizationResult::safe();
1642 return ScalarizationResult::unsafe();
1655 if (ValidIndices.
contains(IdxRange))
1656 return ScalarizationResult::safeWithFreeze(IdxBase);
1657 return ScalarizationResult::unsafe();
1667 if (
auto *
C = dyn_cast<ConstantInt>(
Idx))
1669 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1681bool VectorCombine::foldSingleElementStore(
Instruction &
I) {
1684 auto *
SI = cast<StoreInst>(&
I);
1685 if (!
SI->isSimple() || !isa<VectorType>(
SI->getValueOperand()->getType()))
1693 if (!
match(
SI->getValueOperand(),
1698 if (
auto *Load = dyn_cast<LoadInst>(Source)) {
1699 auto VecTy = cast<VectorType>(
SI->getValueOperand()->getType());
1700 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1703 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1704 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1705 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1709 if (ScalarizableIdx.isUnsafe() ||
1716 Worklist.
push(Load);
1718 if (ScalarizableIdx.isSafeWithFreeze())
1719 ScalarizableIdx.freeze(Builder, *cast<Instruction>(
Idx));
1721 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1722 {ConstantInt::get(Idx->getType(), 0), Idx});
1729 replaceValue(
I, *NSI);
1738bool VectorCombine::scalarizeLoadExtract(
Instruction &
I) {
1746 auto *LI = cast<LoadInst>(&
I);
1747 auto *VecTy = cast<VectorType>(LI->getType());
1748 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
1753 LI->getPointerAddressSpace(),
CostKind);
1757 unsigned NumInstChecked = 0;
1761 for (
auto &Pair : NeedFreeze)
1762 Pair.second.discard();
1769 auto *UI = dyn_cast<ExtractElementInst>(U);
1770 if (!UI || UI->getParent() != LI->getParent())
1775 if (UI->use_empty())
1782 make_range(std::next(LI->getIterator()), UI->getIterator())) {
1789 LastCheckedInst = UI;
1794 if (ScalarIdx.isUnsafe())
1796 if (ScalarIdx.isSafeWithFreeze()) {
1798 ScalarIdx.discard();
1801 auto *
Index = dyn_cast<ConstantInt>(UI->getIndexOperand());
1804 Index ?
Index->getZExtValue() : -1);
1813 <<
"\n LoadExtractCost: " << OriginalCost
1814 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
1816 if (ScalarizedCost >= OriginalCost)
1823 Type *ElemType = VecTy->getElementType();
1827 auto *EI = cast<ExtractElementInst>(U);
1828 Value *
Idx = EI->getIndexOperand();
1831 auto It = NeedFreeze.
find(EI);
1832 if (It != NeedFreeze.
end())
1833 It->second.freeze(Builder, *cast<Instruction>(
Idx));
1838 auto *NewLoad = cast<LoadInst>(
1839 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
1841 Align ScalarOpAlignment =
1843 NewLoad->setAlignment(ScalarOpAlignment);
1845 if (
auto *ConstIdx = dyn_cast<ConstantInt>(
Idx)) {
1846 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
1847 AAMDNodes OldAAMD = LI->getAAMetadata();
1851 replaceValue(*EI, *NewLoad,
false);
1854 FailureGuard.release();
1858bool VectorCombine::scalarizeExtExtract(
Instruction &
I) {
1861 auto *
Ext = dyn_cast<ZExtInst>(&
I);
1868 auto *SrcTy = dyn_cast<FixedVectorType>(
Ext->getOperand(0)->getType());
1871 auto *DstTy = cast<FixedVectorType>(
Ext->getType());
1873 Type *ScalarDstTy = DstTy->getElementType();
1874 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
1880 unsigned ExtCnt = 0;
1881 bool ExtLane0 =
false;
1882 for (
User *U :
Ext->users()) {
1886 if (cast<Instruction>(U)->use_empty())
1896 Instruction::And, ScalarDstTy,
CostKind,
1899 (ExtCnt - ExtLane0) *
1901 Instruction::LShr, ScalarDstTy,
CostKind,
1904 if (ScalarCost > VectorCost)
1907 Value *ScalarV =
Ext->getOperand(0);
1914 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
1915 uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
1916 for (
User *U :
Ext->users()) {
1917 auto *Extract = cast<ExtractElementInst>(U);
1919 cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
1922 U->replaceAllUsesWith(
And);
1930bool VectorCombine::foldConcatOfBoolMasks(
Instruction &
I) {
1931 Type *Ty =
I.getType();
1936 if (
DL->isBigEndian())
1963 if (ShAmtX > ShAmtY) {
1971 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
1972 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
1974 auto *MaskTy = dyn_cast<FixedVectorType>(SrcX->
getType());
1977 MaskTy->getNumElements() != ShAmtDiff ||
1978 MaskTy->getNumElements() > (
BitWidth / 2))
1987 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2004 if (Ty != ConcatIntTy)
2010 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2011 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2014 if (NewCost > OldCost)
2024 if (Ty != ConcatIntTy) {
2034 replaceValue(
I, *Result);
2040bool VectorCombine::foldPermuteOfBinops(
Instruction &
I) {
2051 Value *Op00, *Op01, *Op10, *Op11;
2059 if (!Match0 && !Match1)
2068 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2069 auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->
getType());
2070 auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->
getType());
2071 auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->
getType());
2072 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2075 unsigned NumSrcElts = BinOpTy->getNumElements();
2079 if ((BinOp->
isIntDivRem() || !isa<PoisonValue>(
I.getOperand(1))) &&
2080 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2085 for (
int M : OuterMask) {
2086 if (M < 0 || M >= (
int)NumSrcElts) {
2090 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2091 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2095 unsigned NumOpElts = Op0Ty->getNumElements();
2096 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2097 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2099 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2100 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2107 BinOpTy, OuterMask,
CostKind, 0,
nullptr, {BinOp}, &
I);
2111 0,
nullptr, {Op00, Op01}, cast<Instruction>(BinOp->
getOperand(0)));
2115 0,
nullptr, {Op10, Op11}, cast<Instruction>(BinOp->
getOperand(1)));
2123 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2127 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2129 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2130 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2134 if (NewCost > OldCost)
2144 if (
auto *NewInst = dyn_cast<Instruction>(NewBO))
2145 NewInst->copyIRFlags(BinOp);
2149 replaceValue(
I, *NewBO);
2155bool VectorCombine::foldShuffleOfBinops(
Instruction &
I) {
2163 if (
LHS->getOpcode() !=
RHS->getOpcode())
2167 bool IsCommutative =
false;
2172 auto *BO = cast<BinaryOperator>(LHS);
2176 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2180 IsCommutative = cast<CmpInst>(LHS)->isCommutative();
2184 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2185 auto *BinResTy = dyn_cast<FixedVectorType>(
LHS->
getType());
2186 auto *BinOpTy = dyn_cast<FixedVectorType>(
X->getType());
2187 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2190 unsigned NumSrcElts = BinOpTy->getNumElements();
2193 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2196 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2197 if (M >= (
int)NumSrcElts)
2237 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2249 bool ReducedInstCount =
false;
2250 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2251 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2252 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2253 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2255 auto *ShuffleCmpTy =
2272 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2277 ReducedInstCount |= (isa<Constant>(
X) && isa<Constant>(Z)) ||
2278 (isa<Constant>(
Y) && isa<Constant>(W));
2279 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2286 cast<BinaryOperator>(LHS)->getOpcode(), Shuf0, Shuf1)
2287 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2290 if (
auto *NewInst = dyn_cast<Instruction>(NewBO)) {
2291 NewInst->copyIRFlags(LHS);
2292 NewInst->andIRFlags(RHS);
2297 replaceValue(
I, *NewBO);
2304bool VectorCombine::foldShuffleOfSelects(
Instruction &
I) {
2306 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2313 auto *C1VecTy = dyn_cast<FixedVectorType>(C1->
getType());
2314 auto *C2VecTy = dyn_cast<FixedVectorType>(C2->
getType());
2315 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2318 auto *SI0FOp = dyn_cast<FPMathOperator>(
I.getOperand(0));
2319 auto *SI1FOp = dyn_cast<FPMathOperator>(
I.getOperand(1));
2321 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2322 ((SI0FOp !=
nullptr) &&
2323 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2326 auto *SrcVecTy = cast<FixedVectorType>(
T1->getType());
2327 auto *DstVecTy = cast<FixedVectorType>(
I.getType());
2329 auto SelOp = Instruction::Select;
2336 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2340 Mask,
CostKind, 0,
nullptr, {C1, C2});
2345 auto *C1C2ShuffledVecTy = cast<FixedVectorType>(
2351 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2353 if (NewCost > OldCost)
2362 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2363 SI0FOp->getFastMathFlags());
2365 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2370 replaceValue(
I, *NewSel);
2376bool VectorCombine::foldShuffleOfCastops(
Instruction &
I) {
2382 auto *C0 = dyn_cast<CastInst>(V0);
2383 auto *C1 = dyn_cast<CastInst>(V1);
2388 if (C0->getSrcTy() != C1->getSrcTy())
2392 if (Opcode != C1->getOpcode()) {
2394 Opcode = Instruction::SExt;
2399 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2400 auto *CastDstTy = dyn_cast<FixedVectorType>(C0->getDestTy());
2401 auto *CastSrcTy = dyn_cast<FixedVectorType>(C0->getSrcTy());
2402 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2405 unsigned NumSrcElts = CastSrcTy->getNumElements();
2406 unsigned NumDstElts = CastDstTy->getNumElements();
2407 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2408 "Only bitcasts expected to alter src/dst element counts");
2412 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2413 (NumDstElts % NumSrcElts) != 0)
2417 if (NumSrcElts >= NumDstElts) {
2420 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2421 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2426 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2427 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2432 auto *NewShuffleDstTy =
2445 CastDstTy, OldMask,
CostKind, 0,
nullptr, {}, &
I);
2458 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2460 if (NewCost > OldCost)
2464 C1->getOperand(0), NewMask);
2468 if (
auto *NewInst = dyn_cast<Instruction>(Cast)) {
2469 NewInst->copyIRFlags(C0);
2470 NewInst->andIRFlags(C1);
2474 replaceValue(
I, *Cast);
2484bool VectorCombine::foldShuffleOfShuffles(
Instruction &
I) {
2486 Value *OuterV0, *OuterV1;
2492 Value *X0, *X1, *Y0, *Y1;
2497 if (!Match0 && !Match1)
2503 if (!Match1 && isa<PoisonValue>(OuterV1)) {
2507 InnerMask1 = PoisonMask1;
2511 X0 = Match0 ? X0 : OuterV0;
2512 Y0 = Match0 ? Y0 : OuterV0;
2513 X1 = Match1 ? X1 : OuterV1;
2514 Y1 = Match1 ? Y1 : OuterV1;
2515 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2516 auto *ShuffleSrcTy = dyn_cast<FixedVectorType>(X0->
getType());
2517 auto *ShuffleImmTy = dyn_cast<FixedVectorType>(OuterV0->
getType());
2518 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2522 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2523 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2529 Value *NewX =
nullptr, *NewY =
nullptr;
2530 for (
int &M : NewMask) {
2531 Value *Src =
nullptr;
2532 if (0 <= M && M < (
int)NumImmElts) {
2536 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2537 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2539 }
else if (M >= (
int)NumImmElts) {
2544 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2545 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2549 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
2550 if (isa<UndefValue>(Src)) {
2553 if (!isa<PoisonValue>(Src))
2558 if (!NewX || NewX == Src) {
2562 if (!NewY || NewY == Src) {
2578 replaceValue(
I, *NewX);
2595 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
2601 nullptr, {NewX, NewY});
2603 NewCost += InnerCost0;
2605 NewCost += InnerCost1;
2608 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2610 if (NewCost > OldCost)
2614 replaceValue(
I, *Shuf);
2620bool VectorCombine::foldShuffleOfIntrinsics(
Instruction &
I) {
2627 auto *II0 = dyn_cast<IntrinsicInst>(V0);
2628 auto *II1 = dyn_cast<IntrinsicInst>(V1);
2633 if (IID != II1->getIntrinsicID())
2636 auto *ShuffleDstTy = dyn_cast<FixedVectorType>(
I.getType());
2637 auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
2638 if (!ShuffleDstTy || !II0Ty)
2644 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2646 II0->getArgOperand(
I) != II1->getArgOperand(
I))
2653 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
2657 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I) {
2659 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
2661 auto *VecTy = cast<FixedVectorType>(II0->getArgOperand(
I)->getType());
2663 ShuffleDstTy->getNumElements());
2673 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2676 if (NewCost > OldCost)
2680 for (
unsigned I = 0, E = II0->arg_size();
I != E; ++
I)
2685 II1->getArgOperand(
I), OldMask);
2692 if (
auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
2694 NewInst->andIRFlags(II1);
2697 replaceValue(
I, *NewIntrinsic);
2704 while (
auto *SV = dyn_cast<ShuffleVectorInst>(U->get())) {
2706 cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
2707 int M = SV->getMaskValue(Lane);
2710 if (
static_cast<unsigned>(M) < NumElts) {
2711 U = &SV->getOperandUse(0);
2714 U = &SV->getOperandUse(1);
2725 auto [U, Lane] = IL;
2738 auto *Ty = cast<FixedVectorType>(Item.
front().first->get()->getType());
2739 unsigned NumElts = Ty->getNumElements();
2740 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
2746 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
2752 unsigned NumSlices = Item.
size() / NumElts;
2757 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
2758 Use *SliceV = Item[Slice * NumElts].first;
2759 if (!SliceV || SliceV->get()->
getType() != Ty)
2761 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
2762 auto [V, Lane] = Item[Slice * NumElts + Elt];
2763 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
2776 auto [FrontU, FrontLane] = Item.
front();
2778 if (IdentityLeafs.
contains(FrontU)) {
2779 return FrontU->get();
2785 if (ConcatLeafs.
contains(FrontU)) {
2787 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
2789 for (
unsigned S = 0; S < Values.
size(); ++S)
2790 Values[S] = Item[S * NumElts].first->get();
2792 while (Values.
size() > 1) {
2795 std::iota(Mask.begin(), Mask.end(), 0);
2797 for (
unsigned S = 0; S < NewValues.
size(); ++S)
2805 auto *
I = cast<Instruction>(FrontU->get());
2806 auto *
II = dyn_cast<IntrinsicInst>(
I);
2807 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
2809 for (
unsigned Idx = 0;
Idx < NumOps;
Idx++) {
2816 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
2821 for (
const auto &Lane : Item)
2827 if (
auto *BI = dyn_cast<BinaryOperator>(
I)) {
2833 if (
auto *CI = dyn_cast<CmpInst>(
I)) {
2834 auto *
Value = Builder.
CreateCmp(CI->getPredicate(), Ops[0], Ops[1]);
2838 if (
auto *SI = dyn_cast<SelectInst>(
I)) {
2843 if (
auto *CI = dyn_cast<CastInst>(
I)) {
2853 assert(isa<UnaryInstruction>(
I) &&
"Unexpected instruction type in Generate");
2863bool VectorCombine::foldShuffleToIdentity(
Instruction &
I) {
2864 auto *Ty = dyn_cast<FixedVectorType>(
I.getType());
2865 if (!Ty ||
I.use_empty())
2869 for (
unsigned M = 0, E = Ty->getNumElements(); M < E; ++M)
2875 unsigned NumVisited = 0;
2877 while (!Worklist.
empty()) {
2882 auto [FrontU, FrontLane] = Item.
front();
2890 return X->getType() ==
Y->getType() &&
2895 if (FrontLane == 0 &&
2896 cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
2897 Ty->getNumElements() &&
2900 return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
2901 E.value().second == (
int)E.index());
2903 IdentityLeafs.
insert(FrontU);
2907 if (
auto *
C = dyn_cast<Constant>(FrontU);
2908 C &&
C->getSplatValue() &&
2912 return !U || (isa<Constant>(
U->get()) &&
2913 cast<Constant>(
U->get())->getSplatValue() ==
2914 cast<Constant>(FrontV)->getSplatValue());
2916 SplatLeafs.
insert(FrontU);
2921 auto [FrontU, FrontLane] = Item.
front();
2922 auto [
U, Lane] = IL;
2923 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
2925 SplatLeafs.
insert(FrontU);
2931 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
2935 Value *
V = IL.first->get();
2936 if (
auto *
I = dyn_cast<Instruction>(V);
I && !
I->hasOneUser())
2940 if (
auto *CI = dyn_cast<CmpInst>(V))
2941 if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
2943 if (
auto *CI = dyn_cast<CastInst>(V))
2944 if (CI->getSrcTy()->getScalarType() !=
2945 cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
2947 if (
auto *SI = dyn_cast<SelectInst>(V))
2948 if (!isa<VectorType>(
SI->getOperand(0)->getType()) ||
2949 SI->getOperand(0)->getType() !=
2950 cast<SelectInst>(FrontV)->getOperand(0)->getType())
2952 if (isa<CallInst>(V) && !isa<IntrinsicInst>(V))
2954 auto *
II = dyn_cast<IntrinsicInst>(V);
2955 return !
II || (isa<IntrinsicInst>(FrontV) &&
2956 II->getIntrinsicID() ==
2957 cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
2958 !
II->hasOperandBundles());
2962 if (isa<BinaryOperator, CmpInst>(FrontU)) {
2964 if (
auto *BO = dyn_cast<BinaryOperator>(FrontU);
2965 BO && BO->isIntDivRem())
2974 }
else if (
auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
2976 auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
2977 auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
2978 if (DstTy && SrcTy &&
2979 SrcTy->getNumElements() == DstTy->getNumElements()) {
2983 }
else if (isa<SelectInst>(FrontU)) {
2988 }
else if (
auto *
II = dyn_cast<IntrinsicInst>(FrontU);
2990 !
II->hasOperandBundles()) {
2991 for (
unsigned Op = 0, E =
II->getNumOperands() - 1;
Op < E;
Op++) {
2997 return !U || (cast<Instruction>(
U->get())->getOperand(
Op) ==
2998 cast<Instruction>(FrontV)->getOperand(
Op));
3010 ConcatLeafs.
insert(FrontU);
3017 if (NumVisited <= 1)
3020 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3026 ConcatLeafs, Builder, &
TTI);
3027 replaceValue(
I, *V);
3034bool VectorCombine::foldShuffleFromReductions(
Instruction &
I) {
3035 auto *
II = dyn_cast<IntrinsicInst>(&
I);
3038 switch (
II->getIntrinsicID()) {
3039 case Intrinsic::vector_reduce_add:
3040 case Intrinsic::vector_reduce_mul:
3041 case Intrinsic::vector_reduce_and:
3042 case Intrinsic::vector_reduce_or:
3043 case Intrinsic::vector_reduce_xor:
3044 case Intrinsic::vector_reduce_smin:
3045 case Intrinsic::vector_reduce_smax:
3046 case Intrinsic::vector_reduce_umin:
3047 case Intrinsic::vector_reduce_umax:
3056 std::queue<Value *> Worklist;
3059 if (
auto *
Op = dyn_cast<Instruction>(
I.getOperand(0)))
3062 while (!Worklist.empty()) {
3063 Value *CV = Worklist.front();
3074 if (
auto *CI = dyn_cast<Instruction>(CV)) {
3075 if (CI->isBinaryOp()) {
3076 for (
auto *
Op : CI->operand_values())
3079 }
else if (
auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
3080 if (Shuffle && Shuffle != SV)
3097 for (
auto *V : Visited)
3098 for (
auto *U :
V->users())
3099 if (!Visited.contains(U) && U != &
I)
3103 dyn_cast<FixedVectorType>(
II->getOperand(0)->getType());
3108 if (!ShuffleInputType)
3116 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (
unsigned)
Y; });
3117 bool UsesSecondVec =
3118 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3125 ShuffleInputType, ConcatMask,
CostKind);
3127 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3129 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3131 bool MadeChanges =
false;
3132 if (NewCost < OldCost) {
3136 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3137 replaceValue(*Shuffle, *NewShuffle);
3143 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3189bool VectorCombine::foldShuffleChainsToReduce(
Instruction &
I) {
3191 std::queue<Value *> InstWorklist;
3195 std::optional<unsigned int> CommonCallOp = std::nullopt;
3196 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3198 bool IsFirstCallOrBinInst =
true;
3199 bool ShouldBeCallOrBinInst =
true;
3211 auto *FVT = dyn_cast<FixedVectorType>(VecOpEE->
getType());
3215 int64_t
VecSize = FVT->getNumElements();
3221 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3222 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3232 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3233 Cur = (Cur + 1) / 2, --Mask) {
3235 ExpectedParityMask |= (1ll <<
Mask);
3238 InstWorklist.push(VecOpEE);
3240 while (!InstWorklist.empty()) {
3241 Value *CI = InstWorklist.front();
3244 if (
auto *
II = dyn_cast<IntrinsicInst>(CI)) {
3245 if (!ShouldBeCallOrBinInst)
3248 if (!IsFirstCallOrBinInst &&
3249 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3254 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3256 IsFirstCallOrBinInst =
false;
3259 CommonCallOp =
II->getIntrinsicID();
3260 if (
II->getIntrinsicID() != *CommonCallOp)
3263 switch (
II->getIntrinsicID()) {
3264 case Intrinsic::umin:
3265 case Intrinsic::umax:
3266 case Intrinsic::smin:
3267 case Intrinsic::smax: {
3268 auto *Op0 =
II->getOperand(0);
3269 auto *Op1 =
II->getOperand(1);
3277 ShouldBeCallOrBinInst ^= 1;
3280 *CommonCallOp,
II->getType(),
3281 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
3286 if (!isa<ShuffleVectorInst>(PrevVecV[1]))
3288 InstWorklist.push(PrevVecV[1]);
3289 InstWorklist.push(PrevVecV[0]);
3290 }
else if (
auto *BinOp = dyn_cast<BinaryOperator>(CI)) {
3293 if (!ShouldBeCallOrBinInst)
3296 if (!IsFirstCallOrBinInst &&
3297 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3300 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3302 IsFirstCallOrBinInst =
false;
3310 switch (*CommonBinOp) {
3311 case BinaryOperator::Add:
3312 case BinaryOperator::Mul:
3313 case BinaryOperator::Or:
3314 case BinaryOperator::And:
3315 case BinaryOperator::Xor: {
3325 ShouldBeCallOrBinInst ^= 1;
3330 if (!isa<ShuffleVectorInst>(PrevVecV[1]))
3332 InstWorklist.push(PrevVecV[1]);
3333 InstWorklist.push(PrevVecV[0]);
3334 }
else if (
auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
3337 if (ShouldBeCallOrBinInst ||
3338 any_of(PrevVecV, [](
Value *VecV) {
return VecV ==
nullptr; }))
3341 if (SVInst != PrevVecV[1])
3350 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
3351 if (Mask < ShuffleMaskHalf &&
3352 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
3354 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
3359 ShuffleMaskHalf *= 2;
3360 ShuffleMaskHalf -= (ExpectedParityMask & 1);
3361 ExpectedParityMask >>= 1;
3364 SVInst->getType(), SVInst->getType(),
3368 if (!ExpectedParityMask && VisitedCnt == NumLevels)
3371 ShouldBeCallOrBinInst ^= 1;
3378 if (ShouldBeCallOrBinInst)
3381 assert(VecSize != -1 &&
"Expected Match for Vector Size");
3383 Value *FinalVecV = PrevVecV[0];
3387 auto *FinalVecVTy = cast<FixedVectorType>(FinalVecV->
getType());
3398 if (NewCost >= OrigCost)
3401 auto *ReducedResult =
3403 replaceValue(
I, *ReducedResult);
3412bool VectorCombine::foldCastFromReductions(
Instruction &
I) {
3413 auto *
II = dyn_cast<IntrinsicInst>(&
I);
3417 bool TruncOnly =
false;
3420 case Intrinsic::vector_reduce_add:
3421 case Intrinsic::vector_reduce_mul:
3424 case Intrinsic::vector_reduce_and:
3425 case Intrinsic::vector_reduce_or:
3426 case Intrinsic::vector_reduce_xor:
3433 Value *ReductionSrc =
I.getOperand(0);
3443 auto *SrcTy = cast<VectorType>(Src->getType());
3444 auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->
getType());
3445 Type *ResultTy =
I.getType();
3448 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
3451 cast<CastInst>(ReductionSrc));
3458 if (OldCost <= NewCost || !NewCost.
isValid())
3462 II->getIntrinsicID(), {Src});
3464 replaceValue(
I, *NewCast);
3473 constexpr unsigned MaxVisited = 32;
3476 bool FoundReduction =
false;
3479 while (!WorkList.
empty()) {
3481 for (
User *U :
I->users()) {
3482 auto *UI = cast<Instruction>(U);
3483 if (!UI || !Visited.
insert(UI).second)
3485 if (Visited.
size() > MaxVisited)
3487 if (
auto *
II = dyn_cast<IntrinsicInst>(UI)) {
3491 switch (
II->getIntrinsicID()) {
3492 case Intrinsic::vector_reduce_add:
3493 case Intrinsic::vector_reduce_mul:
3494 case Intrinsic::vector_reduce_and:
3495 case Intrinsic::vector_reduce_or:
3496 case Intrinsic::vector_reduce_xor:
3497 case Intrinsic::vector_reduce_smin:
3498 case Intrinsic::vector_reduce_smax:
3499 case Intrinsic::vector_reduce_umin:
3500 case Intrinsic::vector_reduce_umax:
3501 FoundReduction =
true;
3508 if (!isa<BinaryOperator>(UI) && !isa<ShuffleVectorInst>(UI))
3514 return FoundReduction;
3527bool VectorCombine::foldSelectShuffle(
Instruction &
I,
bool FromReduction) {
3528 auto *SVI = cast<ShuffleVectorInst>(&
I);
3529 auto *VT = cast<FixedVectorType>(
I.getType());
3530 auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
3531 auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
3532 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
3536 auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
3537 auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
3538 auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
3539 auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
3542 if (!
I ||
I->getOperand(0)->getType() != VT)
3545 return U != Op0 && U != Op1 &&
3546 !(isa<ShuffleVectorInst>(U) &&
3547 (InputShuffles.contains(cast<Instruction>(U)) ||
3548 isInstructionTriviallyDead(cast<Instruction>(U))));
3551 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
3552 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
3560 for (
auto *U :
I->users()) {
3561 auto *SV = dyn_cast<ShuffleVectorInst>(U);
3562 if (!SV || SV->getType() != VT)
3564 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
3565 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
3572 if (!collectShuffles(Op0) || !collectShuffles(Op1))
3576 if (FromReduction && Shuffles.
size() > 1)
3581 if (!FromReduction) {
3583 for (
auto *U : SV->users()) {
3586 Shuffles.push_back(SSV);
3598 int MaxV1Elt = 0, MaxV2Elt = 0;
3599 unsigned NumElts = VT->getNumElements();
3602 SVN->getShuffleMask(Mask);
3606 Value *SVOp0 = SVN->getOperand(0);
3607 Value *SVOp1 = SVN->getOperand(1);
3608 if (isa<UndefValue>(SVOp1)) {
3609 auto *SSV = cast<ShuffleVectorInst>(SVOp0);
3612 for (
int &Elem : Mask) {
3618 if (SVOp0 == Op1 && SVOp1 == Op0) {
3622 if (SVOp0 != Op0 || SVOp1 != Op1)
3629 for (
unsigned I = 0;
I <
Mask.size();
I++) {
3632 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
3633 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
3634 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
3635 return Mask[
I] ==
A.first;
3644 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
3645 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
3646 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
3660 sort(ReconstructMask);
3661 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
3669 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
3670 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
3677 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
3680 if (isa<UndefValue>(SV->getOperand(1)))
3681 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
3682 if (InputShuffles.contains(SSV))
3684 return SV->getMaskValue(M);
3692 std::pair<int, int>
Y) {
3693 int MXA = GetBaseMaskValue(
A,
X.first);
3694 int MYA = GetBaseMaskValue(
A,
Y.first);
3697 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
3698 return SortBase(SVI0A,
A,
B);
3700 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
3701 return SortBase(SVI1A,
A,
B);
3706 for (
const auto &Mask : OrigReconstructMasks) {
3708 for (
int M : Mask) {
3710 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
3711 assert(It !=
V.end() &&
"Expected all entries in Mask");
3712 return std::distance(
V.begin(), It);
3716 else if (M <
static_cast<int>(NumElts)) {
3717 ReconstructMask.
push_back(FindIndex(V1, M));
3719 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
3722 ReconstructMasks.push_back(std::move(ReconstructMask));
3728 for (
unsigned I = 0;
I < V1.
size();
I++) {
3729 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
3730 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
3732 for (
unsigned I = 0;
I < V2.
size();
I++) {
3733 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
3734 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
3736 while (V1A.
size() < NumElts) {
3740 while (V2A.
size() < NumElts) {
3746 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
3752 VT, VT, SV->getShuffleMask(),
CostKind);
3759 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
3760 unsigned MaxVectorSize =
3762 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
3770 std::set<SmallVector<int, 4>> UniqueShuffles;
3775 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
3776 if (NumFullVectors < 2)
3777 return C + ShuffleCost;
3779 unsigned NumUniqueGroups = 0;
3780 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
3783 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
3784 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
3785 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
3786 if (UniqueShuffles.insert(SubShuffle).second)
3787 NumUniqueGroups += 1;
3789 return C + ShuffleCost * NumUniqueGroups / NumGroups;
3792 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
3796 SV->getShuffleMask(Mask);
3797 return AddShuffleMaskAdjustedCost(
C, Mask);
3800 auto AllShufflesHaveSameOperands =
3802 if (InputShuffles.size() < 2)
3805 dyn_cast<ShuffleVectorInst>(*InputShuffles.begin());
3811 std::next(InputShuffles.begin()), InputShuffles.end(),
3813 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
3814 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
3823 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
3825 if (AllShufflesHaveSameOperands(InputShuffles)) {
3826 UniqueShuffles.clear();
3827 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
3830 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
3843 UniqueShuffles.clear();
3844 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
3846 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
3848 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
3851 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
3853 <<
" vs CostAfter: " << CostAfter <<
"\n");
3854 if (CostBefore < CostAfter ||
3860 auto *SV = dyn_cast<ShuffleVectorInst>(
I);
3863 if (isa<UndefValue>(SV->getOperand(1)))
3864 if (
auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
3865 if (InputShuffles.contains(SSV))
3867 return SV->getOperand(
Op);
3871 GetShuffleOperand(SVI0A, 1), V1A);
3874 GetShuffleOperand(SVI0B, 1), V1B);
3877 GetShuffleOperand(SVI1A, 1), V2A);
3880 GetShuffleOperand(SVI1B, 1), V2B);
3884 if (
auto *
I = dyn_cast<Instruction>(NOp0))
3885 I->copyIRFlags(Op0,
true);
3889 if (
auto *
I = dyn_cast<Instruction>(NOp1))
3890 I->copyIRFlags(Op1,
true);
3892 for (
int S = 0, E = ReconstructMasks.size(); S != E; S++) {
3895 replaceValue(*Shuffles[S], *NSV,
false);
3898 Worklist.pushValue(NSV0A);
3899 Worklist.pushValue(NSV0B);
3900 Worklist.pushValue(NSV1A);
3901 Worklist.pushValue(NSV1B);
3912 Value *ZExted, *OtherOperand;
3918 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
3920 auto *BigTy = cast<FixedVectorType>(
I.getType());
3921 auto *SmallTy = cast<FixedVectorType>(ZExted->
getType());
3922 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
3924 if (
I.getOpcode() == Instruction::LShr) {
3941 Instruction::ZExt, BigTy, SmallTy,
3942 TargetTransformInfo::CastContextHint::None,
CostKind);
3948 auto *UI = cast<Instruction>(U);
3954 ShrinkCost += ZExtCost;
3969 ShrinkCost += ZExtCost;
3974 if (!isa<Constant>(OtherOperand))
3976 Instruction::Trunc, SmallTy, BigTy,
3977 TargetTransformInfo::CastContextHint::None,
CostKind);
3982 if (ShrinkCost > CurrentCost)
3986 Value *Op0 = ZExted;
3989 if (
I.getOperand(0) == OtherOperand)
3993 cast<Instruction>(NewBinOp)->copyIRFlags(&
I);
3994 cast<Instruction>(NewBinOp)->copyMetadata(
I);
3996 replaceValue(
I, *NewZExtr);
4002bool VectorCombine::foldInsExtVectorToShuffle(
Instruction &
I) {
4003 Value *DstVec, *SrcVec;
4011 auto *DstVecTy = dyn_cast<FixedVectorType>(
I.getType());
4012 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->
getType());
4014 if (!DstVecTy || !SrcVecTy ||
4015 SrcVecTy->getElementType() != DstVecTy->getElementType())
4018 unsigned NumDstElts = DstVecTy->getNumElements();
4019 unsigned NumSrcElts = SrcVecTy->getNumElements();
4020 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
4027 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
4028 bool IsExtIdxInBounds = ExtIdx < NumDstElts;
4029 bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
4030 if (NeedDstSrcSwap) {
4032 if (!IsExtIdxInBounds && NeedExpOrNarrow)
4035 Mask[InsIdx] = ExtIdx;
4039 std::iota(
Mask.begin(),
Mask.end(), 0);
4040 if (!IsExtIdxInBounds && NeedExpOrNarrow)
4041 Mask[InsIdx] = NumDstElts;
4043 Mask[InsIdx] = ExtIdx + NumDstElts;
4047 auto *
Ins = cast<InsertElementInst>(&
I);
4048 auto *
Ext = cast<ExtractElementInst>(
I.getOperand(1));
4057 if (!NeedExpOrNarrow) {
4062 nullptr, {DstVec, SrcVec});
4068 if (IsExtIdxInBounds)
4069 ExtToVecMask[ExtIdx] = ExtIdx;
4071 ExtToVecMask[0] = ExtIdx;
4074 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
4078 if (!
Ext->hasOneUse())
4081 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
4082 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
4085 if (OldCost < NewCost)
4088 if (NeedExpOrNarrow) {
4089 if (!NeedDstSrcSwap)
4096 if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
4102 replaceValue(
I, *Shuf);
4111bool VectorCombine::foldInterleaveIntrinsics(
Instruction &
I) {
4112 const APInt *SplatVal0, *SplatVal1;
4113 if (!
match(&
I, m_Intrinsic<Intrinsic::vector_interleave2>(
4121 cast<VectorType>(cast<IntrinsicInst>(
I).getArgOperand(0)->
getType());
4122 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
4123 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
4132 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
4133 << *
I.getType() <<
" is too high.\n");
4137 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
4138 NewSplatVal <<= Width;
4139 NewSplatVal |= SplatVal0->
zext(Width * 2);
4141 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
4149bool VectorCombine::shrinkLoadForShuffles(
Instruction &
I) {
4150 auto *OldLoad = dyn_cast<LoadInst>(&
I);
4151 if (!OldLoad || !OldLoad->isSimple())
4154 auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType());
4158 unsigned const OldNumElements = OldLoadTy->getNumElements();
4164 using IndexRange = std::pair<int, int>;
4165 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
4166 IndexRange OutputRange = IndexRange(OldNumElements, -1);
4174 return std::nullopt;
4181 for (
int Index : Mask) {
4182 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
4183 OutputRange.first = std::min(Index, OutputRange.first);
4184 OutputRange.second = std::max(Index, OutputRange.second);
4189 if (OutputRange.second < OutputRange.first)
4190 return std::nullopt;
4196 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
4197 unsigned const NewNumElements = Indices->second + 1u;
4201 if (NewNumElements < OldNumElements) {
4206 Type *ElemTy = OldLoadTy->getElementType();
4208 Value *PtrOp = OldLoad->getPointerOperand();
4211 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
4212 OldLoad->getPointerAddressSpace(),
CostKind);
4215 OldLoad->getPointerAddressSpace(),
CostKind);
4217 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
4219 unsigned const MaxIndex = NewNumElements * 2u;
4222 auto *Shuffle = cast<ShuffleVectorInst>(
Use.
getUser());
4229 for (
int Index : OldMask) {
4230 if (Index >=
static_cast<int>(MaxIndex))
4244 dbgs() <<
"Found a load used only by shufflevector instructions: "
4245 <<
I <<
"\n OldCost: " << OldCost
4246 <<
" vs NewCost: " << NewCost <<
"\n");
4248 if (OldCost < NewCost || !NewCost.
isValid())
4252 auto *NewLoad = cast<LoadInst>(
4254 NewLoad->copyMetadata(
I);
4257 for (UseEntry &
Use : NewUses) {
4259 std::vector<int> &NewMask =
Use.second;
4266 replaceValue(*Shuffle, *NewShuffle,
false);
4279bool VectorCombine::shrinkPhiOfShuffles(
Instruction &
I) {
4280 auto *
Phi = dyn_cast<PHINode>(&
I);
4281 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
4294 auto *Shuf = cast<ShuffleVectorInst>(
Phi->getOperand(0u));
4297 auto *InputVT = cast<FixedVectorType>(
Op->getType());
4298 auto *ResultVT = cast<FixedVectorType>(Shuf->
getType());
4299 auto const InputNumElements = InputVT->getNumElements();
4301 if (InputNumElements >= ResultVT->getNumElements())
4309 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
4310 if (
M0 >= 0 &&
M1 >= 0)
4312 else if (
M0 == -1 &&
M1 == -1)
4325 int MaskOffset = NewMask[0
u];
4326 unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
4329 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
4343 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
4346 if (NewCost > OldCost)
4354 Worklist.push(cast<Instruction>(NewShuf0));
4358 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
4360 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
4366 replaceValue(*Phi, *NewShuf1);
4372bool VectorCombine::run() {
4384 bool IsVectorType = isa<VectorType>(
I.getType());
4385 bool IsFixedVectorType = isa<FixedVectorType>(
I.getType());
4386 auto Opcode =
I.getOpcode();
4394 if (IsFixedVectorType) {
4396 case Instruction::InsertElement:
4397 if (vectorizeLoadInsert(
I))
4400 case Instruction::ShuffleVector:
4401 if (widenSubvectorLoad(
I))
4412 if (scalarizeOpOrCmp(
I))
4414 if (scalarizeLoadExtract(
I))
4416 if (scalarizeExtExtract(
I))
4418 if (scalarizeVPIntrinsic(
I))
4420 if (foldInterleaveIntrinsics(
I))
4424 if (Opcode == Instruction::Store)
4425 if (foldSingleElementStore(
I))
4429 if (TryEarlyFoldsOnly)
4436 if (IsFixedVectorType) {
4438 case Instruction::InsertElement:
4439 if (foldInsExtFNeg(
I))
4441 if (foldInsExtBinop(
I))
4443 if (foldInsExtVectorToShuffle(
I))
4446 case Instruction::ShuffleVector:
4447 if (foldPermuteOfBinops(
I))
4449 if (foldShuffleOfBinops(
I))
4451 if (foldShuffleOfSelects(
I))
4453 if (foldShuffleOfCastops(
I))
4455 if (foldShuffleOfShuffles(
I))
4457 if (foldShuffleOfIntrinsics(
I))
4459 if (foldSelectShuffle(
I))
4461 if (foldShuffleToIdentity(
I))
4464 case Instruction::Load:
4465 if (shrinkLoadForShuffles(
I))
4468 case Instruction::BitCast:
4469 if (foldBitcastShuffle(
I))
4472 case Instruction::And:
4473 case Instruction::Or:
4474 case Instruction::Xor:
4475 if (foldBitOpOfCastops(
I))
4478 case Instruction::PHI:
4479 if (shrinkPhiOfShuffles(
I))
4489 case Instruction::Call:
4490 if (foldShuffleFromReductions(
I))
4492 if (foldCastFromReductions(
I))
4495 case Instruction::ExtractElement:
4496 if (foldShuffleChainsToReduce(
I))
4499 case Instruction::ICmp:
4500 case Instruction::FCmp:
4501 if (foldExtractExtract(
I))
4504 case Instruction::Or:
4505 if (foldConcatOfBoolMasks(
I))
4510 if (foldExtractExtract(
I))
4512 if (foldExtractedCmps(
I))
4514 if (foldBinopOfReductions(
I))
4523 bool MadeChange =
false;
4535 NextInst =
I->getNextNode();
4536 if (!
I->isDebugOrPseudoInst())
4537 MadeChange |= FoldInst(*
I);
4544 while (!Worklist.isEmpty()) {
4554 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This is the interface for a simple mod/ref and alias analysis over globals.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
FunctionAnalysisManager FAM
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Value * peekThroughBitcasts(Value *V)
Return the source operand of a potentially bitcasted value.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
A private abstract base class describing the concept of an individual alias analysis implementation.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
void push(Instruction *I)
Push the instruction onto the worklist stack.
void remove(Instruction *I)
Remove I from the worklist if it exists.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
This class represents a sign extension of integer types.
This class represents a cast from signed integer to floating point.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This class represents a cast unsigned integer to floating point.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
This is the common base class for vector predication intrinsics.
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
This class represents zero extension of integer types.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.