cl::desc("Enable use of wide get active lane mask instructions"));
GetIntOrFpInductionDescriptor,
Plan->getVectorLoopRegion());
if (!VPBB->getParent())
auto EndIter = Term ? Term->getIterator() : VPBB->end();
VPValue *VPV = Ingredient.getVPSingleValue();
const auto *II = GetIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
"only VPInstructions expected here");
*Load, Ingredient.getOperand(0), nullptr,
Ingredient.getDebugLoc());
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr, false, false,
{Ingredient.op_begin(), Ingredient.op_end() - 1}, CI->getType(),
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
"Only recipes with zero or one defined values expected");
Ingredient.eraseFromParent();
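// The fragment above is from the VPInstruction-to-recipe lowering: each
// widened ingredient is replaced by a concrete recipe (induction phi, wide
// load/store, call, or cast) built from its operands and debug location,
// and the original ingredient is erased once its single defined value has
// been replaced.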
for (auto &Recipe : *VPBB) {
WorkList.insert({VPBB, Def});
for (unsigned I = 0; I != WorkList.size(); ++I) {
std::tie(SinkTo, SinkCandidate) = WorkList[I];
if (SinkCandidate->getParent() == SinkTo ||
if (!ScalarVFOnly && RepR->isSingleScalar())
bool NeedsDuplicating = false;
auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, SinkCandidate](VPUser *U) {
if (UI->getParent() == SinkTo)
NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
return NeedsDuplicating &&
if (!all_of(SinkCandidate->users(), CanSinkWithUser))
if (NeedsDuplicating) {
if (auto *SinkCandidateRepR =
nullptr, *SinkCandidateRepR);
Clone = SinkCandidate->clone();
WorkList.insert({SinkTo, Def});
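// The index-based loop above is the usual LLVM idiom for worklists that grow
// while being processed: new {SinkTo, SinkCandidate} pairs are appended
// during the walk, and iterating by index (re-reading size() each round)
// stays valid across insertions, where iterators would be invalidated.
// A minimal sketch of the same idiom, independent of VPlan (seed/expand are
// placeholder helpers):
//   SmallVector<Item> WorkList = seed();
//   for (unsigned I = 0; I != WorkList.size(); ++I) {
//     Item It = WorkList[I]; // copy: the buffer may reallocate
//     for (Item Next : expand(It))
//       WorkList.push_back(Next);
//   }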
if (!EntryBB || EntryBB->size() != 1 ||
if (EntryBB->getNumSuccessors() != 2)
if (!Succ0 || !Succ1)
if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
if (Succ0->getSingleSuccessor() == Succ1)
if (Succ1->getSingleSuccessor() == Succ0)
if (!Region1->isReplicator())
auto *MiddleBasicBlock =
if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
if (!Region2 || !Region2->isReplicator())
if (!Mask1 || Mask1 != Mask2)
assert(Mask1 && Mask2 && "both regions must have conditions");
if (TransformedRegions.contains(Region1))
if (!Then1 || !Then2)
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
Phi1ToMove.eraseFromParent();
Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
TransformedRegions.insert(Region1);
return !TransformedRegions.empty();
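// This pass merges adjacent replicate regions guarded by the same mask: once
// the masks are known identical, the recipes (and any still-used phis) of the
// first region are moved into the second, and the first region's blocks
// become dead. The checks above ensure the two regions are directly connected
// through an empty middle block before merging.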
std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
assert(Instr->getParent() && "Predicated instruction not in any basic block");
auto *BlockInMask = PredRecipe->getMask();
RecipeWithoutMask->getDebugLoc());
if (RepR->isPredicated())
if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
if (!VPBB->getParent())
if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
R.moveBefore(*PredVPBB, PredVPBB->end());
auto *ParentRegion = VPBB->getParent();
if (ParentRegion && ParentRegion->getExiting() == VPBB)
ParentRegion->setExiting(PredVPBB);
for (auto *Succ : to_vector(VPBB->successors())) {
return !WorkList.empty();
bool ShouldSimplify = true;
while (ShouldSimplify) {
if (!IV || IV->getTruncInst())
auto &Casts = IV->getInductionDescriptor().getCastInsts();
for (auto *U : FindMyCast->users()) {
if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
FoundUserCast = UserCast;
FindMyCast = FoundUserCast;
if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
bool IsConditionalAssume = RepR && RepR->isPredicated() &&
if (IsConditionalAssume)
if (R.mayHaveSideEffects())
return all_of(R.definedValues(),
              [](VPValue *V) { return V->getNumUsers() == 0; });
if (!PhiR || PhiR->getNumOperands() != 2 || PhiR->getNumUsers() != 1)
if (*PhiR->user_begin() != Incoming->getDefiningRecipe() ||
PhiR->replaceAllUsesWith(PhiR->getOperand(0));
PhiR->eraseFromParent();
Incoming->getDefiningRecipe()->eraseFromParent();
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
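// Dead-recipe elimination above treats a recipe as removable when it has no
// side effects (conditional assumes are explicitly allowed to go) and every
// value it defines has no remaining users. The phi special case removes a
// two-operand phi that only feeds its own backedge update, deleting the
// cycle as a unit.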
if (ResultTy != StepTy) {
Builder.setInsertPoint(VecPreheader);
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
for (unsigned I = 0; I != Users.size(); ++I) {
Users.insert_range(V->users());
return Users.takeVector();
Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
(RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
Def->operands(), true);
Clone->insertAfter(Def);
Def->replaceAllUsesWith(Clone);
VPValue *StepV = PtrIV->getOperand(1);
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
      return U->usesScalars(WideIV);
Plan, ID.getKind(), ID.getInductionOpcode(),
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
WideIV->getDebugLoc(), Builder);
if (!HasOnlyVectorVFs)
WideIV->replaceAllUsesWith(Steps);
WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
  return U.usesScalars(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
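// Induction legalization replaces a widened induction with explicit scalar
// steps whenever some user only needs per-lane scalar values. If the plan may
// still run with a scalar VF, all uses are rewritten; otherwise only the
// scalar uses are redirected via replaceUsesWithIf, leaving vector users on
// the widened recipe.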
if (!Def || Def->getNumOperands() != 2)
auto IsWideIVInc = [&]() {
auto &ID = WideIV->getInductionDescriptor();
VPValue *IVStep = WideIV->getStepValue();
switch (ID.getInductionOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::Sub: {
return IsWideIVInc() ? WideIV : nullptr;
if (WideIntOrFp && WideIntOrFp->getTruncInst())
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                            FirstActiveLaneType, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
VPValue *Start = WideIV->getStartValue();
VPValue *Step = WideIV->getStepValue();
EndValue = B.createDerivedIV(
Start, EndValue, Step);
assert(EndValue && "end value must have been pre-computed");
VPValue *Step = WideIV->getStepValue();
return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
return B.createPtrAdd(EndValue,
                      B.createNaryOp(Instruction::Sub, {Zero, Step}),
const auto &ID = WideIV->getInductionDescriptor();
return B.createNaryOp(
    ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
{EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
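// For values escaping to the scalar loop via exit phis, the end value of an
// induction is rewound by one step, since the vector loop has already
// advanced past the exiting iteration: integer inductions subtract the step,
// pointer inductions add the negated step, and floating-point inductions
// apply the inverse of their FAdd/FSub binop with its original fast-math
// flags.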
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
if (PredVPBB == MiddleVPBB)
ExitIRI->getOperand(Idx),
ExitIRI->getOperand(Idx), SE);
ExitIRI->setOperand(Idx, Escape);
const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
ExpR->replaceAllUsesWith(V->second);
ExpR->eraseFromParent();
while (!WorkList.empty()) {
if (!Seen.insert(Cur).second)
WorkList.append(R->op_begin(), R->op_end());
R->eraseFromParent();
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
Ops.push_back(Op->getLiveInIRValue());
return Folder.FoldSelect(Ops[0], Ops[1],
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
case Instruction::Select:
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ICmp:
case Instruction::FCmp:
case Instruction::GetElementPtr: {
RFlags.getGEPNoWrapFlags());
case Instruction::ExtractElement:
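// Constant folding of live-ins: when every operand of a recipe is a live-in
// IR value, the matching IR-level folder (InstSimplifyFolder) is consulted
// directly. Judging from the cases above, the VPlan logical-and/or
// instructions are evidently folded as selects rather than bitwise ops,
// which preserves their short-circuit poison semantics, while Not folds as
// an xor.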
VPlan *Plan = R.getParent()->getPlan();
.Default([](auto *) { return false; }))
PredPHI->replaceAllUsesWith(Op);
if (TruncTy == ATy) {
Def->replaceAllUsesWith(A);
: Instruction::ZExt;
if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
VPC->setUnderlyingValue(UnderlyingExt);
VPC->insertBefore(&R);
Def->replaceAllUsesWith(VPC);
VPC->insertBefore(&R);
Def->replaceAllUsesWith(VPC);
for (VPUser *U : A->users()) {
for (VPValue *VPV : R->definedValues())
Def->replaceAllUsesWith(X);
Def->eraseFromParent();
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(X);
return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));
return Def->replaceAllUsesWith(Def->getOperand(1));
(!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
!Def->getOperand(1)->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
return Def->replaceAllUsesWith(Plan->getFalse());
return Def->replaceAllUsesWith(X);
Def->setOperand(0, C);
Def->setOperand(1, Y);
Def->setOperand(2, X);
X->hasMoreThanOneUniqueUser())
return Def->replaceAllUsesWith(
    Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));
return Def->replaceAllUsesWith(A);
return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
return Def->replaceAllUsesWith(A);
R->setOperand(1, Y);
R->setOperand(2, X);
R->replaceAllUsesWith(Cmp);
if (!Cmp->getDebugLoc() && R.getDebugLoc())
Cmp->setDebugLoc(R.getDebugLoc());
return Def->replaceAllUsesWith(Def->getOperand(1));
X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);
Def->replaceAllUsesWith(X);
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
if (Phi->getOperand(0) == Phi->getOperand(1))
Def->replaceAllUsesWith(Phi->getOperand(0));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 1));
Def->replaceAllUsesWith(
    BuildVector->getOperand(BuildVector->getNumOperands() - 2));
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
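// These peepholes mirror InstCombine at the VPlan level: boolean and select
// identities, redundant casts, and de Morgan-style rewrites are applied via
// replaceAllUsesWith; extract-last/penultimate-element of a BuildVector folds
// to the corresponding scalar operand, and single-entry phis collapse to
// their only incoming value. The hasMoreThanOneUniqueUser guards keep the
// rewrites from duplicating work for shared subexpressions.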
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
Steps->replaceAllUsesWith(Steps->getOperand(0));
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
return PhiR && PhiR->isInLoop();
Def->replaceAllUsesWith(A);
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true, nullptr, *RepR);
Clone->insertBefore(RepOrWidenR);
{Clone->getOperand(0)});
Ext->insertBefore(Clone);
Clone->setOperand(0, Ext);
RepR->eraseFromParent();
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
  return U->usesScalars(RepOrWidenR) ||
         match(cast<VPRecipeBase>(U),
               m_ExtractLastElement(m_VPValue()));
RepOrWidenR->operands(),
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
UniqueValues.insert(Blend->getIncomingValue(I));
if (UniqueValues.size() == 1) {
Blend->replaceAllUsesWith(*UniqueValues.begin());
Blend->eraseFromParent();
if (Blend->isNormalized())
unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (I == StartIndex)
OperandsWithMask.push_back(Blend->getIncomingValue(I));
OperandsWithMask.push_back(Blend->getMask(I));
OperandsWithMask, Blend->getDebugLoc());
NewBlend->insertBefore(&R);
VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->eraseFromParent();
if (NewBlend->getNumOperands() == 3 &&
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
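// Blend normalization: one incoming value is chosen as the default and its
// mask is dropped, so a normalized blend carries an odd operand list
//   v_start, v1, m1, v2, m2, ...
// that later lowers to a select chain. Choosing StartIndex as a mask with a
// single user makes that mask dead here. The final special case recognizes a
// two-incoming blend whose remaining mask is a negation and swaps the
// incoming values so the un-negated mask can be used directly.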
APInt MaxVal = AlignedTC - 1;
unsigned NewBitWidth =
bool MadeChange = false;
if (!WideIV || !WideIV->isCanonical() ||
    WideIV->hasMoreThanOneUniqueUser() ||
    NewIVTy == WideIV->getScalarType())
if (!match(*WideIV->user_begin(),
auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
WideIV->setStartValue(NewStart);
auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
WideIV->setStepValue(NewStep);
Cmp->setOperand(1, NewBTC);
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
  return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
const SCEV *VectorTripCount =
"Trip count SCEV must be computable");
auto *Term = &ExitingVPBB->back();
for (unsigned Part = 0; Part < UF; ++Part) {
Extracts[Part] = Ext;
Ext->insertAfter(ALM);
match(Phi->getBackedgeValue(),
assert(Index && "Expected index from ActiveLaneMask instruction");
"Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
"Expected incoming values of Phi to be ActiveLaneMasks");
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
ExtractFromALM(EntryALM, EntryExtracts);
ExtractFromALM(LoopALM, LoopExtracts);
Not->setOperand(0, LoopExtracts[0]);
for (unsigned Part = 0; Part < UF; ++Part) {
Phis[Part]->setStartValue(EntryExtracts[Part]);
Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
auto *Term = &ExitingVPBB->back();
const SCEV *TripCount =
"Trip count SCEV must be computable");
if (TripCount->isZero() ||
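// Wide active-lane-mask optimization: instead of one get.active.lane.mask
// per unroll part, a single wide mask is computed (its multiplier operand
// scaled up accordingly) and per-part sub-masks are carved out with extracts
// placed right after the wide mask. The per-part lane-mask phis are then
// rewired to the entry and loop extracts, and the loop exit test negates the
// first loop extract.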
if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
  return R->isCanonical();
return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
           VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
R->getScalarType());
HeaderR.eraseFromParent();
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
B->setParent(nullptr);
Term->getDebugLoc());
Term->eraseFromParent();
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
if (SinkCandidate == Previous)
!Seen.insert(SinkCandidate).second ||
if (SinkCandidate->mayHaveSideEffects())
for (unsigned I = 0; I != WorkList.size(); ++I) {
"only recipes with a single defined value expected");
if (SinkCandidate == FOR)
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
for (VPUser *U : FOR->users()) {
[&VPDT, HoistPoint](VPUser *U) {
  auto *R = cast<VPRecipeBase>(U);
  return HoistPoint == R ||
         VPDT.properlyDominates(HoistPoint, R);
"HoistPoint must dominate all users of FOR");
auto NeedsHoisting = [HoistPoint, &VPDT,
VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
if (!HoistCandidate)
"CFG in VPlan should still be flat, without replicate regions");
if (!Visited.insert(HoistCandidate).second)
return HoistCandidate;
return !HoistCandidate->mayHaveSideEffects();
for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
"only recipes with a single defined value expected");
if (!CanHoist(Current))
if (auto *R = NeedsHoisting(Op))
HoistCandidate->moveBefore(*HoistPoint->getParent(),
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
while (auto *PrevPhi =
assert(PrevPhi->getParent() == FOR->getParent());
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
{FOR, FOR->getBackedgeValue()});
FOR->replaceAllUsesWith(RecurSplice);
RecurSplice->setOperand(0, FOR);
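// Fixed-order recurrences: users of the recurrence phi must observe the
// previous iteration's value, so recipes are either sunk after the recipe
// producing the backedge value or hoisted above a dominating HoistPoint.
// A FirstOrderRecurrenceSplice of {FOR, FOR->getBackedgeValue()} then takes
// over all of FOR's uses; since replaceAllUsesWith also rewrote the splice's
// own first operand, it is reset to FOR afterwards.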
RecurKind RK = PhiR->getRecurrenceKind();
RecWithFlags->dropPoisonGeneratingFlags();
struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
return Def == getEmptyKey() || Def == getTombstoneKey();
static std::optional<std::pair<bool, unsigned>>
std::optional<std::pair<bool, unsigned>>>(R)
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
  return std::make_pair(true, I->getVectorIntrinsicID());
.Default([](auto *) { return std::nullopt; });
static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
    .Case<VPReplicateRecipe>([](auto *I) -> Type * {
return GEP->getSourceElementType();
.Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
    [](auto *I) { return I->getSourceElementType(); })
.Default([](auto *) { return nullptr; });
static bool canHandle(const VPSingleDefRecipe *Def) {
? std::make_pair(false, Instruction::GetElementPtr)
: getOpcodeOrIntrinsicID(Def);
if (!C || (!C->first && (C->second == Instruction::InsertValue ||
                         C->second == Instruction::ExtractValue)))
return !Def->mayReadFromMemory();
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
const VPlan *Plan = Def->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
if (RFlags->hasPredicate())
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
if (L->getVPDefID() != R->getVPDefID() ||
    getOpcodeOrIntrinsicID(L) != getOpcodeOrIntrinsicID(R) ||
    getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
    !equal(L->operands(), R->operands()))
if (LFlags->hasPredicate() &&
    LFlags->getPredicate() !=
const VPlan *Plan = L->getParent()->getPlan();
VPTypeAnalysis TypeInfo(*Plan);
return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
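// VPCSEDenseMapInfo makes VPSingleDefRecipe pointers hashable by structure
// rather than identity: the hash combines the VPDef ID, the opcode or vector
// intrinsic ID (the bool in the pair disambiguates the two namespaces), the
// GEP source element type where applicable, the inferred scalar type, and
// the operands. isEqual re-checks all of that plus compare predicates, so
// recipes that hash alike but differ in flags are never merged by CSE.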
if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
if (!VPDT.dominates(V->getParent(), VPBB))
Def->replaceAllUsesWith(V);
return RepR && RepR->getOpcode() == Instruction::Alloca;
if (CannotHoistRecipe(R))
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
return !Op->isDefinedOutsideLoopRegions();
R.moveBefore(*Preheader, Preheader->end());
VPValue *ResultVPV = R.getVPSingleValue();
unsigned NewResSizeInBits = MinBWs.lookup(UI);
if (!NewResSizeInBits)
(void)OldResSizeInBits;
VPW->dropPoisonGeneratingFlags();
if (OldResSizeInBits != NewResSizeInBits &&
Ext->insertAfter(&R);
Ext->setOperand(0, ResultVPV);
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
"Only ICmps should not need extending the result.");
for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
auto *Op = R.getOperand(Idx);
unsigned OpSizeInBits =
if (OpSizeInBits == NewResSizeInBits)
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
: ProcessedIter->second;
R.setOperand(Idx, NewOp);
ProcessedIter->second = NewOp;
if (!Op->isLiveIn()) {
assert(VPBB->getNumSuccessors() == 2 &&
       "Two successors expected for BranchOnCond");
unsigned RemovedIdx;
"There must be a single edge between VPBB and its successor");
VPBB->back().eraseFromParent();
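// Minimal-bit-width narrowing: based on the MinBWs analysis each result is
// shrunk to NewResSizeInBits, an extend is inserted after the recipe to
// restore the original type for existing users (ICmps excepted, since their
// result is already i1), and each operand is truncated on demand.
// ProcessedTruncs memoizes the truncates per operand so a value shared by
// several narrowed recipes is only truncated once.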
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
VPValue *TripCount, *IncrementValue;
IncrementValue = CanonicalIVIncrement;
IncrementValue = CanonicalIVPHI;
auto *EntryIncrement = Builder.createOverflowingOp(
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
LaneMaskPhi->insertAfter(CanonicalIVPHI);
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement =
{IncrementValue}, {false, false}, DL);
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
auto *NotMask = Builder.createNot(ALM, DL);
"Must have at most one VPWideCanonicalIVRecipe");
auto *WideCanonicalIV =
WideCanonicalIVs.push_back(WideCanonicalIV);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
WideCanonicalIVs.push_back(WidenOriginalIV);
for (auto *Wide : WideCanonicalIVs) {
assert(VPI->getOperand(0) == Wide &&
       "WidenCanonicalIV must be the first operand of the compare");
assert(!HeaderMask && "Multiple header masks found?");
VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
assert(FoundWidenCanonicalIVUser &&
       "Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
if (UseActiveLaneMaskForControlFlow) {
nullptr, "active.lane.mask");
assert(OrigMask && "Unmasked recipe when folding tail");
return HeaderMask == OrigMask ? nullptr : OrigMask;
auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
       "VPVectorEndPointerRecipe with non-VF VF operand?");
return cast<VPWidenMemoryRecipe>(U)->isReverse();
"VPVectorEndPointerRecipe not used by reversed widened memory recipe?");
VPValue *NewMask = GetNewMask(L->getMask());
VPValue *NewAddr = GetNewAddr(L->getAddr());
VPValue *NewMask = GetNewMask(IR->getMask());
VPValue *NewMask = GetNewMask(Red->getCondOp());
Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
"User of VF that we can't transform to EVL.");
return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
                        m_Specific(&Plan.getVFxUF()))) ||
       isa<VPWidenPointerInductionRecipe>(U);
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
"increment of the canonical induction.");
MaxEVL = Builder.createScalarZExtOrTrunc(
Builder.setInsertPoint(Header, Header->getFirstNonPhi());
VPValue *PrevEVL = Builder.createScalarPhi(
Intrinsic::experimental_vp_splice,
{V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
VPValue *EVLMask = Builder.createICmp(
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
       "New recipe must define the same number of values as the "
for (unsigned I = 0; I < NumDefVal; ++I) {
VPValue *CurVPV = CurRecipe->getVPValue(I);
R->eraseFromParent();
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
VPValue *StartV = CanonicalIVPHI->getStartValue();
EVLPhi->insertAfter(CanonicalIVPHI);
VPBuilder Builder(Header, Header->getFirstNonPhi());
VPPhi *AVLPhi = Builder.createScalarPhi(
if (MaxSafeElements) {
Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, *MaxSafeElements));
auto *CanonicalIVIncrement =
Builder.setInsertPoint(CanonicalIVIncrement);
OpVPEVL = Builder.createScalarZExtOrTrunc(
    OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
auto *NextEVLIV = Builder.createOverflowingOp(
    Instruction::Add, {OpVPEVL, EVLPhi},
    {CanonicalIVIncrement->hasNoUnsignedWrap(),
     CanonicalIVIncrement->hasNoSignedWrap()},
    CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
EVLPhi->addOperand(NextEVLIV);
VPValue *NextAVL = Builder.createOverflowingOp(
    Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
[[maybe_unused]] bool FoundAVL =
assert(FoundAVL && "Didn't find AVL?");
[[maybe_unused]] bool FoundAVLNext =
assert(FoundAVLNext && "Didn't find AVL backedge?");
VPValue *Backedge = CanonicalIV->getIncomingValue(1);
"Unexpected canonical iv");
CanonicalIV->eraseFromParent();
match(LatchExitingBr,
"Unexpected terminator in EVL loop");
LatchExitingBr->eraseFromParent();
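// Explicit-vector-length stepping maintains two scalar phis: the EVL-based
// IV, advanced by the number of elements actually processed
// ("index.evl.next"), and an AVL phi that starts at the trip count and is
// decremented by the same amount with nuw (the {true, false} flag pair
// encodes NUW without NSW). MaxSafeElements, when present, additionally
// clamps the per-iteration element count.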
return R->getParent()->getParent() ||
for (const SCEV *Stride : StridesMap.values()) {
const APInt *StrideConst;
if (!match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))
Plan.getOrAddLiveIn(ConstantInt::get(Stride->getType(), *StrideConst));
unsigned BW = U->getType()->getScalarSizeInBits();
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
const SCEV *ScevExpr = ExpSCEV->getSCEV();
if (NewSCEV != ScevExpr) {
ExpSCEV->replaceAllUsesWith(NewExp);
const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
while (!Worklist.empty()) {
if (!Visited.insert(CurRec).second)
RecWithFlags->isDisjoint()) {
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
RecWithFlags->dropPoisonGeneratingFlags();
assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
       "found instruction with poison generating flags not covered by "
       "VPRecipeWithIRFlags");
if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&
    BlockNeedsPredication(UnderlyingInstr.getParent()))
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
InterleaveRec->getInterleaveGroup();
bool NeedPredication = false;
I < NumMembers; ++I) {
NeedPredication |= BlockNeedsPredication(Member->getParent());
if (NeedPredication)
CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
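// Poison-flag scrubbing walks the backward slice of each address computation
// feeding a memory recipe that becomes predicated: flags such as nuw/nsw or
// disjoint could turn inactive-lane garbage into poison once the access runs
// unconditionally under a mask. A disjoint-or is rewritten as a plain add
// (without wrap flags) rather than merely stripped, preserving the
// add-equivalence that the disjoint guarantee provided.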
if (InterleaveGroups.empty())
for (const auto *IG : InterleaveGroups) {
StoredValues.push_back(StoreR->getStoredValue());
for (unsigned I = 1; I < IG->getFactor(); ++I) {
StoredValues.push_back(StoreR->getStoredValue());
bool NeedsMaskForGaps =
    (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
    (!StoredValues.empty() && !IG->isFull());
VPValue *Addr = Start->getAddr();
assert(IG->getIndex(IRInsertPos) != 0 &&
       "index of insert position shouldn't be zero");
IG->getIndex(IRInsertPos),
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
if (IG->isReverse()) {
-(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
InsertPos->getMask(), NeedsMaskForGaps,
InterleaveMD, InsertPos->getDebugLoc());
VPIG->insertBefore(InsertPos);
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (!Member->getType()->isVoidTy()) {
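// Interleave-group formation: member recipes are replaced by one interleave
// recipe anchored at the group's insert position. The address is rebased to
// member 0 with a no-wrap pointer adjustment when the insert position is not
// the first member, reversed groups step the pointer back by the group
// factor, and a gap mask is requested when the group has missing members or
// would otherwise need a scalar epilogue that is not allowed.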
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Flags = ID.getInductionBinOp()->getFastMathFlags();
Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,
WidePHI->addOperand(Init);
WidePHI->insertBefore(WidenIVR);
Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
WidePHI->addOperand(Next);
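// Expanding a widened int/FP induction: the initial vector is built as
// splat(Start) + <0, 1, ..., VF-1> * splat(Step) (with FMul/FAdd and the
// induction's fast-math flags in the FP case), kept in a vector phi, and
// each iteration adds Step * VF to produce the next value. In LLVM IR terms
// this is roughly (illustrative, for VF = 4):
//   %init = add <4 x i32> %splat.start, mul (<0,1,2,3>, %splat.step)
//   %vec.ind = phi [ %init, %ph ], [ %vec.ind.next, %loop ]
//   %vec.ind.next = add %vec.ind, %splat.step.times.vf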
VPlan *Plan = R->getParent()->getPlan();
VPValue *Start = R->getStartValue();
VPValue *Step = R->getStepValue();
VPValue *VF = R->getVFValue();
assert(R->getInductionDescriptor().getKind() ==
"Not a pointer induction according to InductionDescriptor!");
"Recipe should have been replaced");
VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
R->replaceAllUsesWith(PtrAdd);
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
if (!R->isReplicator())
R->dissolveToCFGLoop();
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
Select = Builder.createSelect(Blend->getMask(I),
                              Blend->getIncomingValue(I), Select,
                              R.getDebugLoc(), "predphi");
Blend->replaceAllUsesWith(Select);
? Instruction::UIToFP
: Instruction::Trunc;
VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
Flags = {VPI->getFastMathFlags()};
MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
VPI->replaceAllUsesWith(VectorStep);
R->eraseFromParent();
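// Lowering a normalized blend to selects folds the incoming values right to
// left: starting from the unmasked default value, each (value, mask) pair
// wraps the accumulated select, so the last incoming value whose mask is
// true wins, e.g. blend v0, v1/m1, v2/m2 becomes
//   select m2, v2, (select m1, v1, v0)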
3382 "unsupported early exit VPBB");
3394 "Terminator must be be BranchOnCond");
3395 VPValue *CondOfEarlyExitingVPBB =
3397 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
3398 ? CondOfEarlyExitingVPBB
3399 : Builder.createNot(CondOfEarlyExitingVPBB);
3416 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
3421 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
3422 if (ExitIRI->getNumOperands() != 1) {
3425 ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
3428 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
3429 if (!IncomingFromEarlyExit->
isLiveIn()) {
3433 "first.active.lane");
3436 nullptr,
"early.exit.value");
3437 ExitIRI->
setOperand(EarlyExitIdx, IncomingFromEarlyExit);
3447 "Unexpected terminator");
3448 auto *IsLatchExitTaken =
3450 LatchExitingBranch->getOperand(1));
3451 auto *AnyExitTaken = Builder.createNaryOp(
3452 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
3454 LatchExitingBranch->eraseFromParent();
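// Uncountable early exits: the vector latch branches on the OR of the early
// exit condition and the normal latch exit. Values flowing out through the
// early exit are reduced to a single lane by computing the first active lane
// of the exit mask and extracting that lane, so the scalar exit phi receives
// the value from the iteration that actually triggered the exit.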
Type *RedTy = Ctx.Types.inferScalarType(Red);
VPValue *VecOp = Red->getVecOp();
auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt,
                                           Type *SrcTy) -> bool {
Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
IsExtendedRedValidAndClampRange(
Instruction::CastOps::ZExt,
Ctx.Types.inferScalarType(A)))
if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
Type *RedTy = Ctx.Types.inferScalarType(Red);
auto IsMulAccValidAndClampRange =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
isZExt, Opcode, RedTy, SrcVecTy, CostKind);
ExtCost += Ext0->computeCost(VF, Ctx);
ExtCost += Ext1->computeCost(VF, Ctx);
ExtCost += OuterExt->computeCost(VF, Ctx);
return MulAccCost.isValid() &&
       MulAccCost < ExtCost + MulCost + RedCost;
VPValue *VecOp = Red->getVecOp();
if (RecipeA && RecipeB &&
    (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
                               Instruction::CastOps::ZExt,
                           Mul, RecipeA, RecipeB, nullptr)) {
IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
    Ext0->getOpcode() == Ext1->getOpcode() &&
    IsMulAccValidAndClampRange(Ext0->getOpcode() ==
                                   Instruction::CastOps::ZExt,
                               Mul, Ext0, Ext1, Ext)) {
Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
*Ext0, Ext0->getDebugLoc());
NewExt0->insertBefore(Ext0);
Ext->getResultType(), *Ext1, *Ext1,
Ext1->getDebugLoc());
Mul->setOperand(0, NewExt0);
Mul->setOperand(1, NewExt1);
Red->setOperand(1, Mul);
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
Red->replaceAllUsesWith(AbstractR);
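// Reduction bundling: an extend (or extend-multiply pair) feeding a
// reduction is folded into an abstract extended-reduction /
// multiply-accumulate recipe when the target reports a valid cost that beats
// the sum of the separate extend, multiply, and reduction costs; the same
// check also clamps the VF range so all VFs in the plan agree on the chosen
// form.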
for (VPValue *VPV : VPValues) {
(VPV->isLiveIn() && VPV->getLiveInIRValue() &&
if (User->usesScalars(VPV))
HoistPoint = HoistBlock->begin();
"All users must be in the vector preheader or dominated by it");
VPV->replaceUsesWithIf(Broadcast,
                       [VPV, Broadcast](VPUser &U, unsigned Idx) {
                         return Broadcast != &U && !U.usesScalars(VPV);
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
auto *TCMO = Builder.createNaryOp(
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
DefR->replaceUsesWithIf(
    BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
bool RequiresScalarEpilogue) {
assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
if (TailByMasking) {
TC = Builder.createNaryOp(
{TC, Builder.createNaryOp(
Builder.createNaryOp(Instruction::URem, {TC, Step},
if (RequiresScalarEpilogue) {
"requiring scalar epilogue is not supported with tail folding");
VPValue *IsZero = Builder.createICmp(
R = Builder.createSelect(IsZero, Step, R);
VPValue *Res = Builder.createNaryOp(
Builder.createElementCount(TCTy, VFEC * Plan.getUF());
VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
BasicBlock *EntryBB = Entry->getIRBasicBlock();
const SCEV *Expr = ExpSCEV->getSCEV();
ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
ExpSCEV->eraseFromParent();
"VPExpandSCEVRecipes must be at the beginning of the entry block, "
"after any VPIRInstructions");
auto EI = Entry->begin();
return ExpandedSCEVs;
return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
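// Vector trip count materialization: with tail folding the trip count is
// first rounded up before taking TC urem Step, so no iterations are lost;
// otherwise the remainder is simply subtracted. When a scalar epilogue is
// required and the remainder would be zero, the select substitutes a full
// Step, guaranteeing the scalar loop executes at least one iteration.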
unsigned VectorRegWidth) {
Type *GroupElementTy = nullptr;
[&TypeInfo, GroupElementTy](VPValue *Op) {
  return TypeInfo.inferScalarType(Op) == GroupElementTy;
[&TypeInfo, GroupElementTy](VPValue *Op) {
  return TypeInfo.inferScalarType(Op) == GroupElementTy;
return IG->getFactor() == VF && IG->getNumMembers() == VF &&
       GroupSize == VectorRegWidth;
return RepR && RepR->isSingleScalar();
unsigned VectorRegWidth) {
if (R.mayWriteToMemory() && !InterleaveR)
if (InterleaveR->getStoredValues().empty())
auto *Member0 = InterleaveR->getStoredValues()[0];
all_of(InterleaveR->getStoredValues(),
       [Member0](VPValue *VPV) { return Member0 == VPV; })) {
VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
return IR && IR->getInterleaveGroup()->isFull() &&
       IR->getVPValue(Op.index()) == Op.value();
InterleaveR->getStoredValues()[0]->getDefiningRecipe());
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
if (!R || R->getOpcode() != WideMember0->getOpcode() ||
    R->getNumOperands() > 2)
[WideMember0, Idx = I](const auto &P) {
  const auto &[OpIdx, OpV] = P;
  return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
if (StoreGroups.empty())
auto *R = V->getDefiningRecipe();
*cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
LoadGroup->getAddr(), LoadGroup->getMask(), true,
false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
assert(RepR->isSingleScalar() &&
       "must be a single scalar load");
NarrowedOps.insert(RepR);
VPValue *PtrOp = WideLoad->getAddr();
PtrOp = VecPtr->getOperand(0);
nullptr, *WideLoad);
N->insertBefore(WideLoad);
for (auto *StoreGroup : StoreGroups) {
VPValue *Member0 = StoreGroup->getStoredValues()[0];
} else if (auto *WideMember0 =
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
Res = NarrowOp(Member0);
StoreGroup->getAddr(), Res, nullptr, true,
false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
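// Interleave-group narrowing targets plans where every load/store group is
// full, has factor == VF, and fills exactly one vector register; in that
// shape the de-interleave/re-interleave round trip is a no-op, so each group
// collapses to a single consecutive wide access built from member 0, with
// the lead member's operands rewritten through NarrowOp to the narrowed
// loads.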
ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
Inc->setOperand(1, UF);
Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
"must have a BranchOnCond");
if (VF.isScalable() && VScaleForTuning.has_value())
VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
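// Branch weights for the middle block's terminator are derived from one
// vector step (VF * UF, scaled by vscale-for-tuning when VF is scalable),
// biasing the scalar-epilogue-versus-exit branch toward the statistically
// expected remainder.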
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
mir Rename Register Operands
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createElementCount(Type *Ty, ElementCount EC)
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
const InterleaveGroup< Instruction > * getInterleaveGroup() const
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
operand_iterator op_end()
operand_iterator op_begin()
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
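A minimal sketch of conditional use replacement with the API above; casting each VPUser to VPRecipeBase follows the common VPlan idiom and is an assumption here:
// Redirect only those uses of Old that live inside VPBB.
static void replaceUsesInBlock(VPValue *Old, VPValue *New,
                               VPBasicBlock *VPBB) {
  Old->replaceUsesWithIf(New, [VPBB](VPUser &U, unsigned) {
    auto *R = dyn_cast<VPRecipeBase>(&U);
    return R && R->getParent() == VPBB;
  });
}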
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
VPValue * getMask() const
Return the mask used by this recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getFalse()
Return a VPValue wrapping i1 false.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
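A minimal sketch gating a transform on the plan's shape using the accessors above; shouldRunOnPlan is a hypothetical predicate:
static bool shouldRunOnPlan(VPlan &Plan) {
  // Only consider plans that still have a vector loop region and at
  // least one scalable VF candidate.
  if (!Plan.getVectorLoopRegion() || !Plan.hasScalableVF())
    return false;
  // The vector preheader may be absent for degenerate plans.
  return Plan.getVectorPreheader() != nullptr;
}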
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
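A minimal sketch of ElementCount arithmetic: scalable quantities keep their vscale factor through multiplyCoefficientBy.
ElementCount VF = ElementCount::getScalable(4);   // vscale x 4
ElementCount VFxUF = VF.multiplyCoefficientBy(2); // vscale x 8
assert(VFxUF.isScalable() && VFxUF.getKnownMinValue() == 8);
assert(!ElementCount::getFixed(8).isScalable());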
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A divided by B (unsigned division), rounded by the given rounding mode.
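For example, rounding up a trip count divided by a VF (a minimal sketch; the values are illustrative):
APInt TC(/*numBits=*/32, /*val=*/10), VF(32, 4);
// ceil(10 / 4) == 3
APInt VecIters = APIntOps::RoundingUDiv(TC, VF, APInt::Rounding::UP);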
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default LLVM calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
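A minimal sketch composing several of the IR-level matchers above; isGuardedSquare is a hypothetical helper, and m_Value is the standard PatternMatch capture:
// Recognize select(cmp, X * X, 0) and capture X.
static bool isGuardedSquare(Value *V, Value *&X) {
  using namespace llvm::PatternMatch;
  return match(V, m_Select(m_Cmp(), m_Mul(m_Value(X), m_Deferred(X)),
                           m_ZeroInt()));
}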
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
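A minimal sketch with the VPlan-level matchers above, assuming the usual VPlanPatternMatch namespace and VPBasicBlock::getTerminator():
// True if VPBB ends with a branch-on-count over any two VPValues.
static bool endsWithBranchOnCount(VPBasicBlock *VPBB) {
  using namespace llvm::VPlanPatternMatch;
  VPRecipeBase *Term = VPBB->getTerminator();
  return Term && match(Term, m_BranchOnCount(m_VPValue(), m_VPValue()));
}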
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool isHeaderMask(const VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
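A minimal sketch of how the vputils predicates above combine; canNarrowToScalar is a hypothetical helper:
// A def can be kept as one scalar per part if all users only read lane 0
// or the value is provably uniform across lanes.
static bool canNarrowToScalar(const VPValue *Def) {
  return vputils::onlyFirstLaneUsed(Def) || vputils::isSingleScalar(Def);
}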
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
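A minimal sketch of the early-increment idiom in a VPlan context; the dead-recipe test is illustrative:
// Erase trivially dead recipes; early-inc iteration makes erasure safe.
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
  if (R.getNumDefinedValues() == 1 && !R.mayHaveSideEffects() &&
      R.getVPSingleValue()->getNumUsers() == 0)
    R.eraseFromParent();
}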
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
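A minimal sketch contrasting the two traversals: the shallow variant treats each VPRegionBlock as a single node, while the deep variant also descends into it.
for (VPBlockBase *VPB : vp_depth_first_shallow(Plan.getEntry())) {
  // Regions show up as one node here; use vp_depth_first_deep to also
  // visit the VPBasicBlocks nested inside them.
  if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
    (void)VPBB; // process top-level basic blocks only
}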
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
std::unique_ptr< VPlan > VPlanPtr
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
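A minimal sketch of the two hashing helpers above, e.g. for keying a DenseMap on an opcode plus operand list (names are illustrative):
// Order-sensitive: hash_combine(A, B) differs from hash_combine(B, A).
static hash_code hashRecipeKey(unsigned Opcode, ArrayRef<VPValue *> Ops) {
  return hash_combine(Opcode, hash_combine_range(Ops.begin(), Ops.end()));
}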
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...