using namespace VPlanPatternMatch;
// ...
    GetIntOrFpInductionDescriptor,
// ...
      Plan->getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      // ...
    auto EndIter = Term ? Term->getIterator() : VPBB->end();
    // ...
      VPValue *VPV = Ingredient.getVPSingleValue();
      // ...
      if (auto *PhiR = dyn_cast<VPPhi>(&Ingredient)) {
        auto *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
        const auto *II = GetIntOrFpInductionDescriptor(Phi);
        // ...
        VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
        // ...
            Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
      } else {
        assert(isa<VPInstruction>(&Ingredient) &&
               "only VPInstructions expected here");
        assert(!isa<PHINode>(Inst) && "phis should be handled above");
        // ...
        if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
          // ...
              *Load, Ingredient.getOperand(0), nullptr,
              // ...
              Ingredient.getDebugLoc());
        } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
          // ...
              *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
              nullptr, false, false,
              // ...
        } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
          // ...
              {Ingredient.op_begin(), Ingredient.op_end() - 1}, CI->getType(),
          // ...
        } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
          // ...
        } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
          // ...
              CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
        // ...
             "Only recipes with zero or one defined values expected");
      // ...
      Ingredient.eraseFromParent();
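
// The fragment below appears to be the scalar-operand sinking transform
// (presumably VPlanTransforms::sinkScalarOperands, inferred from context):
// it seeds a worklist with single-def operands of recipes inside replicate
// regions, then sinks each candidate into the block that uses it, cloning
// the candidate when only its first lane is used by outside users.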
  bool Changed = false;
  // ...
  for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
    // ...
      for (auto &Recipe : *VPBB) {
        // ...
            dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
          WorkList.insert({VPBB, Def});
  // ...
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    // ...
    std::tie(SinkTo, SinkCandidate) = WorkList[I];
    if (SinkCandidate->getParent() == SinkTo ||
        // ...
    if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
      if (!ScalarVFOnly && RepR->isSingleScalar())
        // ...
    } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
      // ...
    bool NeedsDuplicating = false;
    // ...
    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
                            SinkCandidate](VPUser *U) {
      auto *UI = cast<VPRecipeBase>(U);
      if (UI->getParent() == SinkTo)
        // ...
      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
      // ...
      return NeedsDuplicating &&
             isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
    };
    if (!all_of(SinkCandidate->users(), CanSinkWithUser))
      // ...
    if (NeedsDuplicating) {
      // ...
      if (auto *SinkCandidateRepR =
              dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
        // ...
            nullptr, *SinkCandidateRepR);
      // ...
        Clone = SinkCandidate->clone();
      // ...
        return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
    // ...
        dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
      WorkList.insert({SinkTo, Def});
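
// The two helpers below appear to recognize predicated replicate regions:
// one returns the mask of a region whose entry block holds a single
// VPBranchOnMaskRecipe, the other matches the "if-then" block shape (two
// successors, exactly one of which rejoins the other).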
  auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
  if (!EntryBB || EntryBB->size() != 1 ||
      !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
    // ...
  return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
// ...
  auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
  if (EntryBB->getNumSuccessors() != 2)
    // ...
  auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
  auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
  if (!Succ0 || !Succ1)
    // ...
  if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
    // ...
  if (Succ0->getSingleSuccessor() == Succ1)
    // ...
  if (Succ1->getSingleSuccessor() == Succ0)
  for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
           // ...
    if (!Region1->isReplicator())
      // ...
    auto *MiddleBasicBlock =
        dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
    if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
      // ...
        dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
    if (!Region2 || !Region2->isReplicator())
      // ...
    if (!Mask1 || Mask1 != Mask2)
      // ...
    assert(Mask1 && Mask2 && "both regions must have conditions");
    // ...
    if (TransformedRegions.contains(Region1))
      // ...
    auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
    auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
    // ...
    if (!Then1 || !Then2)
      // ...
        cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
    VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
    // ...
      return cast<VPRecipeBase>(&U)->getParent() == Then2;
    // ...
    if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
      Phi1ToMove.eraseFromParent();
      // ...
    Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
    // ...
    TransformedRegions.insert(Region1);
  // ...
  return !TransformedRegions.empty();
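
// The next fragment appears to build a replicate region named
// "pred.<opcode>" around a predicated instruction (presumably
// createReplicateRegion), branching on the recipe's block-in mask.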
  std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
  assert(Instr->getParent() && "Predicated instruction not in any basic block");
  auto *BlockInMask = PredRecipe->getMask();
  // ...
      BlockInMask, MaskDef ? MaskDef->getDebugLoc() : DebugLoc());
  // ...
      RecipeWithoutMask->getDebugLoc());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
        if (RepR->isPredicated())
          // ...
  if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
    if (!VPBB->getParent())
      // ...
        dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
    if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
        isa<VPIRBasicBlock>(PredVPBB))
      // ...
    VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
    // ...
      R.moveBefore(*PredVPBB, PredVPBB->end());
    // ...
    auto *ParentRegion = VPBB->getParent();
    if (ParentRegion && ParentRegion->getExiting() == VPBB)
      ParentRegion->setExiting(PredVPBB);
    for (auto *Succ : to_vector(VPBB->successors())) {
      // ...
  return !WorkList.empty();
  bool ShouldSimplify = true;
  while (ShouldSimplify) {
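
// Next appears to be removal of redundant induction casts: the cast chain
// recorded in the induction descriptor is walked user by user so the casts
// can be folded into the VPWidenIntOrFpInductionRecipe itself.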
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!IV || IV->getTruncInst())
      // ...
    auto &Casts = IV->getInductionDescriptor().getCastInsts();
    // ...
      for (auto *U : FindMyCast->users()) {
        auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
        if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
          FoundUserCast = UserCast;
          // ...
      FindMyCast = FoundUserCast;
      WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
    // ...
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
      // ...
  auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
  bool IsConditionalAssume = RepR && RepR->isPredicated() &&
                             match(RepR, m_Intrinsic<Intrinsic::assume>());
  if (IsConditionalAssume)
    // ...
  if (R.mayHaveSideEffects())
    // ...
  return all_of(R.definedValues(),
                [](VPValue *V) { return V->getNumUsers() == 0; });
// ...
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      auto *PhiR = dyn_cast<VPPhi>(&R);
      if (!PhiR || PhiR->getNumOperands() != 2 || PhiR->getNumUsers() != 1)
        // ...
      if (*PhiR->user_begin() != Incoming->getDefiningRecipe() ||
          // ...
      PhiR->replaceAllUsesWith(PhiR->getOperand(0));
      PhiR->eraseFromParent();
      Incoming->getDefiningRecipe()->eraseFromParent();
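
// The fragments below appear to create scalar IV steps and legalize wide
// inductions (presumably createScalarIVSteps, collectUsersRecursively and
// legalizeAndOptimizeInductions): wide inductions whose users only need
// scalar values are rewritten in terms of a VPScalarIVStepsRecipe derived
// from the canonical IV.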
      Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
  // ...
  if (ResultTy != StepTy) {
    // ...
  for (unsigned I = 0; I != Users.size(); ++I) {
    // ...
    if (isa<VPHeaderPHIRecipe>(Cur))
      // ...
      Users.insert_range(V->users());
  // ...
  return Users.takeVector();
// ...
    auto *PhiR = dyn_cast<VPWidenInductionRecipe>(&Phi);
    // ...
      auto *Def = dyn_cast<VPSingleDefRecipe>(U);
      auto *RepR = dyn_cast<VPReplicateRecipe>(U);
      // ...
      if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
          Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
          (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
        // ...
          Def->operands(), true);
      Clone->insertAfter(Def);
      Def->replaceAllUsesWith(Clone);
    // ...
    if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
      // ...
      VPValue *StepV = PtrIV->getOperand(1);
      // ...
          nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
      // ...
    auto *WideIV = cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
          return U->usesScalars(WideIV);
        }))
      // ...
        Plan, ID.getKind(), ID.getInductionOpcode(),
        dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
        WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
        WideIV->getDebugLoc(), Builder);
    // ...
    if (!HasOnlyVectorVFs)
      WideIV->replaceAllUsesWith(Steps);
    else
      WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
        return U.usesScalars(WideIV);
      });
  auto *WideIV = dyn_cast<VPWidenInductionRecipe>(VPV);
  // ...
    auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
    return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
  // ...
  if (!Def || Def->getNumOperands() != 2)
    // ...
  WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
  // ...
    WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
  // ...
  auto IsWideIVInc = [&]() {
    auto &ID = WideIV->getInductionDescriptor();
    // ...
    VPValue *IVStep = WideIV->getStepValue();
    switch (ID.getInductionOpcode()) {
    case Instruction::Add:
      // ...
    case Instruction::FAdd:
      // ...
    case Instruction::FSub:
      // ...
    case Instruction::Sub: {
      // ...
      return !isa<SCEVCouldNotCompute>(IVStepSCEV) &&
             !isa<SCEVCouldNotCompute>(StepSCEV) &&
             // ...
  return IsWideIVInc() ? WideIV : nullptr;
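
// The next fragments appear to optimize induction users in exit blocks:
// computing the end value of an optimizable IV for the early-exit case (via
// FirstActiveLane) and for the latch exit (stepping back once from the end
// value), then rewiring the corresponding exit-phi operands.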
  if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
                     m_VPInstruction<VPInstruction::FirstActiveLane>(
                         // ...
  auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
  if (WideIntOrFp && WideIntOrFp->getTruncInst())
    // ...
  FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
                                              FirstActiveLaneType, DL);
  EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
  // ...
  EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
  // ...
  if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
    // ...
    VPValue *Start = WideIV->getStartValue();
    VPValue *Step = WideIV->getStepValue();
    EndValue = B.createDerivedIV(
        ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
        Start, EndValue, Step);
  assert(EndValue && "end value must have been pre-computed");
  // ...
  VPBuilder B(cast<VPBasicBlock>(PredVPBB)->getTerminator());
  VPValue *Step = WideIV->getStepValue();
  // ...
    return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
  // ...
    return B.createPtrAdd(EndValue,
                          B.createNaryOp(Instruction::Sub, {Zero, Step}), {},
                          // ...
  const auto &ID = WideIV->getInductionDescriptor();
  return B.createNaryOp(
      ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
          // ...
      {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
      auto *ExitIRI = cast<VPIRPhi>(&R);
      // ...
      for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
        // ...
        if (PredVPBB == MiddleVPBB)
          // ...
              ExitIRI->getOperand(Idx),
              // ...
              ExitIRI->getOperand(Idx), SE);
        // ...
          ExitIRI->setOperand(Idx, Escape);
      auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
      // ...
      const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);
      // ...
      ExpR->replaceAllUsesWith(V->second);
      ExpR->eraseFromParent();
// ...
  while (!WorkList.empty()) {
    // ...
    if (!Seen.insert(Cur).second)
      // ...
    WorkList.append(R->op_begin(), R->op_end());
    R->eraseFromParent();
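
// Below appear to be constant folding of live-in operands through an
// InstSimplifyFolder (presumably tryToFoldLiveIns), followed by the large
// per-recipe peephole simplification (presumably simplifyRecipe), which
// folds truncate/extend chains, redundant selects and negated compares,
// extracts from BuildVector, single-entry phis and no-op pointer or
// scalar-steps recipes.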
    if (!Op->isLiveIn() || !Op->getLiveInIRValue())
      // ...
    return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
                            // ...
  case Instruction::Select:
    return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
                          // ...
  case Instruction::GetElementPtr: {
    auto &RFlags = cast<VPRecipeWithIRFlags>(R);
    auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
    // ...
        RFlags.getGEPNoWrapFlags());
    // ...
        cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
  // ...
  case Instruction::ExtractElement:
    assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
  VPlan *Plan = R.getParent()->getPlan();
  // ...
  auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
  // ...
          .Default([](auto *) { return false; }))
    // ...
  if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
    // ...
      PredPHI->replaceAllUsesWith(Op);
  // ...
    if (TruncTy == ATy) {
      Def->replaceAllUsesWith(A);
      // ...
      if (isa<VPReplicateRecipe>(Def))
        // ...
                         : Instruction::ZExt;
      // ...
      if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
        // ...
        VPC->setUnderlyingValue(UnderlyingExt);
      }
      VPC->insertBefore(&R);
      Def->replaceAllUsesWith(VPC);
      // ...
      VPC->insertBefore(&R);
      Def->replaceAllUsesWith(VPC);
    // ...
    for (VPUser *U : A->users()) {
      auto *R = cast<VPRecipeBase>(U);
      for (VPValue *VPV : R->definedValues())
        // ...
    Def->replaceAllUsesWith(X);
    Def->eraseFromParent();
    // ...
    Def->replaceAllUsesWith(Def->getOperand(0) == X ? Def->getOperand(1)
                                                    : Def->getOperand(0));
    Def->eraseFromParent();
    // ...
    Def->replaceAllUsesWith(R.getOperand(0) == X ? R.getOperand(1)
                                                 // ...
    return Def->replaceAllUsesWith(X);
  // ...
    Def->setOperand(0, C);
    Def->setOperand(1, Y);
    Def->setOperand(2, X);
    // ...
    return Def->replaceAllUsesWith(A);
  // ...
    return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
                                                        // ...
    return Def->replaceAllUsesWith(A);
  // ...
    auto *Cmp = cast<VPRecipeWithIRFlags>(A);
    // ...
      return match(U, m_CombineOr(m_Not(m_Specific(Cmp)),
                                  m_Select(m_Specific(Cmp), m_VPValue(),
                                           // ...
      auto *R = cast<VPSingleDefRecipe>(U);
      // ...
        R->setOperand(1, Y);
        R->setOperand(2, X);
      // ...
        R->replaceAllUsesWith(Cmp);
    // ...
    if (!Cmp->getDebugLoc() && R.getDebugLoc())
      Cmp->setDebugLoc(R.getDebugLoc());
  // ...
    return Def->replaceAllUsesWith(Def->getOperand(1));
  if (match(Def, m_VPInstruction<VPInstruction::WideIVStep>(
                     // ...
    Def->replaceAllUsesWith(X);
  // ...
    Def->setOperand(1, Def->getOperand(0));
    Def->setOperand(0, Y);
    // ...
  if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
    if (Phi->getOperand(0) == Phi->getOperand(1))
      Def->replaceAllUsesWith(Phi->getOperand(0));
    // ...
    auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
    Def->replaceAllUsesWith(
        BuildVector->getOperand(BuildVector->getNumOperands() - 1));
    // ...
  if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
                    // ...
    auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
    Def->replaceAllUsesWith(
        BuildVector->getOperand(BuildVector->getNumOperands() - 2));
    // ...
  if (auto *Phi = dyn_cast<VPPhi>(Def)) {
    if (Phi->getNumOperands() == 1)
      Phi->replaceAllUsesWith(Phi->getOperand(0));
    // ...
  if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
    if (VecPtr->isFirstPart()) {
      VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
      // ...
  if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
    // ...
      Steps->replaceAllUsesWith(Steps->getOperand(0));
      // ...
  if (match(Def, m_VPInstruction<VPInstruction::ReductionStartVector>(
                     // ...
    Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
      auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&U);
      return PhiR && PhiR->isInLoop();
    });
  // ...
    Def->replaceAllUsesWith(A);
  // ...
    Def->replaceAllUsesWith(OpVPI);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // ...
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPReplicateRecipe>(&R))
        // ...
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
        // ...
      auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
      // ...
          RepOrWidenR->operands(),
          // ...
      Clone->insertBefore(RepOrWidenR);
      RepOrWidenR->replaceAllUsesWith(Clone);
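
// Next appears to be blend simplification (presumably simplifyBlends):
// blends whose incoming values are all identical collapse entirely, and the
// remaining incoming values and masks are renormalized into a new
// VPBlendRecipe, erasing masks that become dead.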
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      auto *Blend = dyn_cast<VPBlendRecipe>(&R);
      // ...
      if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
        UniqueValues.insert(Blend->getIncomingValue(0));
      for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
        // ...
          UniqueValues.insert(Blend->getIncomingValue(I));
      if (UniqueValues.size() == 1) {
        Blend->replaceAllUsesWith(*UniqueValues.begin());
        Blend->eraseFromParent();
        // ...
      if (Blend->isNormalized())
        // ...
      unsigned StartIndex = 0;
      for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
        // ...
        if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
          // ...
      OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
      for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
        if (I == StartIndex)
          // ...
        OperandsWithMask.push_back(Blend->getIncomingValue(I));
        OperandsWithMask.push_back(Blend->getMask(I));
      // ...
          new VPBlendRecipe(cast_or_null<PHINode>(Blend->getUnderlyingValue()),
                            OperandsWithMask, Blend->getDebugLoc());
      NewBlend->insertBefore(&R);
      // ...
      VPValue *DeadMask = Blend->getMask(StartIndex);
      // ...
      Blend->eraseFromParent();
      // ...
      if (NewBlend->getNumOperands() == 3 &&
          // ...
        VPValue *Inc0 = NewBlend->getOperand(0);
        VPValue *Inc1 = NewBlend->getOperand(1);
        VPValue *OldMask = NewBlend->getOperand(2);
        NewBlend->setOperand(0, Inc1);
        NewBlend->setOperand(1, Inc0);
        NewBlend->setOperand(2, NewMask);
        // ...
          cast<VPInstruction>(OldMask)->eraseFromParent();
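
// The fragment below appears to shrink the canonical wide IV to a narrower
// type when a small constant trip count allows it: start (0) and step (1)
// are re-created in the new type and the exit compare is rewired to a
// matching backedge-taken count.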
  auto *TC = dyn_cast_if_present<ConstantInt>(
      // ...
  APInt MaxVal = AlignedTC - 1;
  // ...
  unsigned NewBitWidth =
      // ...
  bool MadeChange = false;
  // ...
    auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    // ...
    if (!WideIV || !WideIV->isCanonical() ||
        WideIV->hasMoreThanOneUniqueUser() ||
        NewIVTy == WideIV->getScalarType())
      // ...
    if (!match(*WideIV->user_begin(),
               // ...
    auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
    WideIV->setStartValue(NewStart);
    auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
    WideIV->setStepValue(NewStep);
    // ...
    auto *Cmp = cast<VPInstruction>(*WideIV->user_begin());
    Cmp->setOperand(1, NewBTC);
    return any_of(Cond->getDefiningRecipe()->operands(),
                  [&Plan, BestVF, BestUF, &SE](VPValue *C) {
                    return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF,
                                                     SE);
                  });
  // ...
  const SCEV *VectorTripCount =
      // ...
  if (isa<SCEVCouldNotCompute>(VectorTripCount))
    // ...
  assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
         "Trip count SCEV must be computable");
  auto *Term = &ExitingVPBB->back();
  // ...
  const SCEV *TripCount =
      // ...
  assert(!isa<SCEVCouldNotCompute>(TripCount) &&
         "Trip count SCEV must be computable");
  // ...
  if (TripCount->isZero() ||
      // ...
  auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
  // ...
    if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
      return R->isCanonical();
    return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
               VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
  // ...
    if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&HeaderR)) {
      // ...
          R->getScalarType());
      // ...
      HeaderR.eraseFromParent();
      // ...
    auto *Phi = cast<VPPhiAccessors>(&HeaderR);
    HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
    HeaderR.eraseFromParent();
  // ...
    B->setParent(nullptr);
  // ...
      Term->getDebugLoc());
  // ...
  Term->eraseFromParent();
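
// After the BestVF/BestUF assertions of what appears to be
// optimizeForVFAndUF, the next fragments handle fixed-order recurrences:
// users of the recurrence phi are sunk after its "previous" value (or the
// previous value is hoisted before them), and a FirstOrderRecurrenceSplice
// instruction combines the phi with its backedge value. Fragments dropping
// reduction wrap flags and hoisting loop-invariant recipes follow.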
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  // ...
  assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");
  auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
    // ...
    if (SinkCandidate == Previous)
      // ...
    if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
        !Seen.insert(SinkCandidate).second ||
        // ...
    if (SinkCandidate->mayHaveSideEffects())
      // ...
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    // ...
           "only recipes with a single defined value expected");
    // ...
      if (!TryToPushSinkCandidate(cast<VPRecipeBase>(User)))
        // ...
    if (SinkCandidate == FOR)
      // ...
    SinkCandidate->moveAfter(Previous);
    Previous = SinkCandidate;
  for (VPUser *U : FOR->users()) {
    auto *R = cast<VPRecipeBase>(U);
    // ...
                [&VPDT, HoistPoint](VPUser *U) {
                  auto *R = cast<VPRecipeBase>(U);
                  return HoistPoint == R ||
                         VPDT.properlyDominates(HoistPoint, R);
                }) &&
         "HoistPoint must dominate all users of FOR");
  // ...
  auto NeedsHoisting = [HoistPoint, &VPDT,
                        // ...
    VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
    if (!HoistCandidate)
      // ...
           "CFG in VPlan should still be flat, without replicate regions");
    // ...
    if (!Visited.insert(HoistCandidate).second)
      // ...
    if (!EnclosingLoopRegion || isa<VPHeaderPHIRecipe>(HoistCandidate))
      // ...
    return HoistCandidate;
  // ...
  for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
    // ...
           "only recipes with a single defined value expected");
    if (!CanHoist(Current))
      // ...
      if (auto *R = NeedsHoisting(Op))
        // ...
    HoistCandidate->moveBefore(*HoistPoint->getParent(),
                               // ...
    if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
      // ...
    VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
    // ...
    while (auto *PrevPhi =
               dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
      assert(PrevPhi->getParent() == FOR->getParent());
      // ...
      Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
    // ...
    if (isa<VPHeaderPHIRecipe>(Previous))
      // ...
        {FOR, FOR->getBackedgeValue()});
    // ...
    FOR->replaceAllUsesWith(RecurSplice);
    // ...
    RecurSplice->setOperand(0, FOR);
    auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
    // ...
    RecurKind RK = PhiR->getRecurrenceKind();
    // ...
      if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) {
        RecWithFlags->dropPoisonGeneratingFlags();
    auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
    return RepR && RepR->getOpcode() == Instruction::Alloca;
  // ...
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      if (CannotHoistRecipe(R))
        // ...
      if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
          // ...
            return !Op->isDefinedOutsideLoopRegions();
          // ...
      R.moveBefore(*Preheader, Preheader->end());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      VPValue *ResultVPV = R.getVPSingleValue();
      // ...
      unsigned NewResSizeInBits = MinBWs.lookup(UI);
      if (!NewResSizeInBits)
        // ...
      if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R))
        // ...
      (void)OldResSizeInBits;
      // ...
      if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
        VPW->dropPoisonGeneratingFlags();
      // ...
      if (OldResSizeInBits != NewResSizeInBits &&
          // ...
        Ext->insertAfter(&R);
        // ...
        Ext->setOperand(0, ResultVPV);
        assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
        // ...
               "Only ICmps should not need extending the result.");
      // ...
      assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
      if (isa<VPWidenLoadRecipe, VPWidenIntrinsicRecipe>(&R))
        // ...
      unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
      for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
        auto *Op = R.getOperand(Idx);
        unsigned OpSizeInBits =
            // ...
        if (OpSizeInBits == NewResSizeInBits)
          // ...
        assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
        auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
        // ...
                             : ProcessedIter->second;
        R.setOperand(Idx, NewOp);
        // ...
        ProcessedIter->second = NewOp;
        if (!Op->isLiveIn()) {
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
    if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
        // ...
    unsigned RemovedIdx;
    // ...
        cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
    // ...
           "There must be a single edge between VPBB and its successor");
    // ...
        cast<VPPhiAccessors>(&R)->removeIncomingValueFor(VPBB);
    // ...
    VPBB->back().eraseFromParent();
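
// Below appears to be the active-lane-mask transform: a phi for the active
// lane mask is created from the canonical IV, incremented in the loop and
// compared against the trip count, replacing the header mask computed from
// a widened canonical IV.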
  VPValue *StartV = CanonicalIVPHI->getStartValue();
  // ...
  auto *CanonicalIVIncrement =
      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
  // ...
  CanonicalIVIncrement->dropPoisonGeneratingFlags();
  DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
  // ...
  VPValue *TripCount, *IncrementValue;
  // ...
    IncrementValue = CanonicalIVIncrement;
    // ...
    IncrementValue = CanonicalIVPHI;
    // ...
      DL, "active.lane.mask.entry");
  // ...
  LaneMaskPhi->insertAfter(CanonicalIVPHI);
  // ...
  auto *InLoopIncrement =
      // ...
                                   {IncrementValue}, {false, false}, DL);
  // ...
      {InLoopIncrement, TripCount}, DL, "active.lane.mask.next");
    auto *FoundWidenCanonicalIVUser =
        // ...
                [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
    // ...
                   [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
               // ...
           "Must have at most one VPWidenCanonicalIVRecipe");
    // ...
    auto *WideCanonicalIV =
        cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
    WideCanonicalIVs.push_back(WideCanonicalIV);
  // ...
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (WidenOriginalIV && WidenOriginalIV->isCanonical())
      WideCanonicalIVs.push_back(WidenOriginalIV);
  // ...
  for (auto *Wide : WideCanonicalIVs) {
    // ...
      auto *VPI = dyn_cast<VPInstruction>(U);
      // ...
      assert(VPI->getOperand(0) == Wide &&
             "WidenCanonicalIV must be the first operand of the compare");
      assert(!HeaderMask && "Multiple header masks found?");
    VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
    // ...
          UseActiveLaneMaskForControlFlow) &&
         "DataAndControlFlowWithoutRuntimeCheck implies "
         "UseActiveLaneMaskForControlFlow");
  // ...
  auto *FoundWidenCanonicalIVUser =
      // ...
              [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
  assert(FoundWidenCanonicalIVUser &&
         "Must have widened canonical IV when tail folding!");
  // ...
  auto *WideCanonicalIV =
      cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
  // ...
  if (UseActiveLaneMaskForControlFlow) {
    // ...
        "active.lane.mask");
  // ...
    assert(OrigMask && "Unmasked recipe when folding tail");
    // ...
    return HeaderMask == OrigMask ? nullptr : OrigMask;
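
// The following fragments appear to implement EVL-based tail folding
// (presumably addExplicitVectorLength and its helpers): masked recipes are
// rewritten as VP intrinsics with an explicit vector length, an EVL-based
// IV phi takes over from the canonical IV, and first-order recurrences are
// lowered to llvm.experimental.vp.splice.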
    auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
    // ...
    assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
           "VPVectorEndPointerRecipe with non-VF VF operand?");
    // ...
             return cast<VPWidenMemoryRecipe>(U)->isReverse();
           // ...
           "VPVectorEndPointerRecipe not used by reversed widened memory recipe?");
  // ...
    VPValue *NewMask = GetNewMask(L->getMask());
    VPValue *NewAddr = GetNewAddr(L->getAddr());
    // ...
    VPValue *NewMask = GetNewMask(Red->getCondOp());
    // ...
        Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
        // ...
         "User of VF that we can't transform to EVL.");
  // ...
    return isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe>(U);
  // ...
    return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
                            m_Specific(&Plan.getVFxUF()))) ||
           isa<VPWidenPointerInductionRecipe>(U);
  // ...
         "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
         "increment of the canonical induction.");
  // ...
    return isa<VPWidenPointerInductionRecipe>(U);
      any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
  // ...
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
              m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                  // ...
          Intrinsic::experimental_vp_splice,
          {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
          // ...
      R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
    auto *CurRecipe = cast<VPRecipeBase>(U);
    // ...
    assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
           "New recipe must define the same number of values as the "
           // ...
           "Only supports recipes with a single definition or without users.");
    // ...
    if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(EVLRecipe)) {
      VPValue *CurVPV = CurRecipe->getVPSingleValue();
      // ...
    R->eraseFromParent();
    VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
  // ...
  VPValue *StartV = CanonicalIVPHI->getStartValue();
  // ...
  EVLPhi->insertAfter(CanonicalIVPHI);
  VPBuilder Builder(Header, Header->getFirstNonPhi());
  // ...
  if (MaxSafeElements) {
    // ...
        Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, *MaxSafeElements));
  // ...
  auto *CanonicalIVIncrement =
      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
  // ...
      OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
  // ...
      Instruction::Add, {OpVPEVL, EVLPhi},
      {CanonicalIVIncrement->hasNoUnsignedWrap(),
       CanonicalIVIncrement->hasNoSignedWrap()},
      CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
  // ...
      Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},
      // ...
  CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      if (auto *PhiR = dyn_cast<VPEVLBasedIVPHIRecipe>(&R)) {
        assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");
        // ...
  [[maybe_unused]] bool FoundAVL =
      // ...
  assert(FoundAVL && "Didn't find AVL?");
  // ...
  [[maybe_unused]] bool FoundAVLNext =
      match(AVL, m_VPInstruction<Instruction::PHI>(
                     // ...
  assert(FoundAVLNext && "Didn't find AVL backedge?");
  // ...
  auto *CanonicalIV = cast<VPPhi>(&*HeaderVPBB->begin());
  VPValue *Backedge = CanonicalIV->getIncomingValue(1);
  // ...
         "Unexpected canonical iv");
  // ...
  CanonicalIV->eraseFromParent();
  // ...
  auto *LatchExitingBr = cast<VPInstruction>(LatchExiting->getTerminator());
  // ...
  match(LatchExitingBr,
        // ...
         "Unexpected terminator in EVL loop");
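
// Next appears to be dropPoisonGeneratingRecipes: for each consecutive
// memory recipe whose address feeds a block that needs predication, the
// backward slice of the address computation is walked and poison-generating
// flags are dropped (rewriting a disjoint-or into an add where required).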
    const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
  // ...
  auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
    // ...
    while (!Worklist.empty()) {
      VPRecipeBase *CurRec = Worklist.pop_back_val();
      // ...
      if (!Visited.insert(CurRec).second)
        // ...
      if (isa<VPWidenMemoryRecipe, VPInterleaveRecipe, VPScalarIVStepsRecipe,
              VPHeaderPHIRecipe>(CurRec))
        // ...
      if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
        // ...
        if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
            RecWithFlags->isDisjoint()) {
          VPBuilder Builder(RecWithFlags);
          VPInstruction *New = Builder.createOverflowingOp(
              Instruction::Add, {A, B}, {false, false},
              RecWithFlags->getDebugLoc());
          New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
          RecWithFlags->replaceAllUsesWith(New);
          RecWithFlags->eraseFromParent();
          // ...
          RecWithFlags->dropPoisonGeneratingFlags();
        // ...
        Instruction *Instr = dyn_cast_or_null<Instruction>(
            CurRec->getVPSingleValue()->getUnderlyingValue());
        // ...
        assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
               "found instruction with poison generating flags not covered by "
               "VPRecipeWithIRFlags");
  // ...
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
    // ...
      if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
        Instruction &UnderlyingInstr = WidenRec->getIngredient();
        VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
        if (AddrDef && WidenRec->isConsecutive() &&
            BlockNeedsPredication(UnderlyingInstr.getParent()))
          CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
      } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
        VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
        // ...
            InterleaveRec->getInterleaveGroup();
        bool NeedPredication = false;
        // ...
             I < NumMembers; ++I) {
          // ...
          NeedPredication |= BlockNeedsPredication(Member->getParent());
        // ...
        if (NeedPredication)
          CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
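
// The fragment below appears to create VPInterleaveRecipes from the
// cost model's interleave groups, adjusting the address to the group's
// insert position (including reversed groups) and adding a mask for gaps
// when the group is not full.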
  if (InterleaveGroups.empty())
    // ...
  for (const auto *IG : InterleaveGroups) {
    // ...
        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
    // ...
    if (auto *StoreR = dyn_cast<VPWidenStoreRecipe>(Start))
      StoredValues.push_back(StoreR->getStoredValue());
    for (unsigned I = 1; I < IG->getFactor(); ++I) {
      // ...
          cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(MemberI));
      if (auto *StoreR = dyn_cast<VPWidenStoreRecipe>(MemoryR))
        StoredValues.push_back(StoreR->getStoredValue());
    // ...
    bool NeedsMaskForGaps =
        (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
        (!StoredValues.empty() && !IG->isFull());
    // ...
        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
    // ...
    if (auto *Gep = dyn_cast<GetElementPtrInst>(
            // ...
      assert(IG->getIndex(IRInsertPos) != 0 &&
             "index of insert position shouldn't be zero");
      // ...
          IG->getIndex(IRInsertPos),
          // ...
      Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
    // ...
    if (IG->isReverse()) {
      // ...
          -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
      ReversePtr->insertBefore(InsertPos);
    // ...
        InsertPos->getMask(), NeedsMaskForGaps,
        InterleaveMD, InsertPos->getDebugLoc());
    VPIG->insertBefore(InsertPos);
    // ...
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      // ...
      if (!Member->getType()->isVoidTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
    // ...
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
    Flags = ID.getInductionBinOp()->getFastMathFlags();
  // ...
  WidePHI->addOperand(Init);
  WidePHI->insertBefore(WidenIVR);
  // ...
  Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
  // ...
  auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
                                    // ...
  VPlan *Plan = R->getParent()->getPlan();
  VPValue *Start = R->getStartValue();
  VPValue *Step = R->getStepValue();
  VPValue *VF = R->getVFValue();
  // ...
  assert(R->getInductionDescriptor().getKind() ==
             // ...
         "Not a pointer induction according to InductionDescriptor!");
  // ...
         "Recipe should have been replaced");
  // ...
  Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
  // ...
  R->replaceAllUsesWith(PtrAdd);
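
// Below appears to be the lowering to concrete recipes (presumably
// convertToConcreteRecipes together with dissolveLoopRegions): replicate
// regions are dissolved into plain CFG loops, wide IV recipes are expanded
// into phi-and-step arithmetic, blends become select chains, and WideIVStep
// instructions are materialized as scaled vector steps.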
  for (VPRegionBlock *R : VPBlockUtils::blocksOnly<VPRegionBlock>(
           // ...
    if (!R->isReplicator())
      // ...
      R->dissolveToCFGLoop();
  // ...
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
        // ...
      if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
        // ...
      if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
        // ...
        for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
          // ...
                                    Blend->getIncomingValue(I), Select,
                                    R.getDebugLoc(), "predphi");
        // ...
      if (auto *Expr = dyn_cast<VPExpressionRecipe>(&R)) {
        // ...
      if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(
                         // ...
      auto *VPI = cast<VPInstruction>(&R);
      // ...
                         ? Instruction::UIToFP
                         : Instruction::Trunc;
      // ...
      [[maybe_unused]] auto *ConstStep =
          // ...
      assert(!ConstStep || ConstStep->getValue() != 1);
      // ...
        Flags = {VPI->getFastMathFlags()};
      // ...
          MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
      // ...
      VPI->replaceAllUsesWith(VectorStep);
  // ...
    R->eraseFromParent();
         "unsupported early exit VPBB");
  // ...
      cast<VPIRPhi>(&R)->swapOperands();
  // ...
         "Terminator must be BranchOnCond");
  VPValue *CondOfEarlyExitingVPBB =
      // ...
  auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
                              ? CondOfEarlyExitingVPBB
                              : Builder.createNot(CondOfEarlyExitingVPBB);
  // ...
  VPBuilder EarlyExitB(VectorEarlyExitVPBB);
  // ...
    auto *ExitIRI = cast<VPIRPhi>(&R);
    // ...
    unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
    if (ExitIRI->getNumOperands() != 1) {
      // ...
      ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
    // ...
    VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
    if (!IncomingFromEarlyExit->isLiveIn()) {
      // ...
          "first.active.lane");
      // ...
          nullptr, "early.exit.value");
      ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
  // ...
  auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
  // ...
         "Unexpected terminator");
  auto *IsLatchExitTaken =
      // ...
          LatchExitingBranch->getOperand(1));
  // ...
      Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
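
// The next fragments appear to bundle reductions into VPExpressionRecipes:
// extended reductions (ext + reduce) and multiply-accumulate reductions
// (ext + mul + reduce) are formed when the target's combined cost beats the
// sum of the separate recipes' costs.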
  VPValue *VecOp = Red->getVecOp();
  // ...
  auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt,
                                             Type *SrcTy) -> bool {
    // ...
    auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
    // ...
        Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
        // ...
        cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
    // ...
    return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
  // ...
      IsExtendedRedValidAndClampRange(
          // ...
          cast<VPWidenCastRecipe>(VecOp)->getOpcode() ==
              Instruction::CastOps::ZExt,
          // ...
  if (Opcode != Instruction::Add)
    // ...
  auto IsMulAccValidAndClampRange =
      // ...
    auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
    // ...
    ExtCost += Ext0->computeCost(VF, Ctx);
    // ...
    ExtCost += Ext1->computeCost(VF, Ctx);
    // ...
    ExtCost += OuterExt->computeCost(VF, Ctx);
    // ...
    return MulAccCost.isValid() &&
           MulAccCost < ExtCost + MulCost + RedCost;
  VPValue *VecOp = Red->getVecOp();
  // ...
      dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
  // ...
      dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
  // ...
    if (RecipeA && RecipeB &&
        (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
        // ...
        IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
                                       Instruction::CastOps::ZExt,
                                   Mul, RecipeA, RecipeB, nullptr)) {
      // ...
    if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
      // ...
    auto *Mul = cast<VPWidenRecipe>(Ext->getOperand(0)->getDefiningRecipe());
    // ...
        cast<VPWidenCastRecipe>(Mul->getOperand(0)->getDefiningRecipe());
    // ...
        cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
    if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
        Ext0->getOpcode() == Ext1->getOpcode() &&
        IsMulAccValidAndClampRange(Ext0->getOpcode() ==
                                       Instruction::CastOps::ZExt,
                                   Mul, Ext0, Ext1, Ext)) {
      // ...
          Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
          Ext0->getDebugLoc());
      NewExt0->insertBefore(Ext0);
      // ...
                                       Ext->getResultType(), *Ext1,
                                       Ext1->getDebugLoc());
      // ...
      Mul->setOperand(0, NewExt0);
      Mul->setOperand(1, NewExt1);
      Red->setOperand(1, Mul);
  // ...
  auto IP = std::next(Red->getIterator());
  auto *VPBB = Red->getParent();
  // ...
  Red->replaceAllUsesWith(AbstractR);
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           // ...
      if (auto *Red = dyn_cast<VPReductionRecipe>(&R))
  for (VPValue *VPV : VPValues) {
    // ...
        (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
         isa<Constant>(VPV->getLiveInIRValue())))
      // ...
      if (User->usesScalars(VPV))
        // ...
      if (cast<VPRecipeBase>(User)->getParent() == VectorPreheader)
        // ...
    HoistPoint = HoistBlock->begin();
    // ...
                cast<VPRecipeBase>(User)->getParent()) &&
           "All users must be in the vector preheader or dominated by it");
    // ...
    VPBuilder Builder(cast<VPBasicBlock>(HoistBlock), HoistPoint);
    // ...
        [VPV, Broadcast](VPUser &U, unsigned Idx) {
          return Broadcast != &U && !U.usesScalars(VPV);
        });
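
// The fragments below appear to materialize previously implicit values:
// constant vector trip counts, BuildVector recipes for scalar definitions
// with vector users, the vector trip count computation (tail folding and
// scalar-epilogue cases), and expansion of SCEVs at the start of the entry
// block (presumably expandSCEVs).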
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  // ...
  if (!isa<SCEVConstant>(TCScev))
    // ...
  if (auto *ConstVecTC = dyn_cast<SCEVConstant>(VecTCScev))
  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      // ...
  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      // ...
       concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
    // ...
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
        // ...
        return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
      };
      if (!RepR || RepR->isSingleScalar() ||
          none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
        // ...
      RepR->replaceUsesWithIf(
          BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
                           // ...
            return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
    bool RequiresScalarEpilogue) {
  // ...
  assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");
  // ...
  if (TailByMasking) {
    // ...
  if (RequiresScalarEpilogue) {
    // ...
           "requiring scalar epilogue is not supported with tail folding");
  // ...
      BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });
  auto *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
  BasicBlock *EntryBB = Entry->getIRBasicBlock();
  // ...
    if (isa<VPIRInstruction, VPIRPhi>(&R))
      // ...
    auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
    // ...
    const SCEV *Expr = ExpSCEV->getSCEV();
    // ...
    ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
    // ...
    ExpSCEV->eraseFromParent();
  // ...
         "VPExpandSCEVRecipes must be at the beginning of the entry block, "
         "after any VPIRInstructions");
  // ...
  auto EI = Entry->begin();
  // ...
    if (EI != Entry->end() && isa<VPIRInstruction>(*EI) &&
        &cast<VPIRInstruction>(&*EI)->getInstruction() == &I) {
      // ...
  return ExpandedSCEVs;
  if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR))
    // ...
  if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR))
    return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;
// ...
                             unsigned VectorRegWidth) {
  // ...
  Type *GroupElementTy = nullptr;
  // ...
               [&TypeInfo, GroupElementTy](VPValue *Op) {
                 return TypeInfo.inferScalarType(Op) == GroupElementTy;
               })
    // ...
               [&TypeInfo, GroupElementTy](VPValue *Op) {
                 return TypeInfo.inferScalarType(Op) == GroupElementTy;
               })
    // ...
  return IG->getFactor() == VF && IG->getNumMembers() == VF &&
         GroupSize == VectorRegWidth;
// ...
  auto *RepR = dyn_cast<VPReplicateRecipe>(VPV);
  return RepR && RepR->isSingleScalar();
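
// The long fragment that follows appears to be narrowInterleaveGroups:
// when load/store interleave groups of factor VF exactly fill a vector
// register, they are rewritten as plain wide loads and stores and the
// canonical IV increment is adjusted to a single-UF step.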
                                    unsigned VectorRegWidth) {
  // ...
    if (isa<VPCanonicalIVPHIRecipe>(&R) ||
        // ...
    if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
        // ...
    auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
    if (R.mayWriteToMemory() && !InterleaveR)
      // ...
    if (InterleaveR->getStoredValues().empty())
      // ...
    auto *Member0 = InterleaveR->getStoredValues()[0];
    // ...
        all_of(InterleaveR->getStoredValues(),
               [Member0](VPValue *VPV) { return Member0 == VPV; })) {
      // ...
          VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
          // ...
          auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
          return IR && IR->getInterleaveGroup()->isFull() &&
                 IR->getVPValue(Op.index()) == Op.value();
        // ...
    auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
        InterleaveR->getStoredValues()[0]->getDefiningRecipe());
    // ...
    for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
      auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
      if (!R || R->getOpcode() != WideMember0->getOpcode() ||
          R->getNumOperands() > 2)
        // ...
                 [WideMember0, Idx = I](const auto &P) {
                   const auto &[OpIdx, OpV] = P;
                   return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
                 }))
        // ...
  if (StoreGroups.empty())
    // ...
    auto *R = V->getDefiningRecipe();
    // ...
    if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
      // ...
          *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
          LoadGroup->getAddr(), LoadGroup->getMask(), true,
          false, {}, LoadGroup->getDebugLoc());
      L->insertBefore(LoadGroup);
      // ...
    if (auto *RepR = dyn_cast<VPReplicateRecipe>(R)) {
      assert(RepR->isSingleScalar() &&
             isa<LoadInst>(RepR->getUnderlyingInstr()) &&
             "must be a single scalar load");
      // ...
    auto *WideLoad = cast<VPWidenLoadRecipe>(R);
    // ...
    VPValue *PtrOp = WideLoad->getAddr();
    if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(PtrOp))
      PtrOp = VecPtr->getOperand(0);
    // ...
        nullptr, *WideLoad);
    N->insertBefore(WideLoad);
    // ...
  for (auto *StoreGroup : StoreGroups) {
    // ...
    VPValue *Member0 = StoreGroup->getStoredValues()[0];
    // ...
    } else if (auto *WideMember0 =
                   // ...
      for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
        WideMember0->setOperand(Idx, NarrowOp(WideMember0->getOperand(Idx)));
      // ...
      Res = NarrowOp(Member0);
    // ...
        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
        StoreGroup->getAddr(), Res, nullptr, true,
        false, {}, StoreGroup->getDebugLoc());
    S->insertBefore(StoreGroup);
    StoreGroup->eraseFromParent();
  // ...
  auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
  // ...
      CanIV->getScalarType(), 1 * Plan.getUF())));
  // ...
      Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
  // ...
         IsaPred<VPVectorPointerRecipe>) &&
         "All VPVectorPointerRecipes should have been removed");
      dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
  // ...
         "must have a BranchOnCond");
  // ...
  if (VF.isScalable() && VScaleForTuning.has_value())
    VectorStep *= *VScaleForTuning;
  assert(VectorStep > 0 && "trip count should not be zero");
  // ...
  MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefAnalysis InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
iv Induction Variable Users
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
mir Rename Register Operands
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Class for arbitrary precision integers.
unsigned getActiveBits() const
Compute the number of active bits in the value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Utility class for floating point operations which can have information about relaxed accuracy require...
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
Value * FoldGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, GEPNoWrapFlags NW) const override
Value * FoldBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS) const override
Value * FoldSelect(Value *C, Value *True, Value *False) const override
Value * FoldCast(Instruction::CastOps Op, Value *V, Type *DestTy) const override
Value * FoldCmp(CmpInst::Predicate P, Value *LHS, Value *RHS) const override
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
unsigned getOpcode() const
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
This class represents an analyzed expression in the program.
LLVM_ABI bool isZero() const
Return true if the expression is a constant zero.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
size_t getNumPredecessors() const
const VPBlocksTy & getPredecessors() const
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values, using Start + Current * Step.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
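A hedged sketch of VPBuilder: emit an icmp, its negation, and a select at the end of a block. VPBB, A, B, X and Y are assumptions:
VPBuilder Builder;
Builder.setInsertPoint(VPBB); // append new VPInstructions to VPBB
VPValue *Cmp = Builder.createICmp(CmpInst::ICMP_ULT, A, B);
VPValue *NotCmp = Builder.createNot(Cmp);
// Sel can then feed whatever recipe needs the blended value.
VPValue *Sel = Builder.createSelect(NotCmp, X, Y);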
Canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
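Recipes can define zero, one, or several VPValues; getVPSingleValue() is only valid in the single-definition case, as in this sketch (R and NewV are assumptions):
// Rewrite all uses of the recipe's only defined value.
if (R.getNumDefinedValues() == 1)
  R.getVPSingleValue()->replaceAllUsesWith(NewV);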
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe for generating the phi node for the current index of elements, adjusted in accordance with the EVL value.
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ FirstOrderRecurrenceSplice
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors, each containing a struct element.
@ CanonicalIVIncrementForPart
@ CalculateTripCountMinusVF
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide load/store and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
const InterleaveGroup< Instruction > * getInterleaveGroup()
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from a Branch-on-Mask.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
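A sketch of the placement API above; R, DeadR and Dest are assumptions:
// Move recipe R to the top of block Dest, past its phi-like recipes.
R->moveBefore(*Dest, Dest->getFirstNonPhi());
// Unlink and delete a recipe that defines nothing and has no side-effects.
if (DeadR->getNumDefinedValues() == 0 && !DeadR->mayHaveSideEffects())
  DeadR->eraseFromParent();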
Helper class to create VPRecipes from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a scalar value, and adding the result to a chain.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
const VPBlockBase * getEntry() const
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original scalar type, one per lane, instead of producing a single copy of widened type for all lanes.
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar values.
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define a single result VPValue.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
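A sketch of the type-inference analysis; Plan and Def are assumptions, and the constructor taking the enclosing VPlan is assumed to be available:
VPTypeAnalysis TypeInfo(Plan);
// Query the inferred scalar type of a VPValue, e.g. for widening decisions.
Type *ScalarTy = TypeInfo.inferScalarType(Def);
bool CanWiden = ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy();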
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's users to its operands.
void setOperand(unsigned I, VPValue *New)
operand_iterator op_end()
operand_iterator op_begin()
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplace returns true for the given use.
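replaceUsesWithIf enables the kind of partial rewrite needed when a recipe is duplicated; a sketch, with Old, New and SinkTo as assumptions:
// Point only the uses outside SinkTo at New; uses inside SinkTo keep Old.
Old->replaceUsesWithIf(New, [SinkTo](VPUser &U, unsigned) {
  return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
});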
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
PHINode * getPHINode() const
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector values.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part, if it exists.
VPValue * getSplatVFValue()
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
VPValue * getMask() const
Return the mask used by this recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the recipe.
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output IR, including which branches, basic-blocks and output IR instructions to generate.
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
VPValue & getVectorTripCount()
The vector trip count.
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getTrue()
Return a VPValue wrapping i1 true.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar tail loop or the exit block from the loop latch.
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
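A sketch of common VPlan queries; Plan is an assumption:
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanIV = Plan.getCanonicalIV();
VPValue *TC = Plan.getTripCount();       // original loop trip count
bool FoldedTail = !Plan.hasScalarTail(); // no scalar iterations remain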
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
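ElementCount captures both fixed and scalable vectorization factors, as in this sketch:
ElementCount FixedVF = ElementCount::getFixed(8);   // VF = 8
ElementCount ScalVF = ElementCount::getScalable(4); // VF = vscale x 4
assert(FixedVF.isFixed() && FixedVF.getFixedValue() == 8);
assert(ScalVF.isScalable() && ScalVF.getKnownMinValue() == 4);
ElementCount Doubled = ScalVF.multiplyCoefficientBy(2); // vscale x 8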
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A divided by B using unsigned division, rounded according to the given rounding mode.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1)
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastElement, Op0_t > m_ExtractLastElement(const Op0_t &Op0)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
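A hedged sketch of the VPlanPatternMatch combinators: test whether a block terminator is a branch-on-count against the plan's vector trip count. Term and Plan are assumptions:
using namespace VPlanPatternMatch;
// True if Term increments a counter and branches on reaching the vector TC.
bool IsLatchBranch =
    match(Term, m_BranchOnCount(m_VPValue(),
                                m_Specific(&Plan.getVectorTripCount())));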
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or only has its first lane used.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool isHeaderMask(const VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iterators.
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing through region blocks.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through region blocks.
void sort(IteratorTy Start, IteratorTy End)
std::unique_ptr< VPlan > VPlanPtr
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
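These range helpers combine with the recipe APIs above; a sketch of erasing trivially dead recipes while iterating, with Plan as an assumption:
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
         vp_depth_first_deep(Plan.getEntry())))
  // make_early_inc_range keeps iteration valid while erasing.
  for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB)))
    if (R.getNumDefinedValues() == 1 &&
        R.getVPSingleValue()->getNumUsers() == 0 && !R.mayHaveSideEffects())
      R.eraseFromParent();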
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
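A sketch of the load/store helpers above, with I assumed to be an Instruction pointer:
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
  Type *AccessTy = getLoadStoreType(I);             // the accessed type
  const Value *Ptr = getLoadStorePointerOperand(I); // the address operand
  bool IsVectorAccess = AccessTy->isVectorTy();     // e.g. for cost queries
}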
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are used, the phi is fake.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening select instructions.
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.