LLVM 21.0.0git
VPlanTransforms.cpp
Go to the documentation of this file.
1//===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a set of utility VPlan to VPlan transformations.
11///
12//===----------------------------------------------------------------------===//
13
14#include "VPlanTransforms.h"
15#include "VPRecipeBuilder.h"
16#include "VPlan.h"
17#include "VPlanAnalysis.h"
18#include "VPlanCFG.h"
19#include "VPlanDominatorTree.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanUtils.h"
22#include "VPlanVerifier.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SetVector.h"
26#include "llvm/ADT/TypeSwitch.h"
30#include "llvm/IR/Intrinsics.h"
32
33using namespace llvm;
34
36 VPlanPtr &Plan,
38 GetIntOrFpInductionDescriptor,
39 ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
40
42 Plan->getVectorLoopRegion());
43 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
44 // Skip blocks outside region
45 if (!VPBB->getParent())
46 break;
47 VPRecipeBase *Term = VPBB->getTerminator();
48 auto EndIter = Term ? Term->getIterator() : VPBB->end();
49 // Introduce each ingredient into VPlan.
50 for (VPRecipeBase &Ingredient :
51 make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
52
53 VPValue *VPV = Ingredient.getVPSingleValue();
54 Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
55
56 VPRecipeBase *NewRecipe = nullptr;
57 if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
58 auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
59 const auto *II = GetIntOrFpInductionDescriptor(Phi);
60 if (!II)
61 continue;
62
63 VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
64 VPValue *Step =
65 vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
66 NewRecipe = new VPWidenIntOrFpInductionRecipe(
67 Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
68 } else {
69 assert(isa<VPInstruction>(&Ingredient) &&
70 "only VPInstructions expected here");
71 assert(!isa<PHINode>(Inst) && "phis should be handled above");
72 // Create VPWidenMemoryRecipe for loads and stores.
73 if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
74 NewRecipe = new VPWidenLoadRecipe(
75 *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
76 false /*Consecutive*/, false /*Reverse*/,
77 Ingredient.getDebugLoc());
78 } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
79 NewRecipe = new VPWidenStoreRecipe(
80 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
81 nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
82 Ingredient.getDebugLoc());
83 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
84 NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
85 } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
86 NewRecipe = new VPWidenIntrinsicRecipe(
87 *CI, getVectorIntrinsicIDForCall(CI, &TLI),
88 {Ingredient.op_begin(), Ingredient.op_end() - 1}, CI->getType(),
89 CI->getDebugLoc());
90 } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
91 NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
92 } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
93 NewRecipe = new VPWidenCastRecipe(
94 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
95 } else {
96 NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
97 }
98 }
99
100 NewRecipe->insertBefore(&Ingredient);
101 if (NewRecipe->getNumDefinedValues() == 1)
102 VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
103 else
104 assert(NewRecipe->getNumDefinedValues() == 0 &&
105 "Only recpies with zero or one defined values expected");
106 Ingredient.eraseFromParent();
107 }
108 }
109}
110
111static bool sinkScalarOperands(VPlan &Plan) {
112 auto Iter = vp_depth_first_deep(Plan.getEntry());
113 bool Changed = false;
114 // First, collect the operands of all recipes in replicate blocks as seeds for
115 // sinking.
117 for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
118 VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
119 if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
120 continue;
121 VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
122 if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
123 continue;
124 for (auto &Recipe : *VPBB) {
125 for (VPValue *Op : Recipe.operands())
126 if (auto *Def =
127 dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
128 WorkList.insert(std::make_pair(VPBB, Def));
129 }
130 }
131
132 bool ScalarVFOnly = Plan.hasScalarVFOnly();
133 // Try to sink each replicate or scalar IV steps recipe in the worklist.
134 for (unsigned I = 0; I != WorkList.size(); ++I) {
135 VPBasicBlock *SinkTo;
136 VPSingleDefRecipe *SinkCandidate;
137 std::tie(SinkTo, SinkCandidate) = WorkList[I];
138 if (SinkCandidate->getParent() == SinkTo ||
139 SinkCandidate->mayHaveSideEffects() ||
140 SinkCandidate->mayReadOrWriteMemory())
141 continue;
142 if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
143 if (!ScalarVFOnly && RepR->isUniform())
144 continue;
145 } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
146 continue;
147
148 bool NeedsDuplicating = false;
149 // All recipe users of the sink candidate must be in the same block SinkTo
150 // or all users outside of SinkTo must be uniform-after-vectorization (
151 // i.e., only first lane is used) . In the latter case, we need to duplicate
152 // SinkCandidate.
153 auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
154 SinkCandidate](VPUser *U) {
155 auto *UI = cast<VPRecipeBase>(U);
156 if (UI->getParent() == SinkTo)
157 return true;
158 NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
159 // We only know how to duplicate VPRecipeRecipes for now.
160 return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
161 };
162 if (!all_of(SinkCandidate->users(), CanSinkWithUser))
163 continue;
164
165 if (NeedsDuplicating) {
166 if (ScalarVFOnly)
167 continue;
168 Instruction *I = SinkCandidate->getUnderlyingInstr();
169 auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
170 // TODO: add ".cloned" suffix to name of Clone's VPValue.
171
172 Clone->insertBefore(SinkCandidate);
173 SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
174 return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
175 });
176 }
177 SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
178 for (VPValue *Op : SinkCandidate->operands())
179 if (auto *Def =
180 dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
181 WorkList.insert(std::make_pair(SinkTo, Def));
182 Changed = true;
183 }
184 return Changed;
185}
186
187/// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
188/// the mask.
190 auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
191 if (!EntryBB || EntryBB->size() != 1 ||
192 !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
193 return nullptr;
194
195 return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
196}
197
198/// If \p R is a triangle region, return the 'then' block of the triangle.
200 auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
201 if (EntryBB->getNumSuccessors() != 2)
202 return nullptr;
203
204 auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
205 auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
206 if (!Succ0 || !Succ1)
207 return nullptr;
208
209 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
210 return nullptr;
211 if (Succ0->getSingleSuccessor() == Succ1)
212 return Succ0;
213 if (Succ1->getSingleSuccessor() == Succ0)
214 return Succ1;
215 return nullptr;
216}
217
218// Merge replicate regions in their successor region, if a replicate region
219// is connected to a successor replicate region with the same predicate by a
220// single, empty VPBasicBlock.
222 SmallPtrSet<VPRegionBlock *, 4> TransformedRegions;
223
224 // Collect replicate regions followed by an empty block, followed by another
225 // replicate region with matching masks to process front. This is to avoid
226 // iterator invalidation issues while merging regions.
228 for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
229 vp_depth_first_deep(Plan.getEntry()))) {
230 if (!Region1->isReplicator())
231 continue;
232 auto *MiddleBasicBlock =
233 dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
234 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
235 continue;
236
237 auto *Region2 =
238 dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
239 if (!Region2 || !Region2->isReplicator())
240 continue;
241
242 VPValue *Mask1 = getPredicatedMask(Region1);
243 VPValue *Mask2 = getPredicatedMask(Region2);
244 if (!Mask1 || Mask1 != Mask2)
245 continue;
246
247 assert(Mask1 && Mask2 && "both region must have conditions");
248 WorkList.push_back(Region1);
249 }
250
251 // Move recipes from Region1 to its successor region, if both are triangles.
252 for (VPRegionBlock *Region1 : WorkList) {
253 if (TransformedRegions.contains(Region1))
254 continue;
255 auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
256 auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
257
258 VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
259 VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
260 if (!Then1 || !Then2)
261 continue;
262
263 // Note: No fusion-preventing memory dependencies are expected in either
264 // region. Such dependencies should be rejected during earlier dependence
265 // checks, which guarantee accesses can be re-ordered for vectorization.
266 //
267 // Move recipes to the successor region.
268 for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
269 ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
270
271 auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
272 auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());
273
274 // Move VPPredInstPHIRecipes from the merge block to the successor region's
275 // merge block. Update all users inside the successor region to use the
276 // original values.
277 for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
278 VPValue *PredInst1 =
279 cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
280 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
281 Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
282 return cast<VPRecipeBase>(&U)->getParent() == Then2;
283 });
284
285 // Remove phi recipes that are unused after merging the regions.
286 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
287 Phi1ToMove.eraseFromParent();
288 continue;
289 }
290 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
291 }
292
293 // Finally, remove the first region.
294 for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
295 VPBlockUtils::disconnectBlocks(Pred, Region1);
296 VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
297 }
298 VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
299 TransformedRegions.insert(Region1);
300 }
301
302 return !TransformedRegions.empty();
303}
304
306 VPlan &Plan) {
307 Instruction *Instr = PredRecipe->getUnderlyingInstr();
308 // Build the triangular if-then region.
309 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
310 assert(Instr->getParent() && "Predicated instruction not in any basic block");
311 auto *BlockInMask = PredRecipe->getMask();
312 auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
313 auto *Entry =
314 Plan.createVPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
315
316 // Replace predicated replicate recipe with a replicate recipe without a
317 // mask but in the replicate region.
318 auto *RecipeWithoutMask = new VPReplicateRecipe(
319 PredRecipe->getUnderlyingInstr(),
320 make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
321 PredRecipe->isUniform());
322 auto *Pred =
323 Plan.createVPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
324
325 VPPredInstPHIRecipe *PHIRecipe = nullptr;
326 if (PredRecipe->getNumUsers() != 0) {
327 PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask,
328 RecipeWithoutMask->getDebugLoc());
329 PredRecipe->replaceAllUsesWith(PHIRecipe);
330 PHIRecipe->setOperand(0, RecipeWithoutMask);
331 }
332 PredRecipe->eraseFromParent();
333 auto *Exiting =
334 Plan.createVPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
336 Plan.createVPRegionBlock(Entry, Exiting, RegionName, true);
337
338 // Note: first set Entry as region entry and then connect successors starting
339 // from it in order, to propagate the "parent" of each VPBasicBlock.
340 VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
341 VPBlockUtils::connectBlocks(Pred, Exiting);
342
343 return Region;
344}
345
346static void addReplicateRegions(VPlan &Plan) {
348 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
349 vp_depth_first_deep(Plan.getEntry()))) {
350 for (VPRecipeBase &R : *VPBB)
351 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
352 if (RepR->isPredicated())
353 WorkList.push_back(RepR);
354 }
355 }
356
357 unsigned BBNum = 0;
358 for (VPReplicateRecipe *RepR : WorkList) {
359 VPBasicBlock *CurrentBlock = RepR->getParent();
360 VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
361
362 BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
364 OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
365 // Record predicated instructions for above packing optimizations.
367 Region->setParent(CurrentBlock->getParent());
369 }
370}
371
372/// Remove redundant VPBasicBlocks by merging them into their predecessor if
373/// the predecessor has a single successor.
376 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
377 vp_depth_first_deep(Plan.getEntry()))) {
378 // Don't fold the blocks in the skeleton of the Plan into their single
379 // predecessors for now.
380 // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
381 if (!VPBB->getParent())
382 continue;
383 auto *PredVPBB =
384 dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
385 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
386 isa<VPIRBasicBlock>(PredVPBB))
387 continue;
388 WorkList.push_back(VPBB);
389 }
390
391 for (VPBasicBlock *VPBB : WorkList) {
392 VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
393 for (VPRecipeBase &R : make_early_inc_range(*VPBB))
394 R.moveBefore(*PredVPBB, PredVPBB->end());
395 VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
396 auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
397 if (ParentRegion && ParentRegion->getExiting() == VPBB)
398 ParentRegion->setExiting(PredVPBB);
399 for (auto *Succ : to_vector(VPBB->successors())) {
401 VPBlockUtils::connectBlocks(PredVPBB, Succ);
402 }
403 // VPBB is now dead and will be cleaned up when the plan gets destroyed.
404 }
405 return !WorkList.empty();
406}
407
409 // Convert masked VPReplicateRecipes to if-then region blocks.
411
412 bool ShouldSimplify = true;
413 while (ShouldSimplify) {
414 ShouldSimplify = sinkScalarOperands(Plan);
415 ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
416 ShouldSimplify |= mergeBlocksIntoPredecessors(Plan);
417 }
418}
419
420/// Remove redundant casts of inductions.
421///
422/// Such redundant casts are casts of induction variables that can be ignored,
423/// because we already proved that the casted phi is equal to the uncasted phi
424/// in the vectorized loop. There is no need to vectorize the cast - the same
425/// value can be used for both the phi and casts in the vector loop.
427 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
428 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
429 if (!IV || IV->getTruncInst())
430 continue;
431
432 // A sequence of IR Casts has potentially been recorded for IV, which
433 // *must be bypassed* when the IV is vectorized, because the vectorized IV
434 // will produce the desired casted value. This sequence forms a def-use
435 // chain and is provided in reverse order, ending with the cast that uses
436 // the IV phi. Search for the recipe of the last cast in the chain and
437 // replace it with the original IV. Note that only the final cast is
438 // expected to have users outside the cast-chain and the dead casts left
439 // over will be cleaned up later.
440 auto &Casts = IV->getInductionDescriptor().getCastInsts();
441 VPValue *FindMyCast = IV;
442 for (Instruction *IRCast : reverse(Casts)) {
443 VPSingleDefRecipe *FoundUserCast = nullptr;
444 for (auto *U : FindMyCast->users()) {
445 auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
446 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
447 FoundUserCast = UserCast;
448 break;
449 }
450 }
451 FindMyCast = FoundUserCast;
452 }
453 FindMyCast->replaceAllUsesWith(IV);
454 }
455}
456
457/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
458/// recipe, if it exists.
460 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
461 VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
462 for (VPUser *U : CanonicalIV->users()) {
463 WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
464 if (WidenNewIV)
465 break;
466 }
467
468 if (!WidenNewIV)
469 return;
470
472 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
473 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
474
475 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
476 continue;
477
478 // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
479 // everything WidenNewIV's users need. That is, WidenOriginalIV will
480 // generate a vector phi or all users of WidenNewIV demand the first lane
481 // only.
482 if (any_of(WidenOriginalIV->users(),
483 [WidenOriginalIV](VPUser *U) {
484 return !U->usesScalars(WidenOriginalIV);
485 }) ||
486 vputils::onlyFirstLaneUsed(WidenNewIV)) {
487 WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
488 WidenNewIV->eraseFromParent();
489 return;
490 }
491 }
492}
493
494/// Returns true if \p R is dead and can be removed.
495static bool isDeadRecipe(VPRecipeBase &R) {
496 using namespace llvm::PatternMatch;
497 // Do remove conditional assume instructions as their conditions may be
498 // flattened.
499 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
500 bool IsConditionalAssume =
501 RepR && RepR->isPredicated() &&
502 match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>());
503 if (IsConditionalAssume)
504 return true;
505
506 if (R.mayHaveSideEffects())
507 return false;
508
509 // Recipe is dead if no user keeps the recipe alive.
510 return all_of(R.definedValues(),
511 [](VPValue *V) { return V->getNumUsers() == 0; });
512}
513
516 Plan.getEntry());
517
518 for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
519 // The recipes in the block are processed in reverse order, to catch chains
520 // of dead recipes.
521 for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
522 if (isDeadRecipe(R))
523 R.eraseFromParent();
524 }
525 }
526}
527
530 Instruction::BinaryOps InductionOpcode,
531 FPMathOperator *FPBinOp, Instruction *TruncI,
532 VPValue *StartV, VPValue *Step, DebugLoc DL,
533 VPBuilder &Builder) {
535 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
536 VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
537 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
538
539 // Truncate base induction if needed.
540 Type *CanonicalIVType = CanonicalIV->getScalarType();
541 VPTypeAnalysis TypeInfo(CanonicalIVType);
542 Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
543 if (TruncI) {
544 Type *TruncTy = TruncI->getType();
545 assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
546 "Not truncating.");
547 assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
548 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);
549 ResultTy = TruncTy;
550 }
551
552 // Truncate step if needed.
553 Type *StepTy = TypeInfo.inferScalarType(Step);
554 if (ResultTy != StepTy) {
555 assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
556 "Not truncating.");
557 assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
558 auto *VecPreheader =
559 cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
560 VPBuilder::InsertPointGuard Guard(Builder);
561 Builder.setInsertPoint(VecPreheader);
562 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
563 }
564 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step);
565}
566
568 SetVector<VPUser *> Users(V->user_begin(), V->user_end());
569 for (unsigned I = 0; I != Users.size(); ++I) {
570 VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
571 if (isa<VPHeaderPHIRecipe>(Cur))
572 continue;
573 for (VPValue *V : Cur->definedValues())
574 Users.insert(V->user_begin(), V->user_end());
575 }
576 return Users.takeVector();
577}
578
579/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
580/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
581/// VPWidenPointerInductionRecipe will generate vectors only. If some users
582/// require vectors while other require scalars, the scalar uses need to extract
583/// the scalars from the generated vectors (Note that this is different to how
584/// int/fp inductions are handled). Legalize extract-from-ends using uniform
585/// VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
586/// the correct end value is available. Also optimize
587/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
588/// providing them scalar steps built on the canonical scalar IV and update the
589/// original IV's users. This is an optional optimization to reduce the needs of
590/// vector extracts.
// NOTE(review): this is a doxygen-scraped copy; line 591 (the function
// signature — presumably `static void legalizeAndOptimizeInductions(VPlan
// &Plan)` given the uses of Plan below) and line 593 (which must declare the
// HeaderVPBB used on the following lines) are missing — verify upstream.
592 using namespace llvm::VPlanPatternMatch;
594 bool HasOnlyVectorVFs = !Plan.hasScalarVFOnly();
595 VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
596 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
597 auto *PhiR = dyn_cast<VPWidenInductionRecipe>(&Phi);
598 if (!PhiR)
599 continue;
600
601 // Try to narrow wide and replicating recipes to uniform recipes, based on
602 // VPlan analysis.
603 // TODO: Apply to all recipes in the future, to replace legacy uniformity
604 // analysis.
605 auto Users = collectUsersRecursively(PhiR);
606 for (VPUser *U : reverse(Users)) {
607 auto *Def = dyn_cast<VPSingleDefRecipe>(U);
608 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
609 // Skip recipes that shouldn't be narrowed.
610 if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
611 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
612 (RepR && (RepR->isUniform() || RepR->isPredicated())))
613 continue;
614
615 // Skip recipes that may have other lanes than their first used.
// NOTE(review): scraped lines 616-617 — the condition guarding this
// `continue` — are missing; confirm against upstream before relying on this.
618 continue;
619
620 auto *Clone = new VPReplicateRecipe(Def->getUnderlyingInstr(),
621 Def->operands(), /*IsUniform*/ true);
622 Clone->insertAfter(Def);
623 Def->replaceAllUsesWith(Clone);
624 }
625
626 // Replace wide pointer inductions which have only their scalars used by
627 // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
628 if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
629 if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
630 continue;
631
632 const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
633 VPValue *StartV =
634 Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
635 VPValue *StepV = PtrIV->getOperand(1);
// NOTE(review): scraped line 636 is missing — it must define the `Steps`
// value used below, presumably the start of a `createScalarIVSteps(` call
// continuing onto the next two lines; confirm upstream.
637 Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
638 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
639
640 VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
641 PtrIV->getDebugLoc(), "next.gep");
642
643 PtrIV->replaceAllUsesWith(PtrAdd);
644 continue;
645 }
646
647 // Replace widened induction with scalar steps for users that only use
648 // scalars.
649 auto *WideIV = cast<VPWidenIntOrFpInductionRecipe>(&Phi);
650 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
651 return U->usesScalars(WideIV);
652 }))
653 continue;
654
655 const InductionDescriptor &ID = WideIV->getInductionDescriptor();
// NOTE(review): scraped line 656 is missing — again the definition of the
// `Steps` value used below; confirm upstream.
657 Plan, ID.getKind(), ID.getInductionOpcode(),
658 dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
659 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
660 WideIV->getDebugLoc(), Builder);
661
662 // Update scalar users of IV to use Step instead.
663 if (!HasOnlyVectorVFs)
664 WideIV->replaceAllUsesWith(Steps);
665 else
666 WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
667 return U.usesScalars(WideIV);
668 });
669 }
670}
671
672/// Check if \p VPV is an untruncated wide induction, either before or after the
673/// increment. If so return the header IV (before the increment), otherwise
674/// return null.
676 auto *WideIV = dyn_cast<VPWidenInductionRecipe>(VPV);
677 if (WideIV) {
678 // VPV itself is a wide induction, separately compute the end value for exit
679 // users if it is not a truncated IV.
680 auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
681 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
682 }
683
684 // Check if VPV is an optimizable induction increment.
685 VPRecipeBase *Def = VPV->getDefiningRecipe();
686 if (!Def || Def->getNumOperands() != 2)
687 return nullptr;
688 WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
689 if (!WideIV)
690 WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
691 if (!WideIV)
692 return nullptr;
693
694 auto IsWideIVInc = [&]() {
695 using namespace VPlanPatternMatch;
696 auto &ID = WideIV->getInductionDescriptor();
697
698 // Check if VPV increments the induction by the induction step.
699 VPValue *IVStep = WideIV->getStepValue();
700 switch (ID.getInductionOpcode()) {
701 case Instruction::Add:
702 return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
703 m_Specific(IVStep)));
704 case Instruction::FAdd:
705 return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
706 m_Specific(IVStep)));
707 case Instruction::FSub:
708 return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
709 m_Specific(IVStep)));
710 case Instruction::Sub: {
711 // IVStep will be the negated step of the subtraction. Check if Step == -1
712 // * IVStep.
713 VPValue *Step;
714 if (!match(VPV,
715 m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
716 !Step->isLiveIn() || !IVStep->isLiveIn())
717 return false;
718 auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
719 auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
720 return StepCI && IVStepCI &&
721 StepCI->getValue() == (-1 * IVStepCI->getValue());
722 }
723 default:
724 return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
725 match(VPV, m_GetElementPtr(m_Specific(WideIV),
726 m_Specific(WideIV->getStepValue())));
727 }
728 llvm_unreachable("should have been covered by switch above");
729 };
730 return IsWideIVInc() ? WideIV : nullptr;
731}
732
734 VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
735 using namespace VPlanPatternMatch;
737 if (ExitVPBBs.size() != 1)
738 return;
739
740 VPIRBasicBlock *ExitVPBB = ExitVPBBs[0];
741 VPBlockBase *PredVPBB = ExitVPBB->getSinglePredecessor();
742 if (!PredVPBB)
743 return;
744 assert(PredVPBB == Plan.getMiddleBlock() &&
745 "predecessor must be the middle block");
746
747 VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
749 for (VPRecipeBase &R : *ExitVPBB) {
750 auto *ExitIRI = cast<VPIRInstruction>(&R);
751 if (!isa<PHINode>(ExitIRI->getInstruction()))
752 break;
753
755 if (!match(ExitIRI->getOperand(0),
756 m_VPInstruction<VPInstruction::ExtractFromEnd>(
757 m_VPValue(Incoming), m_SpecificInt(1))))
758 continue;
759
760 auto *WideIV = getOptimizableIVOf(Incoming);
761 if (!WideIV)
762 continue;
763 VPValue *EndValue = EndValues.lookup(WideIV);
764 assert(EndValue && "end value must have been pre-computed");
765
766 if (Incoming != WideIV) {
767 ExitIRI->setOperand(0, EndValue);
768 continue;
769 }
770
771 VPValue *Escape = nullptr;
772 VPValue *Step = WideIV->getStepValue();
773 Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
774 if (ScalarTy->isIntegerTy()) {
775 Escape =
776 B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
777 } else if (ScalarTy->isPointerTy()) {
778 auto *Zero = Plan.getOrAddLiveIn(
779 ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
780 Escape = B.createPtrAdd(EndValue,
781 B.createNaryOp(Instruction::Sub, {Zero, Step}),
782 {}, "ind.escape");
783 } else if (ScalarTy->isFloatingPointTy()) {
784 const auto &ID = WideIV->getInductionDescriptor();
785 Escape = B.createNaryOp(
786 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
787 ? Instruction::FSub
788 : Instruction::FAdd,
789 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
790 } else {
791 llvm_unreachable("all possible induction types must be handled");
792 }
793 ExitIRI->setOperand(0, Escape);
794 }
795}
796
797/// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
798/// them with already existing recipes expanding the same SCEV expression.
801
802 for (VPRecipeBase &R :
804 auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
805 if (!ExpR)
806 continue;
807
808 auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
809 if (I.second)
810 continue;
811 ExpR->replaceAllUsesWith(I.first->second);
812 ExpR->eraseFromParent();
813 }
814}
815
817 SmallVector<VPValue *> WorkList;
819 WorkList.push_back(V);
820
821 while (!WorkList.empty()) {
822 VPValue *Cur = WorkList.pop_back_val();
823 if (!Seen.insert(Cur).second)
824 continue;
826 if (!R)
827 continue;
828 if (!isDeadRecipe(*R))
829 continue;
830 WorkList.append(R->op_begin(), R->op_end());
831 R->eraseFromParent();
832 }
833}
834
835 /// Try to simplify recipe \p R.
836 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
837   using namespace llvm::VPlanPatternMatch;
838 
839   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
840     // Try to remove redundant blend recipes.
841     SmallPtrSet<VPValue *, 4> UniqueValues;
842     if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
843       UniqueValues.insert(Blend->getIncomingValue(0));
844     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
845       if (!match(Blend->getMask(I), m_False()))
846         UniqueValues.insert(Blend->getIncomingValue(I));
847 
    // If only one distinct incoming value remains, the blend is a no-op and
    // can be replaced by that value.
848     if (UniqueValues.size() == 1) {
849       Blend->replaceAllUsesWith(*UniqueValues.begin());
850       Blend->eraseFromParent();
851       return;
852     }
853 
854     if (Blend->isNormalized())
855       return;
856 
857     // Normalize the blend so its first incoming value is used as the initial
858     // value with the others blended into it.
859 
860     unsigned StartIndex = 0;
861     for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
862       // If a value's mask is used only by the blend then it can be deadcoded.
863       // TODO: Find the most expensive mask that can be deadcoded, or a mask
864       // that's used by multiple blends where it can be removed from them all.
865       VPValue *Mask = Blend->getMask(I);
866       if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
867         StartIndex = I;
868         break;
869       }
870     }
871 
    // Build the normalized operand list: start value first, then each other
    // incoming value followed by its mask.
872     SmallVector<VPValue *, 4> OperandsWithMask;
873     OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
874 
875     for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
876       if (I == StartIndex)
877         continue;
878       OperandsWithMask.push_back(Blend->getIncomingValue(I));
879       OperandsWithMask.push_back(Blend->getMask(I));
880     }
881 
882     auto *NewBlend = new VPBlendRecipe(
883         cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
884     NewBlend->insertBefore(&R);
885 
    // The start value's mask is not an operand of the new blend.
    // NOTE(review): its dead-code cleanup appears elided in this view —
    // confirm against upstream.
886     VPValue *DeadMask = Blend->getMask(StartIndex);
887     Blend->replaceAllUsesWith(NewBlend);
888     Blend->eraseFromParent();
890     return;
891   }
892 
  // Fold trunc(zext/sext(A)): if types match, use A directly; otherwise
  // replace the cast pair with a single widened cast of A to the trunc type.
893   VPValue *A;
894   if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
895     VPValue *Trunc = R.getVPSingleValue();
896     Type *TruncTy = TypeInfo.inferScalarType(Trunc);
897     Type *ATy = TypeInfo.inferScalarType(A);
898     if (TruncTy == ATy) {
899       Trunc->replaceAllUsesWith(A);
900     } else {
901       // Don't replace a scalarizing recipe with a widened cast.
902       if (isa<VPReplicateRecipe>(&R))
903         return;
904       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
905 
        // The net effect is an extension; keep the signedness of the
        // original extend.
906         unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
907                                  ? Instruction::SExt
908                                  : Instruction::ZExt;
909         auto *VPC =
910             new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
911         if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
912           // UnderlyingExt has distinct return type, used to retain legacy cost.
913           VPC->setUnderlyingValue(UnderlyingExt);
914         }
915         VPC->insertBefore(&R);
916         Trunc->replaceAllUsesWith(VPC);
917       } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
918         auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
919         VPC->insertBefore(&R);
920         Trunc->replaceAllUsesWith(VPC);
921       }
922     }
923 #ifndef NDEBUG
924     // Verify that the cached type info for both A and its users is still
925     // accurate by comparing it to freshly computed types.
926     VPTypeAnalysis TypeInfo2(
927         R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
928     assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
929     for (VPUser *U : A->users()) {
930       auto *R = cast<VPRecipeBase>(U);
931       for (VPValue *VPV : R->definedValues())
932         assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
933     }
934 #endif
935   }
936 
937   // Simplify (X && Y) || (X && !Y) -> X.
938   // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
939   // && (Y || Z) and (X || !X) into true. This requires queuing newly created
940   // recipes to be visited during simplification.
941   VPValue *X, *Y, *X1, *Y1;
942   if (match(&R,
943             m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
944                          m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
945       X == X1 && Y == Y1) {
946     R.getVPSingleValue()->replaceAllUsesWith(X);
947     R.eraseFromParent();
948     return;
949   }
950 
  // Fold multiply by one: A * 1 -> A.
951   if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
952     return R.getVPSingleValue()->replaceAllUsesWith(A);
953 
  // Fold double negation: !(!A) -> A.
954   if (match(&R, m_Not(m_Not(m_VPValue(A)))))
955     return R.getVPSingleValue()->replaceAllUsesWith(A);
956 
957   // Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
958   if ((match(&R,
959              m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
960        match(&R,
961              m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
962       TypeInfo.inferScalarType(R.getOperand(1)) ==
963           TypeInfo.inferScalarType(R.getVPSingleValue()))
964     return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
965 }
966
967 void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
  // Visit all basic blocks in reverse post order starting at the plan entry
  // and try to simplify every recipe.
  // (The RPOT declaration line is elided in this view.)
969       Plan.getEntry());
970   VPTypeAnalysis TypeInfo(&CanonicalIVTy);
971   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // make_early_inc_range allows simplifyRecipe to erase the current recipe.
972     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
973       simplifyRecipe(R, TypeInfo);
974     }
975   }
976 }
977
// Simplify the latch branch — and, when possible, remove the vector loop
// region entirely — once the trip count is known to be <= BestVF * BestUF.
// (The signature lines, including the PSE parameter, are elided in this view.)
979                                          unsigned BestUF,
981   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
982   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
983   VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
984   VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock();
985   auto *Term = &ExitingVPBB->back();
986   // Try to simplify the branch condition if TC <= VF * UF when preparing to
987   // execute the plan for the main vector loop. We only do this if the
988   // terminator is:
989   //  1. BranchOnCount, or
990   //  2. BranchOnCond where the input is Not(ActiveLaneMask).
991   using namespace llvm::VPlanPatternMatch;
992   if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
993       !match(Term,
994              m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
995     return;
996 
997   ScalarEvolution &SE = *PSE.getSE();
998   const SCEV *TripCount =
1000   assert(!isa<SCEVCouldNotCompute>(TripCount) &&
1001          "Trip count SCEV must be computable");
  // Bail out unless SCEV proves TC <= VF * UF, i.e. one vector iteration
  // covers the whole loop.
1002   ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
1003   const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
1004   if (TripCount->isZero() ||
1005       !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
1006     return;
1007 
1008   // The vector loop region only executes once. If possible, completely remove
1009   // the region, otherwise replace the terminator controlling the latch with
1010   // (BranchOnCond true).
1011   auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
1012   auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
1013   if (all_of(
1014           Header->phis(),
1015           IsaPred<VPCanonicalIVPHIRecipe, VPFirstOrderRecurrencePHIRecipe>)) {
    // With a single execution of the body, each header phi takes its start
    // value; replace and remove the phis.
1016     for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
1017       auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(&HeaderR);
1018       HeaderPhiR->replaceAllUsesWith(HeaderPhiR->getStartValue());
1019       HeaderPhiR->eraseFromParent();
1020     }
1021 
    // Splice the region's blocks directly between preheader and exit.
1022     VPBlockBase *Preheader = VectorRegion->getSinglePredecessor();
1023     VPBlockBase *Exit = VectorRegion->getSingleSuccessor();
1024     VPBlockUtils::disconnectBlocks(Preheader, VectorRegion);
1025     VPBlockUtils::disconnectBlocks(VectorRegion, Exit);
1026 
1027     for (VPBlockBase *B : vp_depth_first_shallow(VectorRegion->getEntry()))
1028       B->setParent(nullptr);
1029 
1030     VPBlockUtils::connectBlocks(Preheader, Header);
1031     VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
1032     simplifyRecipes(Plan, *CanIVTy);
1033   } else {
1034     // The vector region contains header phis for which we cannot remove the
1035     // loop region yet.
1036     LLVMContext &Ctx = SE.getContext();
1037     auto *BOC = new VPInstruction(
1039         {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}, Term->getDebugLoc());
1040     ExitingVPBB->appendRecipe(BOC);
1041   }
1042 
1043   Term->eraseFromParent();
1044 
1045   Plan.setVF(BestVF);
1046   Plan.setUF(BestUF);
1047   // TODO: Further simplifications are possible
1048   //      1. Replace inductions with constants.
1049   //      2. Replace vector loop region with VPBasicBlock.
1050 }
1051
1052/// Sink users of \p FOR after the recipe defining the previous value \p
1053/// Previous of the recurrence. \returns true if all users of \p FOR could be
1054/// re-arranged as needed or false if it is not possible.
1055static bool
1057 VPRecipeBase *Previous,
1058 VPDominatorTree &VPDT) {
1059 // Collect recipes that need sinking.
1062 Seen.insert(Previous);
1063 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
1064 // The previous value must not depend on the users of the recurrence phi. In
1065 // that case, FOR is not a fixed order recurrence.
1066 if (SinkCandidate == Previous)
1067 return false;
1068
1069 if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
1070 !Seen.insert(SinkCandidate).second ||
1071 VPDT.properlyDominates(Previous, SinkCandidate))
1072 return true;
1073
1074 if (SinkCandidate->mayHaveSideEffects())
1075 return false;
1076
1077 WorkList.push_back(SinkCandidate);
1078 return true;
1079 };
1080
1081 // Recursively sink users of FOR after Previous.
1082 WorkList.push_back(FOR);
1083 for (unsigned I = 0; I != WorkList.size(); ++I) {
1084 VPRecipeBase *Current = WorkList[I];
1085 assert(Current->getNumDefinedValues() == 1 &&
1086 "only recipes with a single defined value expected");
1087
1088 for (VPUser *User : Current->getVPSingleValue()->users()) {
1089 if (!TryToPushSinkCandidate(cast<VPRecipeBase>(User)))
1090 return false;
1091 }
1092 }
1093
1094 // Keep recipes to sink ordered by dominance so earlier instructions are
1095 // processed first.
1096 sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
1097 return VPDT.properlyDominates(A, B);
1098 });
1099
1100 for (VPRecipeBase *SinkCandidate : WorkList) {
1101 if (SinkCandidate == FOR)
1102 continue;
1103
1104 SinkCandidate->moveAfter(Previous);
1105 Previous = SinkCandidate;
1106 }
1107 return true;
1108}
1109
1110/// Try to hoist \p Previous and its operands before all users of \p FOR.
1112 VPRecipeBase *Previous,
1113 VPDominatorTree &VPDT) {
1114 if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
1115 return false;
1116
1117 // Collect recipes that need hoisting.
1118 SmallVector<VPRecipeBase *> HoistCandidates;
1120 VPRecipeBase *HoistPoint = nullptr;
1121 // Find the closest hoist point by looking at all users of FOR and selecting
1122 // the recipe dominating all other users.
1123 for (VPUser *U : FOR->users()) {
1124 auto *R = cast<VPRecipeBase>(U);
1125 if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint))
1126 HoistPoint = R;
1127 }
1128 assert(all_of(FOR->users(),
1129 [&VPDT, HoistPoint](VPUser *U) {
1130 auto *R = cast<VPRecipeBase>(U);
1131 return HoistPoint == R ||
1132 VPDT.properlyDominates(HoistPoint, R);
1133 }) &&
1134 "HoistPoint must dominate all users of FOR");
1135
1136 auto NeedsHoisting = [HoistPoint, &VPDT,
1137 &Visited](VPValue *HoistCandidateV) -> VPRecipeBase * {
1138 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
1139 if (!HoistCandidate)
1140 return nullptr;
1141 VPRegionBlock *EnclosingLoopRegion =
1142 HoistCandidate->getParent()->getEnclosingLoopRegion();
1143 assert((!HoistCandidate->getParent()->getParent() ||
1144 HoistCandidate->getParent()->getParent() == EnclosingLoopRegion) &&
1145 "CFG in VPlan should still be flat, without replicate regions");
1146 // Hoist candidate was already visited, no need to hoist.
1147 if (!Visited.insert(HoistCandidate).second)
1148 return nullptr;
1149
1150 // Candidate is outside loop region or a header phi, dominates FOR users w/o
1151 // hoisting.
1152 if (!EnclosingLoopRegion || isa<VPHeaderPHIRecipe>(HoistCandidate))
1153 return nullptr;
1154
1155 // If we reached a recipe that dominates HoistPoint, we don't need to
1156 // hoist the recipe.
1157 if (VPDT.properlyDominates(HoistCandidate, HoistPoint))
1158 return nullptr;
1159 return HoistCandidate;
1160 };
1161 auto CanHoist = [&](VPRecipeBase *HoistCandidate) {
1162 // Avoid hoisting candidates with side-effects, as we do not yet analyze
1163 // associated dependencies.
1164 return !HoistCandidate->mayHaveSideEffects();
1165 };
1166
1167 if (!NeedsHoisting(Previous->getVPSingleValue()))
1168 return true;
1169
1170 // Recursively try to hoist Previous and its operands before all users of FOR.
1171 HoistCandidates.push_back(Previous);
1172
1173 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
1174 VPRecipeBase *Current = HoistCandidates[I];
1175 assert(Current->getNumDefinedValues() == 1 &&
1176 "only recipes with a single defined value expected");
1177 if (!CanHoist(Current))
1178 return false;
1179
1180 for (VPValue *Op : Current->operands()) {
1181 // If we reach FOR, it means the original Previous depends on some other
1182 // recurrence that in turn depends on FOR. If that is the case, we would
1183 // also need to hoist recipes involving the other FOR, which may break
1184 // dependencies.
1185 if (Op == FOR)
1186 return false;
1187
1188 if (auto *R = NeedsHoisting(Op))
1189 HoistCandidates.push_back(R);
1190 }
1191 }
1192
1193 // Order recipes to hoist by dominance so earlier instructions are processed
1194 // first.
1195 sort(HoistCandidates, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
1196 return VPDT.properlyDominates(A, B);
1197 });
1198
1199 for (VPRecipeBase *HoistCandidate : HoistCandidates) {
1200 HoistCandidate->moveBefore(*HoistPoint->getParent(),
1201 HoistPoint->getIterator());
1202 }
1203
1204 return true;
1205}
1206
// Rearrange recipes so every fixed-order recurrence phi can be materialized
// as a splice of its incoming and previous values. Returns false when users
// can neither be sunk after, nor the previous value hoisted before, its
// position. (The signature line is elided in this view; presumably
// VPlanTransforms::adjustFixedOrderRecurrences — confirm upstream.)
1208                                                   VPBuilder &LoopBuilder) {
1209   VPDominatorTree VPDT;
1210   VPDT.recalculate(Plan);
1211 
  // Collect all fixed-order recurrence header phis first, as the loop below
  // modifies their users. (Declaration of RecurrencePhis and the phi range
  // expression are elided in this view.)
1213   for (VPRecipeBase &R :
1215     if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
1216       RecurrencePhis.push_back(FOR);
1217 
1218   for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
  // (Declaration of the SeenPhis set is elided in this view.)
1220     VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
1221     // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
1222     // to terminate.
1223     while (auto *PrevPhi =
1224                dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
1225       assert(PrevPhi->getParent() == FOR->getParent());
1226       assert(SeenPhis.insert(PrevPhi).second);
1227       Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
1228     }
1229 
1230     if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT) &&
1231         !hoistPreviousBeforeFORUsers(FOR, Previous, VPDT))
1232       return false;
1233 
1234     // Introduce a recipe to combine the incoming and previous values of a
1235     // fixed-order recurrence.
1236     VPBasicBlock *InsertBlock = Previous->getParent();
1237     if (isa<VPHeaderPHIRecipe>(Previous))
1238       LoopBuilder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
1239     else
1240       LoopBuilder.setInsertPoint(InsertBlock,
1241                                  std::next(Previous->getIterator()));
1242 
  // (The createNaryOp call creating the splice is partially elided in this
  // view.)
1243     auto *RecurSplice = cast<VPInstruction>(
1245         {FOR, FOR->getBackedgeValue()}));
1246 
1247     FOR->replaceAllUsesWith(RecurSplice);
1248     // Set the first operand of RecurSplice to FOR again, after replacing
1249     // all users.
1250     RecurSplice->setOperand(0, FOR);
1251   }
1252   return true;
1253 }
1254
// Drop poison-generating flags from all transitive users of add/mul
// reduction phis. (The enclosing function signature and the phi range
// expression are elided in this view; presumably
// VPlanTransforms::clearReductionWrapFlags — confirm upstream.)
1256   for (VPRecipeBase &R :
1258     auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
1259     if (!PhiR)
1260       continue;
1261     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
1262     RecurKind RK = RdxDesc.getRecurrenceKind();
    // Only Add and Mul reduction kinds are handled here.
1263     if (RK != RecurKind::Add && RK != RecurKind::Mul)
1264       continue;
1265 
    // Strip flags on every recipe reachable from the reduction phi.
1266     for (VPUser *U : collectUsersRecursively(PhiR))
1267       if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) {
1268         RecWithFlags->dropPoisonGeneratingFlags();
1269       }
1270   }
1271 }
1272
1273/// Move loop-invariant recipes out of the vector loop region in \p Plan.
1274static void licm(VPlan &Plan) {
1275 VPBasicBlock *Preheader = Plan.getVectorPreheader();
1276
1277 // Return true if we do not know how to (mechanically) hoist a given recipe
1278 // out of a loop region. Does not address legality concerns such as aliasing
1279 // or speculation safety.
1280 auto CannotHoistRecipe = [](VPRecipeBase &R) {
1281 // Allocas cannot be hoisted.
1282 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
1283 return RepR && RepR->getOpcode() == Instruction::Alloca;
1284 };
1285
1286 // Hoist any loop invariant recipes from the vector loop region to the
1287 // preheader. Preform a shallow traversal of the vector loop region, to
1288 // exclude recipes in replicate regions.
1289 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1290 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1291 vp_depth_first_shallow(LoopRegion->getEntry()))) {
1292 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1293 if (CannotHoistRecipe(R))
1294 continue;
1295 // TODO: Relax checks in the future, e.g. we could also hoist reads, if
1296 // their memory location is not modified in the vector loop.
1297 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
1298 any_of(R.operands(), [](VPValue *Op) {
1299 return !Op->isDefinedOutsideLoopRegions();
1300 }))
1301 continue;
1302 R.moveBefore(*Preheader, Preheader->end());
1303 }
1304 }
1305}
1306
// Shrink integer operations to the minimal bit widths recorded in \p MinBWs,
// inserting truncates for operands and extends for results as needed. (The
// signature line is elided in this view; presumably
// VPlanTransforms::truncateToMinimalBitwidths — confirm upstream.)
1308     VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs) {
1309 #ifndef NDEBUG
1310   // Count the processed recipes and cross check the count later with MinBWs
1311   // size, to make sure all entries in MinBWs have been handled.
1312   unsigned NumProcessedRecipes = 0;
1313 #endif
1314   // Keep track of created truncates, so they can be re-used. Note that we
1315   // cannot use RAUW after creating a new truncate, as this could make
1316   // other uses have different types for their operands, making them invalidly
1317   // typed.
  // (Declaration of the ProcessedTruncs map is elided in this view.)
1319   Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
1320   VPTypeAnalysis TypeInfo(CanonicalIVType);
1321   VPBasicBlock *PH = Plan.getVectorPreheader();
1322   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
  // (The traversal range expression and the recipe-kind filter are elided in
  // this view.)
1324     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1327         continue;
1328 
      // A zero lookup result means the recipe has no MinBWs entry.
1329       VPValue *ResultVPV = R.getVPSingleValue();
1330       auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
1331       unsigned NewResSizeInBits = MinBWs.lookup(UI);
1332       if (!NewResSizeInBits)
1333         continue;
1334 
1335 #ifndef NDEBUG
1336       NumProcessedRecipes++;
1337 #endif
1338       // If the value wasn't vectorized, we must maintain the original scalar
1339       // type. Skip those here, after incrementing NumProcessedRecipes. Also
1340       // skip casts which do not need to be handled explicitly here, as
1341       // redundant casts will be removed during recipe simplification.
1342       if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
1343 #ifndef NDEBUG
1344         // If any of the operands is a live-in and not used by VPWidenRecipe or
1345         // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
1346         // processed as well. When MinBWs is currently constructed, there is no
1347         // information about whether recipes are widened or replicated and in
1348         // case they are replicated the operands are not truncated. Counting
1349         // them here ensures we do not miss any recipes in MinBWs.
1350         // TODO: Remove once the analysis is done on VPlan.
1351         for (VPValue *Op : R.operands()) {
1352           if (!Op->isLiveIn())
1353             continue;
1354           auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
1355           if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
1356               none_of(Op->users(),
1357                       IsaPred<VPWidenRecipe, VPWidenSelectRecipe>)) {
1358             // Add an entry to ProcessedTruncs to avoid counting the same
1359             // operand multiple times.
1360             ProcessedTruncs[Op] = nullptr;
1361             NumProcessedRecipes += 1;
1362           }
1363         }
1364 #endif
1365         continue;
1366       }
1367 
1368       Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
1369       unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
1370       assert(OldResTy->isIntegerTy() && "only integer types supported");
1371       (void)OldResSizeInBits;
1372 
1373       LLVMContext &Ctx = CanonicalIVType->getContext();
1374       auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);
1375 
1376       // Any wrapping introduced by shrinking this operation shouldn't be
1377       // considered undefined behavior. So, we can't unconditionally copy
1378       // arithmetic wrapping flags to VPW.
1379       if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
1380         VPW->dropPoisonGeneratingFlags();
1381 
1382       using namespace llvm::VPlanPatternMatch;
1383       if (OldResSizeInBits != NewResSizeInBits &&
1384           !match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue()))) {
1385         // Extend result to original width.
1386         auto *Ext =
1387             new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
1388         Ext->insertAfter(&R);
        // Redirect users to the extend, then restore the extend's own operand
        // (replaceAllUsesWith also rewrote it).
1389         ResultVPV->replaceAllUsesWith(Ext);
1390         Ext->setOperand(0, ResultVPV);
1391         assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
1392       } else {
1393         assert(
1394             match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
1395             "Only ICmps should not need extending the result.");
1396       }
1397 
1398       assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
1399       if (isa<VPWidenLoadRecipe>(&R))
1400         continue;
1401 
1402       // Shrink operands by introducing truncates as needed.
      // For selects, operand 0 is the condition and keeps its type.
1403       unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
1404       for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
1405         auto *Op = R.getOperand(Idx);
1406         unsigned OpSizeInBits =
1408         if (OpSizeInBits == NewResSizeInBits)
1409           continue;
1410         assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
        // Re-use an existing truncate for this operand if one was already
        // created; otherwise create and record a new one.
1411         auto [ProcessedIter, IterIsEmpty] =
1412             ProcessedTruncs.insert({Op, nullptr});
1413         VPWidenCastRecipe *NewOp =
1414             IterIsEmpty
1415                 ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
1416                 : ProcessedIter->second;
1417         R.setOperand(Idx, NewOp);
1418         if (!IterIsEmpty)
1419           continue;
1420         ProcessedIter->second = NewOp;
        // Live-in operands are truncated once in the preheader; others right
        // before their user.
1421         if (!Op->isLiveIn()) {
1422           NewOp->insertBefore(&R);
1423         } else {
1424           PH->appendRecipe(NewOp);
1425 #ifndef NDEBUG
1426           auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue());
1427           bool IsContained = MinBWs.contains(OpInst);
1428           NumProcessedRecipes += IsContained;
1429 #endif
1430         }
1431       }
1432 
1433     }
1434   }
1435 
1436   assert(MinBWs.size() == NumProcessedRecipes &&
1437          "some entries in MinBWs haven't been processed");
1438 }
1439
1443
1450
  // Hoist loop-invariant recipes out of the vector loop region.
  // NOTE(review): the preceding passes of this pipeline are elided in this
  // view — confirm the full sequence upstream.
1453   runPass(licm, Plan);
1454 }
1455
1456// Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace
1457// the loop terminator with a branch-on-cond recipe with the negated
1458// active-lane-mask as operand. Note that this turns the loop into an
1459// uncountable one. Only the existing terminator is replaced, all other existing
1460// recipes/users remain unchanged, except for poison-generating flags being
1461// dropped from the canonical IV increment. Return the created
1462// VPActiveLaneMaskPHIRecipe.
1463//
1464// The function uses the following definitions:
1465//
1466// %TripCount = DataWithControlFlowWithoutRuntimeCheck ?
1467// calculate-trip-count-minus-VF (original TC) : original TC
1468// %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ?
1469// CanonicalIVPhi : CanonicalIVIncrement
1470// %StartV is the canonical induction start value.
1471//
1472// The function adds the following recipes:
1473//
1474// vector.ph:
1475// %TripCount = calculate-trip-count-minus-VF (original TC)
1476// [if DataWithControlFlowWithoutRuntimeCheck]
1477// %EntryInc = canonical-iv-increment-for-part %StartV
1478// %EntryALM = active-lane-mask %EntryInc, %TripCount
1479//
1480// vector.body:
1481// ...
1482// %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]
1483// ...
1484// %InLoopInc = canonical-iv-increment-for-part %IncrementValue
1485// %ALM = active-lane-mask %InLoopInc, TripCount
1486// %Negated = Not %ALM
1487// branch-on-cond %Negated
1488//
1491 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
1492 VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
1493 auto *CanonicalIVPHI = Plan.getCanonicalIV();
1494 VPValue *StartV = CanonicalIVPHI->getStartValue();
1495
1496 auto *CanonicalIVIncrement =
1497 cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
1498 // TODO: Check if dropping the flags is needed if
1499 // !DataAndControlFlowWithoutRuntimeCheck.
1500 CanonicalIVIncrement->dropPoisonGeneratingFlags();
1501 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
1502 // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
1503 // we have to take unrolling into account. Each part needs to start at
1504 // Part * VF
1505 auto *VecPreheader = Plan.getVectorPreheader();
1506 VPBuilder Builder(VecPreheader);
1507
1508 // Create the ActiveLaneMask instruction using the correct start values.
1509 VPValue *TC = Plan.getTripCount();
1510
1511 VPValue *TripCount, *IncrementValue;
1513 // When the loop is guarded by a runtime overflow check for the loop
1514 // induction variable increment by VF, we can increment the value before
1515 // the get.active.lane mask and use the unmodified tripcount.
1516 IncrementValue = CanonicalIVIncrement;
1517 TripCount = TC;
1518 } else {
1519 // When avoiding a runtime check, the active.lane.mask inside the loop
1520 // uses a modified trip count and the induction variable increment is
1521 // done after the active.lane.mask intrinsic is called.
1522 IncrementValue = CanonicalIVPHI;
1524 {TC}, DL);
1525 }
1526 auto *EntryIncrement = Builder.createOverflowingOp(
1527 VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,
1528 "index.part.next");
1529
1530 // Create the active lane mask instruction in the VPlan preheader.
1531 auto *EntryALM =
1532 Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},
1533 DL, "active.lane.mask.entry");
1534
1535 // Now create the ActiveLaneMaskPhi recipe in the main loop using the
1536 // preheader ActiveLaneMask instruction.
1537 auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
1538 LaneMaskPhi->insertAfter(CanonicalIVPHI);
1539
1540 // Create the active lane mask for the next iteration of the loop before the
1541 // original terminator.
1542 VPRecipeBase *OriginalTerminator = EB->getTerminator();
1543 Builder.setInsertPoint(OriginalTerminator);
1544 auto *InLoopIncrement =
1546 {IncrementValue}, {false, false}, DL);
1547 auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
1548 {InLoopIncrement, TripCount}, DL,
1549 "active.lane.mask.next");
1550 LaneMaskPhi->addOperand(ALM);
1551
1552 // Replace the original terminator with BranchOnCond. We have to invert the
1553 // mask here because a true condition means jumping to the exit block.
1554 auto *NotMask = Builder.createNot(ALM, DL);
1555 Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
1556 OriginalTerminator->eraseFromParent();
1557 return LaneMaskPhi;
1558}
1559
1560/// Collect all VPValues representing a header mask through the (ICMP_ULE,
1561/// WideCanonicalIV, backedge-taken-count) pattern.
1562/// TODO: Introduce explicit recipe for header-mask instead of searching
1563/// for the header-mask pattern manually.
1565 SmallVector<VPValue *> WideCanonicalIVs;
1566 auto *FoundWidenCanonicalIVUser =
1567 find_if(Plan.getCanonicalIV()->users(),
1568 [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1570 [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1571 1 &&
1572 "Must have at most one VPWideCanonicalIVRecipe");
1573 if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
1574 auto *WideCanonicalIV =
1575 cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1576 WideCanonicalIVs.push_back(WideCanonicalIV);
1577 }
1578
1579 // Also include VPWidenIntOrFpInductionRecipes that represent a widened
1580 // version of the canonical induction.
1581 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1582 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
1583 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1584 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
1585 WideCanonicalIVs.push_back(WidenOriginalIV);
1586 }
1587
1588 // Walk users of wide canonical IVs and collect to all compares of the form
1589 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1590 SmallVector<VPValue *> HeaderMasks;
1591 for (auto *Wide : WideCanonicalIVs) {
1592 for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
1593 auto *HeaderMask = dyn_cast<VPInstruction>(U);
1594 if (!HeaderMask || !vputils::isHeaderMask(HeaderMask, Plan))
1595 continue;
1596
1597 assert(HeaderMask->getOperand(0) == Wide &&
1598 "WidenCanonicalIV must be the first operand of the compare");
1599 HeaderMasks.push_back(HeaderMask);
1600 }
1601 }
1602 return HeaderMasks;
1603}
1604
// Replace header-mask compares with an active-lane-mask computation. (The
// signature line and the start of the assert are elided in this view;
// presumably VPlanTransforms::addActiveLaneMask — confirm upstream.)
1606     VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
1609                       UseActiveLaneMaskForControlFlow) &&
1610          "DataAndControlFlowWithoutRuntimeCheck implies "
1611          "UseActiveLaneMaskForControlFlow");
1612 
1613   auto *FoundWidenCanonicalIVUser =
1614       find_if(Plan.getCanonicalIV()->users(),
1615               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1616   assert(FoundWidenCanonicalIVUser &&
1617          "Must have widened canonical IV when tail folding!");
1618   auto *WideCanonicalIV =
1619       cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1620   VPSingleDefRecipe *LaneMask;
1621   if (UseActiveLaneMaskForControlFlow) {
    // (The call creating the lane-mask phi — presumably
    // addVPLaneMaskPhiAndUpdateExitBranch — is elided in this view.)
1624   } else {
    // Without lane-mask-based control flow, compute the mask directly after
    // the widened canonical IV.
1625     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
1626     LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
1627                               {WideCanonicalIV, Plan.getTripCount()}, nullptr,
1628                               "active.lane.mask");
1629   }
1630 
1631   // Walk users of WideCanonicalIV and replace all compares of the form
1632   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
1633   // active-lane-mask.
1634   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
1635     HeaderMask->replaceAllUsesWith(LaneMask);
1636 }
1637
1638/// Try to convert \p CurRecipe to a corresponding EVL-based recipe. Returns
1639/// nullptr if no EVL-based recipe could be created.
1640/// \p HeaderMask Header Mask.
1641/// \p CurRecipe Recipe to be transform.
1642/// \p TypeInfo VPlan-based type analysis.
1643/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
1644/// \p EVL The explicit vector length parameter of vector-predication
1645/// intrinsics.
1647 VPRecipeBase &CurRecipe,
1648 VPTypeAnalysis &TypeInfo,
1649 VPValue &AllOneMask, VPValue &EVL) {
1650 using namespace llvm::VPlanPatternMatch;
1651 auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
1652 assert(OrigMask && "Unmasked recipe when folding tail");
1653 return HeaderMask == OrigMask ? nullptr : OrigMask;
1654 };
1655
1658 VPValue *NewMask = GetNewMask(L->getMask());
1659 return new VPWidenLoadEVLRecipe(*L, EVL, NewMask);
1660 })
1661 .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
1662 VPValue *NewMask = GetNewMask(S->getMask());
1663 return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
1664 })
1665 .Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
1666 unsigned Opcode = W->getOpcode();
1667 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1668 return nullptr;
1669 return new VPWidenEVLRecipe(*W, EVL);
1670 })
1671 .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
1672 VPValue *NewMask = GetNewMask(Red->getCondOp());
1673 return new VPReductionEVLRecipe(*Red, EVL, NewMask);
1674 })
1675 .Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
1676 [&](auto *CR) -> VPRecipeBase * {
1677 Intrinsic::ID VPID;
1678 if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR)) {
1679 VPID =
1680 VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
1681 } else {
1682 auto *CastR = cast<VPWidenCastRecipe>(CR);
1683 VPID = VPIntrinsic::getForOpcode(CastR->getOpcode());
1684 }
1685
1686 // Not all intrinsics have a corresponding VP intrinsic.
1687 if (VPID == Intrinsic::not_intrinsic)
1688 return nullptr;
1691 "Expected VP intrinsic to have mask and EVL");
1692
1693 SmallVector<VPValue *> Ops(CR->operands());
1694 Ops.push_back(&AllOneMask);
1695 Ops.push_back(&EVL);
1696 return new VPWidenIntrinsicRecipe(
1697 VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
1698 })
1699 .Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
1700 SmallVector<VPValue *> Ops(Sel->operands());
1701 Ops.push_back(&EVL);
1702 return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops,
1703 TypeInfo.inferScalarType(Sel),
1704 Sel->getDebugLoc());
1705 })
1706 .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
1707 VPValue *LHS, *RHS;
1708 // Transform select with a header mask condition
1709 // select(header_mask, LHS, RHS)
1710 // into vector predication merge.
1711 // vp.merge(all-true, LHS, RHS, EVL)
1712 if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
1713 m_VPValue(RHS))))
1714 return nullptr;
1715 // Use all true as the condition because this transformation is
1716 // limited to selects whose condition is a header mask.
1717 return new VPWidenIntrinsicRecipe(
1718 Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
1719 TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
1720 })
1721 .Default([&](VPRecipeBase *R) { return nullptr; });
1722}
1723
1724/// Replace recipes with their EVL variants.
1726 Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
1727 VPTypeAnalysis TypeInfo(CanonicalIVType);
1728 LLVMContext &Ctx = CanonicalIVType->getContext();
1729 VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1730
1731 for (VPUser *U : to_vector(Plan.getVF().users())) {
1732 if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))
1733 R->setOperand(1, &EVL);
1734 }
1735
1737
1738 for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1739 for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1740 auto *CurRecipe = cast<VPRecipeBase>(U);
1741 VPRecipeBase *EVLRecipe =
1742 createEVLRecipe(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
1743 if (!EVLRecipe)
1744 continue;
1745
1746 [[maybe_unused]] unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
1747 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
1748 "New recipe must define the same number of values as the "
1749 "original.");
1750 assert(
1751 NumDefVal <= 1 &&
1752 "Only supports recipes with a single definition or without users.");
1753 EVLRecipe->insertBefore(CurRecipe);
1754 if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(EVLRecipe)) {
1755 VPValue *CurVPV = CurRecipe->getVPSingleValue();
1756 CurVPV->replaceAllUsesWith(EVLRecipe->getVPSingleValue());
1757 }
1758 // Defer erasing recipes till the end so that we don't invalidate the
1759 // VPTypeAnalysis cache.
1760 ToErase.push_back(CurRecipe);
1761 }
1762 }
1763
1764 for (VPRecipeBase *R : reverse(ToErase)) {
1765 SmallVector<VPValue *> PossiblyDead(R->operands());
1766 R->eraseFromParent();
1767 for (VPValue *Op : PossiblyDead)
1769 }
1770}
1771
1772/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1773/// replaces all uses except the canonical IV increment of
1774/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
1775/// is used only for loop iterations counting after this transformation.
1776///
1777/// The function uses the following definitions:
1778/// %StartV is the canonical induction start value.
1779///
1780/// The function adds the following recipes:
1781///
1782/// vector.ph:
1783/// ...
1784///
1785/// vector.body:
1786/// ...
1787/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
1788/// [ %NextEVLIV, %vector.body ]
1789/// %AVL = sub original TC, %EVLPhi
1790/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL
1791/// ...
1792/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
1793/// ...
1794///
1795/// If MaxSafeElements is provided, the function adds the following recipes:
1796/// vector.ph:
1797/// ...
1798///
1799/// vector.body:
1800/// ...
1801/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
1802/// [ %NextEVLIV, %vector.body ]
1803/// %AVL = sub original TC, %EVLPhi
1804/// %cmp = cmp ult %AVL, MaxSafeElements
1805/// %SAFE_AVL = select %cmp, %AVL, MaxSafeElements
1806/// %VPEVL = EXPLICIT-VECTOR-LENGTH %SAFE_AVL
1807/// ...
1808/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
1809/// ...
1810///
// NOTE(review): original line 1811 (the signature start — presumably
// `bool VPlanTransforms::tryAddExplicitVectorLength(`) and line 1813
// (presumably the `Header` basic-block lookup used below) are missing from
// this extract; confirm against the upstream file.
1812 VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
1814 // The transform updates all users of inductions to work based on EVL, instead
1815 // of the VF directly. At the moment, widened inductions cannot be updated, so
1816 // bail out if the plan contains any.
1817 bool ContainsWidenInductions = any_of(
1818 Header->phis(),
1819 IsaPred<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>);
1820 if (ContainsWidenInductions)
1821 return false;
1822
1823 auto *CanonicalIVPHI = Plan.getCanonicalIV();
1824 VPValue *StartV = CanonicalIVPHI->getStartValue();
1825
1826 // Create the ExplicitVectorLengthPhi recipe in the main loop.
1827 auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
1828 EVLPhi->insertAfter(CanonicalIVPHI)
1829 VPBuilder Builder(Header, Header->getFirstNonPhi());
1830 // Compute original TC - IV as the AVL (application vector length).
1831 VPValue *AVL = Builder.createNaryOp(
1832 Instruction::Sub, {Plan.getTripCount(), EVLPhi}, DebugLoc(), "avl");
1833 if (MaxSafeElements) {
1834 // Support for MaxSafeDist for correct loop emission.
// Clamp AVL to MaxSafeElements: AVL = min(AVL, MaxSafeElements), expressed
// as an unsigned compare plus select.
1835 VPValue *AVLSafe = Plan.getOrAddLiveIn(
1836 ConstantInt::get(CanonicalIVPHI->getScalarType(), *MaxSafeElements));
1837 VPValue *Cmp = Builder.createICmp(ICmpInst::ICMP_ULT, AVL, AVLSafe);
1838 AVL = Builder.createSelect(Cmp, AVL, AVLSafe, DebugLoc(), "safe_avl");
1839 }
1840 auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL,
1841 DebugLoc());
1842
1843 auto *CanonicalIVIncrement =
1844 cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
// ExplicitVectorLength produces an i32; cast it to the canonical IV's width
// (trunc or zext) before using it as the IV increment.
1845 VPSingleDefRecipe *OpVPEVL = VPEVL;
1846 if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits();
1847 IVSize != 32) {
1848 OpVPEVL = new VPScalarCastRecipe(
1849 IVSize < 32 ? Instruction::Trunc : Instruction::ZExt, OpVPEVL,
1850 CanonicalIVPHI->getScalarType(), CanonicalIVIncrement->getDebugLoc());
1851 OpVPEVL->insertBefore(CanonicalIVIncrement);
1852 }
// The EVL-based IV steps by the (possibly clamped) EVL each iteration,
// inheriting the wrap flags of the canonical IV increment.
1853 auto *NextEVLIV =
1854 new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi},
1855 {CanonicalIVIncrement->hasNoUnsignedWrap(),
1856 CanonicalIVIncrement->hasNoSignedWrap()},
1857 CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
1858 NextEVLIV->insertBefore(CanonicalIVIncrement);
1859 EVLPhi->addOperand(NextEVLIV);
1860
1861 transformRecipestoEVLRecipes(Plan, *VPEVL);
1862
1863 // Replace all uses of VPCanonicalIVPHIRecipe by
1864 // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
1865 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
1866 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
1867 // TODO: support unroll factor > 1.
1868 Plan.setUF(1);
1869 return true;
1870}
1871
// NOTE(review): original line 1872 (the signature start — presumably
// `void VPlanTransforms::dropPoisonGeneratingRecipes(`) and lines 1877/1879
// (presumably the `Visited` set and `Worklist` vector declarations used
// below) are missing from this extract; confirm against the upstream file.
1873 VPlan &Plan,
1874 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {
1875 // Collect recipes in the backward slice of `Root` that may generate a poison
1876 // value that is used after vectorization.
1878 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
1880 Worklist.push_back(Root);
1881
1882 // Traverse the backward slice of Root through its use-def chain.
1883 while (!Worklist.empty()) {
1884 VPRecipeBase *CurRec = Worklist.pop_back_val();
1885
1886 if (!Visited.insert(CurRec).second)
1887 continue;
1888
1889 // Prune search if we find another recipe generating a widen memory
1890 // instruction. Widen memory instructions involved in address computation
1891 // will lead to gather/scatter instructions, which don't need to be
1892 // handled.
1893 if (isa<VPWidenMemoryRecipe, VPInterleaveRecipe, VPScalarIVStepsRecipe,
1894 VPHeaderPHIRecipe>(CurRec))
1895 continue;
1896
1897 // This recipe contributes to the address computation of a widen
1898 // load/store. If the underlying instruction has poison-generating flags,
1899 // drop them directly.
1900 if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
1901 VPValue *A, *B;
1902 using namespace llvm::VPlanPatternMatch;
1903 // Dropping disjoint from an OR may yield incorrect results, as some
1904 // analysis may have converted it to an Add implicitly (e.g. SCEV used
1905 // for dependence analysis). Instead, replace it with an equivalent Add.
1906 // This is possible as all users of the disjoint OR only access lanes
1907 // where the operands are disjoint or poison otherwise.
1908 if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
1909 RecWithFlags->isDisjoint()) {
1910 VPBuilder Builder(RecWithFlags);
// The replacement Add is created without nuw/nsw so it cannot generate
// poison where the disjoint OR could.
1911 VPInstruction *New = Builder.createOverflowingOp(
1912 Instruction::Add, {A, B}, {false, false},
1913 RecWithFlags->getDebugLoc());
1914 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
1915 RecWithFlags->replaceAllUsesWith(New);
1916 RecWithFlags->eraseFromParent();
// Continue the backward walk from the replacement, not the erased recipe.
1917 CurRec = New;
1918 } else
1919 RecWithFlags->dropPoisonGeneratingFlags();
1920 } else {
// (void)Instr silences the unused-variable warning in release builds,
// where the assert below compiles away.
1921 Instruction *Instr = dyn_cast_or_null<Instruction>(
1922 CurRec->getVPSingleValue()->getUnderlyingValue());
1923 (void)Instr;
1924 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
1925 "found instruction with poison generating flags not covered by "
1926 "VPRecipeWithIRFlags");
1927 }
1928
1929 // Add new definitions to the worklist.
1930 for (VPValue *Operand : CurRec->operands())
1931 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())
1932 Worklist.push_back(OpDef);
1933 }
1934 });
1935
1936 // Traverse all the recipes in the VPlan and collect the poison-generating
1937 // recipes in the backward slice starting at the address of a VPWidenRecipe or
1938 // VPInterleaveRecipe.
1939 auto Iter = vp_depth_first_deep(Plan.getEntry());
1940 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
1941 for (VPRecipeBase &Recipe : *VPBB) {
1942 if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
1943 Instruction &UnderlyingInstr = WidenRec->getIngredient();
1944 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
// Only consecutive (unit-stride) accesses in predicated blocks need the
// flag-dropping treatment; gathers/scatters are handled lane-wise.
1945 if (AddrDef && WidenRec->isConsecutive() &&
1946 BlockNeedsPredication(UnderlyingInstr.getParent()))
1947 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
1948 } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
1949 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
1950 if (AddrDef) {
1951 // Check if any member of the interleave group needs predication.
1952 const InterleaveGroup<Instruction> *InterGroup =
1953 InterleaveRec->getInterleaveGroup();
1954 bool NeedPredication = false;
// Members may be null for gaps in the group, hence the null check.
1955 for (int I = 0, NumMembers = InterGroup->getNumMembers();
1956 I < NumMembers; ++I) {
1957 Instruction *Member = InterGroup->getMember(I);
1958 if (Member)
1959 NeedPredication |= BlockNeedsPredication(Member->getParent());
1960 }
1961
1962 if (NeedPredication)
1963 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
1964 }
1965 }
1966 }
1967 }
1968}
1969
// NOTE(review): original lines 1970 and 1972 (the signature start —
// presumably `void VPlanTransforms::createInterleaveGroups(` and the
// container type of the `InterleaveGroups` parameter) are missing from this
// extract; confirm against the upstream file.
1971 VPlan &Plan,
1973 &InterleaveGroups,
1974 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed) {
1975 if (InterleaveGroups.empty())
1976 return;
1977
1978 // Interleave memory: for each Interleave Group we marked earlier as relevant
1979 // for this VPlan, replace the Recipes widening its memory instructions with a
1980 // single VPInterleaveRecipe at its insertion point.
1981 VPDominatorTree VPDT;
1982 VPDT.recalculate(Plan);
1983 for (const auto *IG : InterleaveGroups) {
// Gather the stored values of the group's store members, in member order;
// load-only groups leave StoredValues empty.
1984 SmallVector<VPValue *, 4> StoredValues;
1985 for (unsigned i = 0; i < IG->getFactor(); ++i)
1986 if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
1987 auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
1988 StoredValues.push_back(StoreR->getStoredValue());
1989 }
1990
1991 bool NeedsMaskForGaps =
1992 IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed;
1993
1994 Instruction *IRInsertPos = IG->getInsertPos();
1995 auto *InsertPos =
1996 cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
1997
1998 // Get or create the start address for the interleave group.
1999 auto *Start =
2000 cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
2001 VPValue *Addr = Start->getAddr();
2002 VPRecipeBase *AddrDef = Addr->getDefiningRecipe();
2003 if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) {
2004 // TODO: Hoist Addr's defining recipe (and any operands as needed) to
2005 // InsertPos or sink loads above zero members to join it.
2006 bool InBounds = false;
2007 if (auto *Gep = dyn_cast<GetElementPtrInst>(
2008 getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
2009 InBounds = Gep->isInBounds();
2010
2011 // We cannot re-use the address of member zero because it does not
2012 // dominate the insert position. Instead, use the address of the insert
2013 // position and create a PtrAdd adjusting it to the address of member
2014 // zero.
2015 assert(IG->getIndex(IRInsertPos) != 0 &&
2016 "index of insert position shouldn't be zero");
// Offset = index-of-insert-pos * element allocation size; the PtrAdd below
// uses its negation to step back from the insert position to member zero.
2017 auto &DL = IRInsertPos->getDataLayout();
2018 APInt Offset(32,
2019 DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
2020 IG->getIndex(IRInsertPos),
2021 /*IsSigned=*/true);
2022 VPValue *OffsetVPV = Plan.getOrAddLiveIn(
2023 ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
2024 VPBuilder B(InsertPos);
2025 Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
2026 : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
2027 }
2028 auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
2029 InsertPos->getMask(), NeedsMaskForGaps);
2030 VPIG->insertBefore(InsertPos);
2031
// Redirect uses of each non-void member (loads) to the corresponding value
// defined by the interleave recipe, then erase the original member recipes.
2032 unsigned J = 0;
2033 for (unsigned i = 0; i < IG->getFactor(); ++i)
2034 if (Instruction *Member = IG->getMember(i)) {
2035 VPRecipeBase *MemberR = RecipeBuilder.getRecipe(Member);
2036 if (!Member->getType()->isVoidTy()) {
2037 VPValue *OriginalV = MemberR->getVPSingleValue();
2038 OriginalV->replaceAllUsesWith(VPIG->getVPValue(J));
2039 J++;
2040 }
2041 MemberR->eraseFromParent();
2042 }
2043 }
2044}
2044}
2045
// NOTE(review): original line 2046 (the function signature) is missing from
// this extract; the loop below lowers abstract canonical-IV / EVL-based IV
// header phis into plain scalar phi recipes — presumably part of
// VPlanTransforms' concrete-recipe conversion. Confirm against upstream.
2047 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
2048 vp_depth_first_deep(Plan.getEntry()))) {
2049 for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {
2050 if (!isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(&R))
2051 continue;
2052 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
// Keep the IR names stable: "index" for the canonical IV, "evl.based.iv"
// for the EVL-based IV.
2053 StringRef Name =
2054 isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
2055 auto *ScalarR =
2056 new VPScalarPHIRecipe(PhiR->getStartValue(), PhiR->getBackedgeValue(),
2057 PhiR->getDebugLoc(), Name);
2058 ScalarR->insertBefore(PhiR);
2059 PhiR->replaceAllUsesWith(ScalarR);
2060 PhiR->eraseFromParent();
2061 }
2062 }
2063}
2064
// NOTE(review): original line 2065 (the signature start — presumably
// `void VPlanTransforms::handleUncountableEarlyExit(`) and line 2134 (the
// call that builds IncomingFromEarlyExit, presumably an
// `EarlyExitB.createNaryOp(VPInstruction::...` extracting the exiting lane)
// are missing from this extract; confirm against the upstream file.
2066 VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
2067 BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
2068 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2069 auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
2070 VPBuilder Builder(LatchVPBB->getTerminator());
2071 auto *MiddleVPBB = Plan.getMiddleBlock();
2072 VPValue *IsEarlyExitTaken = nullptr;
2073
2074 // Process the uncountable exiting block. Update IsEarlyExitTaken, which
2075 // tracks if the uncountable early exit has been taken. Also split the middle
2076 // block and have it conditionally branch to the early exit block if
2077 // EarlyExitTaken.
2078 auto *EarlyExitingBranch =
2079 cast<BranchInst>(UncountableExitingBlock->getTerminator());
2080 BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2081 BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2082
2083 // The early exit block may or may not be the same as the "countable" exit
2084 // block. Creates a new VPIRBB for the early exit block in case it is distinct
2085 // from the countable exit block.
2086 // TODO: Introduce both exit blocks during VPlan skeleton construction.
2087 VPIRBasicBlock *VPEarlyExitBlock;
2088 if (OrigLoop->getUniqueExitBlock()) {
2089 VPEarlyExitBlock = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]);
2090 } else {
// Whichever branch successor lies outside the loop is the early exit block.
2091 VPEarlyExitBlock = Plan.createVPIRBasicBlock(
2092 !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2093 }
2094
// The block-in mask of the in-loop successor is the "exit NOT taken"
// condition; negate it, then reduce across lanes with AnyOf.
2095 VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2096 OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2097 auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2098 IsEarlyExitTaken =
2099 Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
2100
2101 VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
2102 VPBasicBlock *VectorEarlyExitVPBB =
2103 Plan.createVPBasicBlock("vector.early.exit");
2104 VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2105 VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
// Successor order matters for BranchOnCond: make the early-exit path the
// taken (first) successor of the split middle block.
2106 NewMiddle->swapSuccessors();
2107
2108 VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2109
2110 // Update the exit phis in the early exit block.
2111 VPBuilder MiddleBuilder(NewMiddle);
2112 VPBuilder EarlyExitB(VectorEarlyExitVPBB);
2113 for (VPRecipeBase &R : *VPEarlyExitBlock) {
2114 auto *ExitIRI = cast<VPIRInstruction>(&R);
2115 auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
// Phis are grouped at the top of the block; stop at the first non-phi.
2116 if (!ExitPhi)
2117 break;
2118
2119 VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
2120 ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
2121
2122 if (OrigLoop->getUniqueExitBlock()) {
2123 // If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
2124 // (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB
2125 // which is coming from the original latch.
2126 VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn(
2127 ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
2128 ExitIRI->addOperand(IncomingFromLatch);
2129 ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
2130 }
2131 // Add the incoming value from the early exit.
2132 if (!IncomingFromEarlyExit->isLiveIn())
2133 IncomingFromEarlyExit =
2135 {IncomingFromEarlyExit, EarlyExitTakenCond});
2136 ExitIRI->addOperand(IncomingFromEarlyExit);
2137 }
2138 MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
2139
2140 // Replace the condition controlling the non-early exit from the vector loop
2141 // with one exiting if either the original condition of the vector latch is
2142 // true or the early exit has been taken.
2143 auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
2144 assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
2145 "Unexpected terminator");
// BranchOnCount(IV, TC) becomes: exit when IV == TC OR the early exit fired.
2146 auto *IsLatchExitTaken =
2147 Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
2148 LatchExitingBranch->getOperand(1));
2149 auto *AnyExitTaken = Builder.createNaryOp(
2150 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
2151 Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
2152 LatchExitingBranch->eraseFromParent();
2153}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
iv Induction Variable Users
Definition: IVUsers.cpp:48
licm
Definition: LICM.cpp:378
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
if(PassOpts->AAPipeline)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static bool sinkScalarOperands(VPlan &Plan)
static void removeRedundantInductionCasts(VPlan &Plan)
Remove redundant casts of inductions.
static VPScalarIVStepsRecipe * createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder)
static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Sink users of FOR after the recipe defining the previous value Previous of the recurrence.
static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan)
static VPActiveLaneMaskPHIRecipe * addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck)
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL)
Replace recipes with their EVL variants.
static bool isDeadRecipe(VPRecipeBase &R)
Returns true if R is dead and can be removed.
static void legalizeAndOptimizeInductions(VPlan &Plan)
Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd (IndStart, ScalarIVSteps (0,...
static void addReplicateRegions(VPlan &Plan)
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo)
Try to simplify recipe R.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan)
Remove redundant ExpandSCEVRecipes in Plan's entry block by replacing them with already existing reci...
static SmallVector< VPValue * > collectAllHeaderMasks(VPlan &Plan)
Collect all VPValues representing a header mask through the (ICMP_ULE, WideCanonicalIV,...
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to hoist Previous and its operands before all users of FOR.
static VPWidenInductionRecipe * getOptimizableIVOf(VPValue *VPV)
Check if VPV is an untruncated wide induction, either before or after the increment.
static SmallVector< VPUser * > collectUsersRecursively(VPValue *V)
static void recursivelyDeleteDeadRecipes(VPValue *V)
static VPRegionBlock * createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlan &Plan)
static VPBasicBlock * getPredicatedThenBlock(VPRegionBlock *R)
If R is a triangle region, return the 'then' block of the triangle.
static VPRecipeBase * createEVLRecipe(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, VPValue &AllOneMask, VPValue &EVL)
Try to convert CurRecipe to a corresponding EVL-based recipe.
VPValue * getPredicatedMask(VPRegionBlock *R)
If R is a region with a VPBranchOnMaskRecipe in the entry block, return the mask.
static void removeRedundantCanonicalIVs(VPlan &Plan)
Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV recipe, if it exists.
This file provides utility VPlan to VPlan transformations.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:78
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:699
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
bool isBinaryOp() const
Definition: Instruction.h:315
bool isUnaryOp() const
Definition: Instruction.h:314
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
uint32_t getNumMembers() const
Definition: VectorUtils.h:506
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
bool contains(const KeyT &Key) const
Definition: MapVector.h:163
ValueT lookup(const KeyT &Key) const
Definition: MapVector.h:110
size_type size() const
Definition: MapVector.h:60
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
RecurKind getRecurrenceKind() const
This class represents an analyzed expression in the program.
bool isZero() const
Return true if the expression is a constant zero.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getElementCount(Type *Ty, ElementCount EC)
bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVMContext & getContext() const
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
iterator begin() const
Definition: SmallPtrSet.h:472
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:458
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
Definition: TypeSwitch.h:87
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
Definition: TypeSwitch.h:96
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
op_range operands()
Definition: User.h:288
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2960
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3202
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3277
iterator end()
Definition: VPlan.h:3239
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3290
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:210
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:575
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:545
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:611
const VPRecipeBase & back() const
Definition: VPlan.h:3251
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2160
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:78
VPRegionBlock * getParent()
Definition: VPlan.h:170
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:180
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:309
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:212
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:258
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:206
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:195
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlanUtils.h:212
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlanUtils.h:123
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlanUtils.h:142
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlanUtils.h:161
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2520
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL={}, const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL={}, const Twine &Name="")
VPScalarCastRecipe * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step)
VPInstruction * createOverflowingOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPValue * createNot(VPValue *Operand, DebugLoc DL={}, const Twine &Name="")
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL={}, const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:2899
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2930
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:421
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:394
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
Returns true if A properly dominates B.
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:2995
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3344
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:845
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:851
@ CanonicalIVIncrementForPart
Definition: VPlan.h:866
@ CalculateTripCountMinusVF
Definition: VPlan.h:864
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2227
static std::optional< unsigned > getMaskParamPos(Intrinsic::ID IntrinsicID)
static std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
static Intrinsic::ID getForOpcode(unsigned OC)
The llvm.vp.* intrinsics for this instruction Opcode.
static Intrinsic::ID getForIntrinsic(Intrinsic::ID Id)
The llvm.vp.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2575
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:366
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:455
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
VPBasicBlock * getParent()
Definition: VPlan.h:391
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:460
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPValue * getBlockInMask(BasicBlock *BB) const
Returns the entry mask for the block BB.
VPValue * getVPValueOrAddLiveIn(Value *V)
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2402
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2322
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3379
const VPBlockBase * getEntry() const
Definition: VPlan.h:3415
const VPBlockBase * getExiting() const
Definition: VPlan.h:3427
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2443
bool isUniform() const
Definition: VPlan.h:2487
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2511
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1243
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3145
Recipe to generate a scalar PHI.
Definition: VPlan.h:1925
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:493
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:563
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206
operand_range operands()
Definition: VPlanValue.h:263
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248
operand_iterator op_end()
Definition: VPlanValue.h:261
operand_iterator op_begin()
Definition: VPlanValue.h:259
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:237
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:125
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89
void replaceAllUsesWith(VPValue *New)
Definition: VPlan.cpp:1438
unsigned getNumUsers() const
Definition: VPlanValue.h:117
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition: VPlan.cpp:1442
user_range users()
Definition: VPlanValue.h:138
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3040
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1191
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1144
A recipe for handling GEP instructions.
Definition: VPlan.h:1517
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:1751
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1804
A recipe for widening vector intrinsics.
Definition: VPlan.h:1291
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2677
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1093
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3478
bool hasVF(ElementCount VF) const
Definition: VPlan.h:3672
VPBasicBlock * getEntry()
Definition: VPlan.h:3591
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:3777
bool hasScalableVF() const
Definition: VPlan.h:3673
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3659
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3635
bool hasUF(unsigned UF) const
Definition: VPlan.h:3690
void setVF(ElementCount VF)
Definition: VPlan.h:3666
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:310
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1070
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3610
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:3767
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1270
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3710
bool hasScalarVFOnly() const
Definition: VPlan.h:3683
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:3742
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:3596
void setUF(unsigned UF)
Definition: VPlan.h:3697
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:258
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
IteratorT end() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:41
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlanUtils.cpp:26
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
Definition: VPlanUtils.cpp:72
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
bool isHeaderMask(const VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
Definition: VPlanUtils.cpp:50
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:215
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
Definition: VPlanCFG.h:227
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:74
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition: Casting.h:548
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1945
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ Default
The result values are uniform if and only if all operands are uniform.
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2015
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2735
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2696
A recipe for widening select instructions.
Definition: VPlan.h:1480
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:2815
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:2774
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop, BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder)
Update Plan to account for the uncountable early exit block in UncountableExitingBlock by.
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy)
Perform instcombine-like simplifications on recipes in Plan.
static void optimizeInductionExitUsers(VPlan &Plan, DenseMap< VPValue *, VPValue * > &EndValues)
If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...
static void dropPoisonGeneratingRecipes(VPlan &Plan, const std::function< bool(BasicBlock *)> &BlockNeedsPredication)
Drop poison flags from recipes that may generate a poison value that is used after vectorization,...
static void createAndOptimizeReplicateRegions(VPlan &Plan)
Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...
static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed)
static bool runPass(bool(*Transform)(VPlan &, ArgsTy...), VPlan &Plan, typename std::remove_reference< ArgsTy >::type &...Args)
Helper to run a VPlan transform Transform on VPlan, forwarding extra arguments to the transform.
static void convertToConcreteRecipes(VPlan &Plan)
Lower abstract recipes to concrete ones, that can be codegen'd.
static void removeDeadRecipes(VPlan &Plan)
Remove dead recipes from Plan.
static void clearReductionWrapFlags(VPlan &Plan)
Clear NSW/NUW flags from reduction instructions if necessary.
static bool tryAddExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)
Add a VPEVLBasedIVPHIRecipe and related recipes to Plan and replaces all uses except the canonical IV...
static void VPInstructionsToVPRecipes(VPlanPtr &Plan, function_ref< const InductionDescriptor *(PHINode *)> GetIntOrFpInductionDescriptor, ScalarEvolution &SE, const TargetLibraryInfo &TLI)
Replaces the VPInstructions in Plan with corresponding widen recipes.
static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck)
Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...
static void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...
static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)
Insert truncates and extends for any truncated recipe.
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder)
Try to have all users of fixed-order recurrences appear after the recipe defining their previous valu...
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)
Optimize Plan based on BestVF and BestUF.