LLVM 22.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanDominatorTree.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanTransforms.h"
23#include "llvm/IR/MDBuilder.h"
24
25#define DEBUG_TYPE "vplan"
26
27using namespace llvm;
28using namespace VPlanPatternMatch;
29
30namespace {
31// Class that is used to build the plain CFG for the incoming IR.
32class PlainCFGBuilder {
33 // The outermost loop of the input loop nest considered for vectorization.
34 Loop *TheLoop;
35
36 // Loop Info analysis.
37 LoopInfo *LI;
38
39 // Vectorization plan that we are working on.
40 std::unique_ptr<VPlan> Plan;
41
42 // Builder of the VPlan instruction-level representation.
43 VPBuilder VPIRBuilder;
44
45 // NOTE: The following maps are intentionally destroyed after the plain CFG
46 // construction because subsequent VPlan-to-VPlan transformation may
47 // invalidate them.
48 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
50 // Map incoming Value definitions to their newly-created VPValues.
51 DenseMap<Value *, VPValue *> IRDef2VPValue;
52
53 // Hold phi node's that need to be fixed once the plain CFG has been built.
55
56 // Utility functions.
57 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
58 void fixHeaderPhis();
59 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
60#ifndef NDEBUG
61 bool isExternalDef(Value *Val);
62#endif
63 VPValue *getOrCreateVPOperand(Value *IRVal);
64 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
65
66public:
67 PlainCFGBuilder(Loop *Lp, LoopInfo *LI)
68 : TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(Lp)) {}
69
70 /// Build plain CFG for TheLoop and connect it to Plan's entry.
71 std::unique_ptr<VPlan> buildPlainCFG();
72};
73} // anonymous namespace
74
75// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
76// must have no predecessors.
77void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
78 // Collect VPBB predecessors.
80 for (BasicBlock *Pred : predecessors(BB))
81 VPBBPreds.push_back(getOrCreateVPBB(Pred));
82 VPBB->setPredecessors(VPBBPreds);
83}
84
85static bool isHeaderBB(BasicBlock *BB, Loop *L) {
86 return L && BB == L->getHeader();
87}
88
89// Add operands to VPInstructions representing phi nodes from the input IR.
90void PlainCFGBuilder::fixHeaderPhis() {
91 for (auto *Phi : PhisToFix) {
92 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
93 VPValue *VPVal = IRDef2VPValue[Phi];
94 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
95 auto *PhiR = cast<VPPhi>(VPVal);
96 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
97 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
98 "Expected Phi in header block.");
99 assert(Phi->getNumOperands() == 2 &&
100 "header phi must have exactly 2 operands");
101 for (BasicBlock *Pred : predecessors(Phi->getParent()))
102 PhiR->addOperand(
103 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
104 }
105}
106
107// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
108// existing one if it was already created.
109VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
110 if (auto *VPBB = BB2VPBB.lookup(BB)) {
111 // Retrieve existing VPBB.
112 return VPBB;
113 }
114
115 // Create new VPBB.
116 StringRef Name = BB->getName();
117 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
118 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
119 BB2VPBB[BB] = VPBB;
120 return VPBB;
121}
122
123#ifndef NDEBUG
124// Return true if \p Val is considered an external definition. An external
125// definition is either:
126// 1. A Value that is not an Instruction. This will be refined in the future.
127// 2. An Instruction that is outside of the IR region represented in VPlan,
128// i.e., is not part of the loop nest.
129bool PlainCFGBuilder::isExternalDef(Value *Val) {
130 // All the Values that are not Instructions are considered external
131 // definitions for now.
133 if (!Inst)
134 return true;
135
136 // Check whether Instruction definition is in loop body.
137 return !TheLoop->contains(Inst);
138}
139#endif
140
141// Create a new VPValue or retrieve an existing one for the Instruction's
142// operand \p IRVal. This function must only be used to create/retrieve VPValues
143// for *Instruction's operands* and not to create regular VPInstruction's. For
144// the latter, please, look at 'createVPInstructionsForVPBB'.
145VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
146 auto VPValIt = IRDef2VPValue.find(IRVal);
147 if (VPValIt != IRDef2VPValue.end())
148 // Operand has an associated VPInstruction or VPValue that was previously
149 // created.
150 return VPValIt->second;
151
152 // Operand doesn't have a previously created VPInstruction/VPValue. This
153 // means that operand is:
154 // A) a definition external to VPlan,
155 // B) any other Value without specific representation in VPlan.
156 // For now, we use VPValue to represent A and B and classify both as external
157 // definitions. We may introduce specific VPValue subclasses for them in the
158 // future.
159 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
160
161 // A and B: Create VPValue and add it to the pool of external definitions and
162 // to the Value->VPValue map.
163 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
164 IRDef2VPValue[IRVal] = NewVPVal;
165 return NewVPVal;
166}
167
168// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
169// counterpart. This function must be invoked in RPO so that the operands of a
170// VPInstruction in \p BB have been visited before (except for Phi nodes).
171void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
172 BasicBlock *BB) {
173 VPIRBuilder.setInsertPoint(VPBB);
174 // TODO: Model and preserve debug intrinsics in VPlan.
175 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
176 Instruction *Inst = &InstRef;
177
178 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
179 // visited Inst when we shouldn't, breaking the RPO traversal order.
180 assert(!IRDef2VPValue.count(Inst) &&
181 "Instruction shouldn't have been visited.");
182
183 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
184 // Conditional branch instruction are represented using BranchOnCond
185 // recipes.
186 if (Br->isConditional()) {
187 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
188 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst);
189 }
190
191 // Skip the rest of the Instruction processing for Branch instructions.
192 continue;
193 }
194
195 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
196 // Don't emit recipes for unconditional switch instructions.
197 if (SI->getNumCases() == 0)
198 continue;
199 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
200 for (auto Case : SI->cases())
201 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
202 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst);
203 continue;
204 }
205
206 VPSingleDefRecipe *NewR;
207 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
208 // Phi node's operands may not have been visited at this point. We create
209 // an empty VPInstruction that we will fix once the whole plain CFG has
210 // been built.
211 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
212 NewR->setUnderlyingValue(Phi);
213 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
214 // Header phis need to be fixed after the VPBB for the latch has been
215 // created.
216 PhisToFix.push_back(Phi);
217 } else {
218 // Add operands for VPPhi in the order matching its predecessors in
219 // VPlan.
220 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
221 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
222 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
223 getOrCreateVPOperand(Phi->getIncomingValue(I));
224 }
225 for (VPBlockBase *Pred : VPBB->getPredecessors())
226 NewR->addOperand(
227 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
228 }
229 } else {
230 // Translate LLVM-IR operands into VPValue operands and set them in the
231 // new VPInstruction.
232 SmallVector<VPValue *, 4> VPOperands;
233 for (Value *Op : Inst->operands())
234 VPOperands.push_back(getOrCreateVPOperand(Op));
235
236 // Build VPInstruction for any arbitrary Instruction without specific
237 // representation in VPlan.
238 NewR = cast<VPInstruction>(
239 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
240 }
241
242 IRDef2VPValue[Inst] = NewR;
243 }
244}
245
246// Main interface to build the plain CFG.
247std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
248 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
249 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
250 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
251 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
252
253 // 1. Scan the body of the loop in a topological order to visit each basic
254 // block after having visited its predecessor basic blocks. Create a VPBB for
255 // each BB and link it to its successor and predecessor VPBBs. Note that
256 // predecessors must be set in the same order as they are in the incomming IR.
257 // Otherwise, there might be problems with existing phi nodes and algorithm
258 // based on predecessors traversal.
259
260 // Loop PH needs to be explicitly visited since it's not taken into account by
261 // LoopBlocksDFS.
262 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
263 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
264 "Unexpected loop preheader");
265 for (auto &I : *ThePreheaderBB) {
266 if (I.getType()->isVoidTy())
267 continue;
268 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
269 }
270
271 LoopBlocksRPO RPO(TheLoop);
272 RPO.perform(LI);
273
274 for (BasicBlock *BB : RPO) {
275 // Create or retrieve the VPBasicBlock for this BB.
276 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
277 // Set VPBB predecessors in the same order as they are in the incoming BB.
278 setVPBBPredsFromBB(VPBB, BB);
279
280 // Create VPInstructions for BB.
281 createVPInstructionsForVPBB(VPBB, BB);
282
283 // Set VPBB successors. We create empty VPBBs for successors if they don't
284 // exist already. Recipes will be created when the successor is visited
285 // during the RPO traversal.
286 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
288 getOrCreateVPBB(SI->getDefaultDest())};
289 for (auto Case : SI->cases())
290 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
291 VPBB->setSuccessors(Succs);
292 continue;
293 }
294 auto *BI = cast<BranchInst>(BB->getTerminator());
295 unsigned NumSuccs = succ_size(BB);
296 if (NumSuccs == 1) {
297 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
298 continue;
299 }
300 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
301 "block must have conditional branch with 2 successors");
302
303 BasicBlock *IRSucc0 = BI->getSuccessor(0);
304 BasicBlock *IRSucc1 = BI->getSuccessor(1);
305 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
306 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
307 VPBB->setTwoSuccessors(Successor0, Successor1);
308 }
309
310 for (auto *EB : Plan->getExitBlocks())
311 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
312
313 // 2. The whole CFG has been built at this point so all the input Values must
314 // have a VPlan counterpart. Fix VPlan header phi by adding their
315 // corresponding VPlan operands.
316 fixHeaderPhis();
317
318 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
319 Plan->getEntry()->setPlan(&*Plan);
320
321 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
322 // VPIRInstructions wrapping them.
323 // // Note that the operand order corresponds to IR predecessor order, and may
324 // need adjusting when VPlan predecessors are added, if an exit block has
325 // multiple predecessor.
326 for (auto *EB : Plan->getExitBlocks()) {
327 for (VPRecipeBase &R : EB->phis()) {
328 auto *PhiR = cast<VPIRPhi>(&R);
329 PHINode &Phi = PhiR->getIRPhi();
330 assert(PhiR->getNumOperands() == 0 &&
331 "no phi operands should be added yet");
332 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
333 PhiR->addOperand(
334 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
335 }
336 }
337
338 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
339 return std::move(Plan);
340}
341
342/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
343/// has exactly 2 predecessors (preheader and latch), where the block
344/// dominates the latch and the preheader dominates the block. If it is a
345/// header block return true and canonicalize the predecessors of the header
346/// (making sure the preheader appears first and the latch second) and the
347/// successors of the latch (making sure the loop exit comes first). Otherwise
348/// return false.
350 const VPDominatorTree &VPDT) {
351 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
352 if (Preds.size() != 2)
353 return false;
354
355 auto *PreheaderVPBB = Preds[0];
356 auto *LatchVPBB = Preds[1];
357 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
358 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
359 std::swap(PreheaderVPBB, LatchVPBB);
360
361 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
362 !VPDT.dominates(HeaderVPB, LatchVPBB))
363 return false;
364
365 // Canonicalize predecessors of header so that preheader is first and
366 // latch second.
367 HeaderVPB->swapPredecessors();
368 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
369 R.swapOperands();
370 }
371
372 // The two successors of conditional branch match the condition, with the
373 // first successor corresponding to true and the second to false. We
374 // canonicalize the successors of the latch when introducing the region, such
375 // that the latch exits the region when its condition is true; invert the
376 // original condition if the original CFG branches to the header on true.
377 // Note that the exit edge is not yet connected for top-level loops.
378 if (LatchVPBB->getSingleSuccessor() ||
379 LatchVPBB->getSuccessors()[0] != HeaderVPB)
380 return true;
381
382 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
383 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
386 "terminator must be a BranchOnCond");
387 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
388 Not->insertBefore(Term);
389 Term->setOperand(0, Not);
390 LatchVPBB->swapSuccessors();
391
392 return true;
393}
394
395/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
396static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
397 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
398 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
399
400 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
401 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
402 VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor();
403 assert(LatchExitVPB && "Latch expected to be left with a single successor");
404
405 // Create an empty region first and insert it between PreheaderVPBB and
406 // LatchExitVPB, taking care to preserve the original predecessor & successor
407 // order of blocks. Set region entry and exiting after both HeaderVPB and
408 // LatchVPBB have been disconnected from their predecessors/successors.
409 auto *R = Plan.createVPRegionBlock();
410 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
411 VPBlockUtils::disconnectBlocks(LatchVPBB, R);
412 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
413 R->setEntry(HeaderVPB);
414 R->setExiting(LatchVPBB);
415
416 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
417 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
418 VPBB->setParent(R);
419}
420
421// Add the necessary canonical IV and branch recipes required to control the
422// loop.
423static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
424 VPBasicBlock *LatchVPBB, Type *IdxTy,
425 DebugLoc DL) {
426 Value *StartIdx = ConstantInt::get(IdxTy, 0);
427 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
428
429 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
430 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
431 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
432
433 // We are about to replace the branch to exit the region. Remove the original
434 // BranchOnCond, if there is any.
435 DebugLoc LatchDL = DL;
436 if (!LatchVPBB->empty() &&
437 match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) {
438 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
439 LatchVPBB->getTerminator()->eraseFromParent();
440 }
441
442 VPBuilder Builder(LatchVPBB);
443 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
444 // Initially the induction increment is guaranteed to not wrap, but that may
445 // change later, e.g. when tail-folding, when the flags need to be dropped.
446 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
447 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
448 "index.next");
449 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
450
451 // Add the BranchOnCount VPInstruction to the latch.
452 Builder.createNaryOp(VPInstruction::BranchOnCount,
453 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
454 LatchDL);
455}
456
457/// Creates extracts for values in \p Plan defined in a loop region and used
458/// outside a loop region.
459static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
460 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
461 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
462 if (EB->getSinglePredecessor() != MiddleVPBB)
463 continue;
464
465 for (VPRecipeBase &R : EB->phis()) {
466 auto *ExitIRI = cast<VPIRPhi>(&R);
467 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
468 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
469 if (!Inc)
470 continue;
471 assert(ExitIRI->getNumOperands() == 1 &&
472 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
473 "exit values from early exits must be fixed when branch to "
474 "early-exit is added");
475 ExitIRI->extractLastLaneOfFirstOperand(B);
476 }
477 }
478 }
479}
480
481static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
482 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
483 VPDominatorTree VPDT;
484 VPDT.recalculate(Plan);
485
486 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
487 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
488 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
489
490 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
491 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
492
493 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
494 // The canonical LatchVPBB has the header block as last successor. If it has
495 // another successor, this successor is an exit block - insert middle block on
496 // its edge. Otherwise, add middle block as another successor retaining header
497 // as last.
498 if (LatchVPBB->getNumSuccessors() == 2) {
499 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
500 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
501 } else {
502 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
503 LatchVPBB->swapSuccessors();
504 }
505
506 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
507
508 // Create SCEV and VPValue for the trip count.
509 // We use the symbolic max backedge-taken-count, which works also when
510 // vectorizing loops with uncountable early exits.
511 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
512 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
513 "Invalid backedge-taken count");
514 ScalarEvolution &SE = *PSE.getSE();
515 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
516 InductionTy, TheLoop);
518
519 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
521
522 // The connection order corresponds to the operands of the conditional branch,
523 // with the middle block already connected to the exit block.
524 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
525 // Also connect the entry block to the scalar preheader.
526 // TODO: Also introduce a branch recipe together with the minimum trip count
527 // check.
528 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
529 Plan.getEntry()->swapSuccessors();
530
531 createExtractsForLiveOuts(Plan, MiddleVPBB);
532}
533
534std::unique_ptr<VPlan>
535VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
537 PlainCFGBuilder Builder(TheLoop, &LI);
538 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
539 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
540 return VPlan0;
541}
542
544 bool HasUncountableEarlyExit) {
545 auto *MiddleVPBB = cast<VPBasicBlock>(
547 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
548 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
549
550 // Disconnect all early exits from the loop leaving it with a single exit from
551 // the latch. Early exits that are countable are left for a scalar epilog. The
552 // condition of uncountable early exits (currently at most one is supported)
553 // is fused into the latch exit, and used to branch from middle block to the
554 // early exit destination.
555 [[maybe_unused]] bool HandledUncountableEarlyExit = false;
556 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
557 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
558 if (Pred == MiddleVPBB)
559 continue;
560 if (HasUncountableEarlyExit) {
561 assert(!HandledUncountableEarlyExit &&
562 "can handle exactly one uncountable early exit");
564 cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
565 HandledUncountableEarlyExit = true;
566 } else {
567 for (VPRecipeBase &R : EB->phis())
568 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
569 }
570 cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
572 }
573 }
574
575 assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
576 "missed an uncountable exit that must be handled");
577}
578
580 bool RequiresScalarEpilogueCheck,
581 bool TailFolded) {
582 auto *MiddleVPBB = cast<VPBasicBlock>(
584 // If MiddleVPBB has a single successor then the original loop does not exit
585 // via the latch and the single successor must be the scalar preheader.
586 // There's no need to add a runtime check to MiddleVPBB.
587 if (MiddleVPBB->getNumSuccessors() == 1) {
588 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
589 "must have ScalarPH as single successor");
590 return;
591 }
592
593 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
594
595 // Add a check in the middle block to see if we have completed all of the
596 // iterations in the first vector loop.
597 //
598 // Three cases:
599 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
600 // condition to false.
601 // 2) If (N - N%VF) == N, then we *don't* need to run the
602 // remainder. Thus if tail is to be folded, we know we don't need to run
603 // the remainder and we can set the condition to true.
604 // 3) Otherwise, construct a runtime check.
605
606 // We use the same DebugLoc as the scalar loop latch terminator instead of
607 // the corresponding compare because they may have ended up with different
608 // line numbers and we want to avoid awkward line stepping while debugging.
609 // E.g., if the compare has got a line number inside the loop.
610 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
611 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
612 VPBuilder Builder(MiddleVPBB);
613 VPValue *Cmp;
614 if (!RequiresScalarEpilogueCheck)
615 Cmp = Plan.getFalse();
616 else if (TailFolded)
617 Cmp = Plan.getOrAddLiveIn(
619 else
620 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
621 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
622 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
623}
624
626 VPDominatorTree VPDT;
627 VPDT.recalculate(Plan);
628 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
629 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
630 createLoopRegion(Plan, HeaderVPB);
631
632 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
633 TopRegion->setName("vector loop");
634 TopRegion->getEntryBasicBlock()->setName("vector.body");
635}
636
637// Likelyhood of bypassing the vectorized loop due to a runtime check block,
638// including memory overlap checks block and wrapping/unit-stride checks block.
639static constexpr uint32_t CheckBypassWeights[] = {1, 127};
640
642 BasicBlock *CheckBlock,
643 bool AddBranchWeights) {
644 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
645 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
646 VPBlockBase *VectorPH = Plan.getVectorPreheader();
647 VPBlockBase *ScalarPH = Plan.getScalarPreheader();
648 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
649 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
650 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
651 CheckBlockVPBB->swapSuccessors();
652
653 // We just connected a new block to the scalar preheader. Update all
654 // VPPhis by adding an incoming value for it, replicating the last value.
655 unsigned NumPredecessors = ScalarPH->getNumPredecessors();
656 for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
657 assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
658 assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
659 "must have incoming values for all operands");
660 R.addOperand(R.getOperand(NumPredecessors - 2));
661 }
662
663 VPIRMetadata VPBranchWeights;
664 auto *Term = VPBuilder(CheckBlockVPBB)
666 Plan.getCanonicalIV()->getDebugLoc());
667 if (AddBranchWeights) {
668 MDBuilder MDB(Plan.getContext());
669 MDNode *BranchWeights =
670 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
671 Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
672 }
673}
674
676 VPlan &Plan, ElementCount VF, unsigned UF,
677 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
678 bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
680 // Generate code to check if the loop's trip count is less than VF * UF, or
681 // equal to it in case a scalar epilogue is required; this implies that the
682 // vector trip count is zero. This check also covers the case where adding one
683 // to the backedge-taken count overflowed leading to an incorrect trip count
684 // of zero. In this case we will also jump to the scalar loop.
685 CmpInst::Predicate CmpPred =
686 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
687 // If tail is to be folded, vector loop takes care of all iterations.
688 VPValue *TripCountVPV = Plan.getTripCount();
689 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
690 Type *TripCountTy = TripCount->getType();
691 auto GetMinTripCount = [&]() -> const SCEV * {
692 // Compute max(MinProfitableTripCount, UF * VF) and return it.
693 const SCEV *VFxUF =
694 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
695 if (UF * VF.getKnownMinValue() >=
696 MinProfitableTripCount.getKnownMinValue()) {
697 // TODO: SCEV should be able to simplify test.
698 return VFxUF;
699 }
700 const SCEV *MinProfitableTripCountSCEV =
701 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
702 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
703 };
704
705 VPBasicBlock *EntryVPBB = Plan.getEntry();
706 VPBuilder Builder(EntryVPBB);
707 VPValue *TripCountCheck = Plan.getFalse();
708 const SCEV *Step = GetMinTripCount();
709 if (TailFolded) {
710 if (CheckNeededWithTailFolding) {
711 // vscale is not necessarily a power-of-2, which means we cannot guarantee
712 // an overflow to zero when updating induction variables and so an
713 // additional overflow check is required before entering the vector loop.
714
715 // Get the maximum unsigned value for the type.
716 VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
717 TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
718 VPValue *DistanceToMax = Builder.createNaryOp(
719 Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
721
722 // Don't execute the vector loop if (UMax - n) < (VF * UF).
723 // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
724 // minProfitableTripCount).
725 TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
726 Builder.createExpandSCEV(Step), DL);
727 } else {
728 // TripCountCheck = false, folding tail implies positive vector trip
729 // count.
730 }
731 } else {
732 // TODO: Emit unconditional branch to vector preheader instead of
733 // conditional branch with known condition.
734 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
735 // Check if the trip count is < the step.
736 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
737 // TODO: Ensure step is at most the trip count when determining max VF and
738 // UF, w/o tail folding.
739 TripCountCheck = Plan.getTrue();
740 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
741 TripCount, Step)) {
742 // Generate the minimum iteration check only if we cannot prove the
743 // check is known to be true, or known to be false.
744 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
745 TripCountCheck = Builder.createICmp(
746 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
747 } // else step known to be < trip count, use TripCountCheck preset to false.
748 }
749 VPInstruction *Term =
750 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
752 MDBuilder MDB(Plan.getContext());
753 MDNode *BranchWeights = MDB.createBranchWeights(
754 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
755 Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
756 }
757}
758
760 auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
761 auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
762 RedPhiR->getBackedgeValue()->getDefiningRecipe());
763 if (!MinMaxR)
764 return nullptr;
765
766 auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
767 if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
768 !(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
769 return nullptr;
770
771#ifndef NDEBUG
772 Intrinsic::ID RdxIntrinsicId =
773 RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
774 : Intrinsic::minnum;
776 cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
777 RdxIntrinsicId) ||
778 (RepR && cast<IntrinsicInst>(RepR->getUnderlyingInstr())
779 ->getIntrinsicID() == RdxIntrinsicId)) &&
780 "Intrinsic did not match recurrence kind");
781#endif
782
783 if (MinMaxR->getOperand(0) == RedPhiR)
784 return MinMaxR->getOperand(1);
785
786 assert(MinMaxR->getOperand(1) == RedPhiR &&
787 "Reduction phi operand expected");
788 return MinMaxR->getOperand(0);
789 };
790
791 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
792 VPReductionPHIRecipe *RedPhiR = nullptr;
793 bool HasUnsupportedPhi = false;
794 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
796 continue;
797 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
798 if (!Cur) {
799 // TODO: Also support fixed-order recurrence phis.
800 HasUnsupportedPhi = true;
801 continue;
802 }
803 // For now, only a single reduction is supported.
804 // TODO: Support multiple MaxNum/MinNum reductions and other reductions.
805 if (RedPhiR)
806 return false;
807 if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
808 Cur->getRecurrenceKind() != RecurKind::FMinNum) {
809 HasUnsupportedPhi = true;
810 continue;
811 }
812 RedPhiR = Cur;
813 }
814
815 if (!RedPhiR)
816 return true;
817
818 // We won't be able to resume execution in the scalar tail, if there are
819 // unsupported header phis or there is no scalar tail at all, due to
820 // tail-folding.
821 if (HasUnsupportedPhi || !Plan.hasScalarTail())
822 return false;
823
824 VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
825 if (!MinMaxOp)
826 return false;
827
828 RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
829 assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
830 "unsupported reduction");
831 (void)RedPhiRK;
832
833 /// Check if the vector loop of \p Plan can early exit and restart
834 /// execution of last vector iteration in the scalar loop. This requires all
835 /// recipes up to early exit point be side-effect free as they are
836 /// re-executed. Currently we check that the loop is free of any recipe that
837 /// may write to memory. Expected to operate on an early VPlan w/o nested
838 /// regions.
841 auto *VPBB = cast<VPBasicBlock>(VPB);
842 for (auto &R : *VPBB) {
843 if (R.mayWriteToMemory() &&
845 return false;
846 }
847 }
848
849 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
850 VPBuilder Builder(LatchVPBB->getTerminator());
851 auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
852 assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
853 "Unexpected terminator");
854 auto *IsLatchExitTaken =
855 Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
856 LatchExitingBranch->getOperand(1));
857
858 VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
859 VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
860 auto *AnyExitTaken =
861 Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
862 Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
863 LatchExitingBranch->eraseFromParent();
864
865 // If we exit early due to NaNs, compute the final reduction result based on
866 // the reduction phi at the beginning of the last vector iteration.
867 auto *RdxResult = find_singleton<VPSingleDefRecipe>(
868 RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
869 auto *VPI = dyn_cast<VPInstruction>(U);
870 if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
871 return VPI;
872 return nullptr;
873 });
874
875 auto *MiddleVPBB = Plan.getMiddleBlock();
876 Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
877 auto *NewSel =
878 Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
879 RdxResult->setOperand(1, NewSel);
880
881 auto *ScalarPH = Plan.getScalarPreheader();
882 // Update resume phis for inductions in the scalar preheader. If AnyNaN is
883 // true, the resume from the start of the last vector iteration via the
884 // canonical IV, otherwise from the original value.
885 for (auto &R : ScalarPH->phis()) {
886 auto *ResumeR = cast<VPPhi>(&R);
887 VPValue *VecV = ResumeR->getOperand(0);
888 if (VecV == RdxResult)
889 continue;
890 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
891 if (DerivedIV->getNumUsers() == 1 &&
892 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
893 auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
894 &Plan.getVectorTripCount());
895 DerivedIV->moveAfter(&*Builder.getInsertPoint());
896 DerivedIV->setOperand(1, NewSel);
897 continue;
898 }
899 }
900 // Bail out and abandon the current, partially modified, VPlan if we
901 // encounter resume phi that cannot be updated yet.
902 if (VecV != &Plan.getVectorTripCount()) {
903 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
904 "FMaxNum/FMinNum reduction.\n");
905 return false;
906 }
907 auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
908 ResumeR->setOperand(0, NewSel);
909 }
910
911 auto *MiddleTerm = MiddleVPBB->getTerminator();
912 Builder.setInsertPoint(MiddleTerm);
913 VPValue *MiddleCond = MiddleTerm->getOperand(0);
914 VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
915 MiddleTerm->setOperand(0, NewCond);
916 return true;
917}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
#define LLVM_DEBUG(...)
Definition Debug.h:119
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static constexpr uint32_t CheckBypassWeights[]
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:688
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:791
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:187
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:161
iterator end()
Definition DenseMap.h:81
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1077
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
op_range operands()
Definition User.h:292
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3750
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3785
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:3838
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:246
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:680
const VPRecipeBase & back() const
Definition VPlan.h:3799
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:3816
bool empty() const
Definition VPlan.h:3796
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:190
void setName(const Twine &newName)
Definition VPlan.h:166
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:120
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:238
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:176
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:195
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3406
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3903
Helper to manage IR metadata for recipes.
Definition VPlan.h:934
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:975
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:394
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:482
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A recipe for handling reduction phis.
Definition VPlan.h:2317
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2371
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:3938
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:521
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:197
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:236
void addOperand(VPValue *Operand)
Definition VPlanValue.h:230
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:184
user_range users()
Definition VPlanValue.h:134
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4041
LLVMContext & getContext() const
Definition VPlan.h:4238
VPBasicBlock * getEntry()
Definition VPlan.h:4140
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition VPlan.h:4381
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4230
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4236
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4202
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4307
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4192
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1050
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4209
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4165
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4371
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1265
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4313
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4292
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4183
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition VPlan.h:4346
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4188
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4145
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4423
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
@ Entry
Definition COFF.h:862
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
T * find_singleton(R &&Range, Predicate P, bool AllowRepeats=false)
Return the single value in Range that satisfies P(<member of Range> *, AllowRepeats)->T * returning n...
Definition STLExtras.h:1789
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
Definition VPlanCFG.h:229
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
RecurKind
These are the kinds of recurrences that we support.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto predecessors(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE)
Create a base VPlan0, serving as the common starting point for all later candidates.
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE)
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)
Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB by.
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...