LLVM 22.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanCFG.h"
17#include "VPlanDominatorTree.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanTransforms.h"
23#include "llvm/IR/MDBuilder.h"
24
25#define DEBUG_TYPE "vplan"
26
27using namespace llvm;
28using namespace VPlanPatternMatch;
29
30namespace {
31// Class that is used to build the plain CFG for the incoming IR.
32class PlainCFGBuilder {
33 // The outermost loop of the input loop nest considered for vectorization.
34 Loop *TheLoop;
35
36 // Loop Info analysis.
37 LoopInfo *LI;
38
39 // Vectorization plan that we are working on.
40 std::unique_ptr<VPlan> Plan;
41
42 // Builder of the VPlan instruction-level representation.
43 VPBuilder VPIRBuilder;
44
45 // NOTE: The following maps are intentionally destroyed after the plain CFG
46 // construction because subsequent VPlan-to-VPlan transformation may
47 // invalidate them.
48 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
50 // Map incoming Value definitions to their newly-created VPValues.
51 DenseMap<Value *, VPValue *> IRDef2VPValue;
52
53 // Hold phi node's that need to be fixed once the plain CFG has been built.
55
56 // Utility functions.
57 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
58 void fixHeaderPhis();
59 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
60#ifndef NDEBUG
61 bool isExternalDef(Value *Val);
62#endif
63 VPValue *getOrCreateVPOperand(Value *IRVal);
64 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
65
66public:
67 PlainCFGBuilder(Loop *Lp, LoopInfo *LI)
68 : TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(Lp)) {}
69
70 /// Build plain CFG for TheLoop and connect it to Plan's entry.
71 std::unique_ptr<VPlan> buildPlainCFG();
72};
73} // anonymous namespace
74
75// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
76// must have no predecessors.
77void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
78 // Collect VPBB predecessors.
80 for (BasicBlock *Pred : predecessors(BB))
81 VPBBPreds.push_back(getOrCreateVPBB(Pred));
82 VPBB->setPredecessors(VPBBPreds);
83}
84
85static bool isHeaderBB(BasicBlock *BB, Loop *L) {
86 return L && BB == L->getHeader();
87}
88
89// Add operands to VPInstructions representing phi nodes from the input IR.
90void PlainCFGBuilder::fixHeaderPhis() {
91 for (auto *Phi : PhisToFix) {
92 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
93 VPValue *VPVal = IRDef2VPValue[Phi];
94 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
95 auto *PhiR = cast<VPPhi>(VPVal);
96 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
97 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
98 "Expected Phi in header block.");
99 assert(Phi->getNumOperands() == 2 &&
100 "header phi must have exactly 2 operands");
101 for (BasicBlock *Pred : predecessors(Phi->getParent()))
102 PhiR->addOperand(
103 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
104 }
105}
106
107// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
108// existing one if it was already created.
109VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
110 if (auto *VPBB = BB2VPBB.lookup(BB)) {
111 // Retrieve existing VPBB.
112 return VPBB;
113 }
114
115 // Create new VPBB.
116 StringRef Name = BB->getName();
117 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
118 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
119 BB2VPBB[BB] = VPBB;
120 return VPBB;
121}
122
123#ifndef NDEBUG
124// Return true if \p Val is considered an external definition. An external
125// definition is either:
126// 1. A Value that is not an Instruction. This will be refined in the future.
127// 2. An Instruction that is outside of the IR region represented in VPlan,
128// i.e., is not part of the loop nest.
129bool PlainCFGBuilder::isExternalDef(Value *Val) {
130 // All the Values that are not Instructions are considered external
131 // definitions for now.
132 Instruction *Inst = dyn_cast<Instruction>(Val);
133 if (!Inst)
134 return true;
135
136 // Check whether Instruction definition is in loop body.
137 return !TheLoop->contains(Inst);
138}
139#endif
140
141// Create a new VPValue or retrieve an existing one for the Instruction's
142// operand \p IRVal. This function must only be used to create/retrieve VPValues
143// for *Instruction's operands* and not to create regular VPInstruction's. For
144// the latter, please, look at 'createVPInstructionsForVPBB'.
145VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
146 auto VPValIt = IRDef2VPValue.find(IRVal);
147 if (VPValIt != IRDef2VPValue.end())
148 // Operand has an associated VPInstruction or VPValue that was previously
149 // created.
150 return VPValIt->second;
151
152 // Operand doesn't have a previously created VPInstruction/VPValue. This
153 // means that operand is:
154 // A) a definition external to VPlan,
155 // B) any other Value without specific representation in VPlan.
156 // For now, we use VPValue to represent A and B and classify both as external
157 // definitions. We may introduce specific VPValue subclasses for them in the
158 // future.
159 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
160
161 // A and B: Create VPValue and add it to the pool of external definitions and
162 // to the Value->VPValue map.
163 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
164 IRDef2VPValue[IRVal] = NewVPVal;
165 return NewVPVal;
166}
167
168// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
169// counterpart. This function must be invoked in RPO so that the operands of a
170// VPInstruction in \p BB have been visited before (except for Phi nodes).
171void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
172 BasicBlock *BB) {
173 VPIRBuilder.setInsertPoint(VPBB);
174 // TODO: Model and preserve debug intrinsics in VPlan.
175 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
176 Instruction *Inst = &InstRef;
177
178 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
179 // visited Inst when we shouldn't, breaking the RPO traversal order.
180 assert(!IRDef2VPValue.count(Inst) &&
181 "Instruction shouldn't have been visited.");
182
183 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
184 // Conditional branch instruction are represented using BranchOnCond
185 // recipes.
186 if (Br->isConditional()) {
187 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
188 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst);
189 }
190
191 // Skip the rest of the Instruction processing for Branch instructions.
192 continue;
193 }
194
195 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
196 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
197 for (auto Case : SI->cases())
198 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
199 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst);
200 continue;
201 }
202
203 VPSingleDefRecipe *NewR;
204 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
205 // Phi node's operands may not have been visited at this point. We create
206 // an empty VPInstruction that we will fix once the whole plain CFG has
207 // been built.
208 NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
209 NewR->setUnderlyingValue(Phi);
210 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
211 // Header phis need to be fixed after the VPBB for the latch has been
212 // created.
213 PhisToFix.push_back(Phi);
214 } else {
215 // Add operands for VPPhi in the order matching its predecessors in
216 // VPlan.
217 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
218 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
219 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
220 getOrCreateVPOperand(Phi->getIncomingValue(I));
221 }
222 for (VPBlockBase *Pred : VPBB->getPredecessors())
223 NewR->addOperand(
224 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
225 }
226 } else {
227 // Translate LLVM-IR operands into VPValue operands and set them in the
228 // new VPInstruction.
229 SmallVector<VPValue *, 4> VPOperands;
230 for (Value *Op : Inst->operands())
231 VPOperands.push_back(getOrCreateVPOperand(Op));
232
233 // Build VPInstruction for any arbitrary Instruction without specific
234 // representation in VPlan.
235 NewR = cast<VPInstruction>(
236 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
237 }
238
239 IRDef2VPValue[Inst] = NewR;
240 }
241}
242
243// Main interface to build the plain CFG.
244std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
245 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
246 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
247 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
248 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
249
250 // 1. Scan the body of the loop in a topological order to visit each basic
251 // block after having visited its predecessor basic blocks. Create a VPBB for
252 // each BB and link it to its successor and predecessor VPBBs. Note that
253 // predecessors must be set in the same order as they are in the incomming IR.
254 // Otherwise, there might be problems with existing phi nodes and algorithm
255 // based on predecessors traversal.
256
257 // Loop PH needs to be explicitly visited since it's not taken into account by
258 // LoopBlocksDFS.
259 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
260 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
261 "Unexpected loop preheader");
262 for (auto &I : *ThePreheaderBB) {
263 if (I.getType()->isVoidTy())
264 continue;
265 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
266 }
267
268 LoopBlocksRPO RPO(TheLoop);
269 RPO.perform(LI);
270
271 for (BasicBlock *BB : RPO) {
272 // Create or retrieve the VPBasicBlock for this BB.
273 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
274 // Set VPBB predecessors in the same order as they are in the incoming BB.
275 setVPBBPredsFromBB(VPBB, BB);
276
277 // Create VPInstructions for BB.
278 createVPInstructionsForVPBB(VPBB, BB);
279
280 // Set VPBB successors. We create empty VPBBs for successors if they don't
281 // exist already. Recipes will be created when the successor is visited
282 // during the RPO traversal.
283 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
285 getOrCreateVPBB(SI->getDefaultDest())};
286 for (auto Case : SI->cases())
287 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
288 VPBB->setSuccessors(Succs);
289 continue;
290 }
291 auto *BI = cast<BranchInst>(BB->getTerminator());
292 unsigned NumSuccs = succ_size(BB);
293 if (NumSuccs == 1) {
294 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
295 continue;
296 }
297 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
298 "block must have conditional branch with 2 successors");
299
300 BasicBlock *IRSucc0 = BI->getSuccessor(0);
301 BasicBlock *IRSucc1 = BI->getSuccessor(1);
302 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
303 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
304 VPBB->setTwoSuccessors(Successor0, Successor1);
305 }
306
307 for (auto *EB : Plan->getExitBlocks())
308 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
309
310 // 2. The whole CFG has been built at this point so all the input Values must
311 // have a VPlan counterpart. Fix VPlan header phi by adding their
312 // corresponding VPlan operands.
313 fixHeaderPhis();
314
315 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
316 Plan->getEntry()->setPlan(&*Plan);
317
318 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
319 // VPIRInstructions wrapping them.
320 // // Note that the operand order corresponds to IR predecessor order, and may
321 // need adjusting when VPlan predecessors are added, if an exit block has
322 // multiple predecessor.
323 for (auto *EB : Plan->getExitBlocks()) {
324 for (VPRecipeBase &R : EB->phis()) {
325 auto *PhiR = cast<VPIRPhi>(&R);
326 PHINode &Phi = PhiR->getIRPhi();
327 assert(PhiR->getNumOperands() == 0 &&
328 "no phi operands should be added yet");
329 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
330 PhiR->addOperand(
331 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
332 }
333 }
334
335 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
336 return std::move(Plan);
337}
338
339/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
340/// has exactly 2 predecessors (preheader and latch), where the block
341/// dominates the latch and the preheader dominates the block. If it is a
342/// header block return true and canonicalize the predecessors of the header
343/// (making sure the preheader appears first and the latch second) and the
344/// successors of the latch (making sure the loop exit comes first). Otherwise
345/// return false.
347 const VPDominatorTree &VPDT) {
348 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
349 if (Preds.size() != 2)
350 return false;
351
352 auto *PreheaderVPBB = Preds[0];
353 auto *LatchVPBB = Preds[1];
354 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
355 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
356 std::swap(PreheaderVPBB, LatchVPBB);
357
358 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
359 !VPDT.dominates(HeaderVPB, LatchVPBB))
360 return false;
361
362 // Canonicalize predecessors of header so that preheader is first and
363 // latch second.
364 HeaderVPB->swapPredecessors();
365 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
366 R.swapOperands();
367 }
368
369 // The two successors of conditional branch match the condition, with the
370 // first successor corresponding to true and the second to false. We
371 // canonicalize the successors of the latch when introducing the region, such
372 // that the latch exits the region when its condition is true; invert the
373 // original condition if the original CFG branches to the header on true.
374 // Note that the exit edge is not yet connected for top-level loops.
375 if (LatchVPBB->getSingleSuccessor() ||
376 LatchVPBB->getSuccessors()[0] != HeaderVPB)
377 return true;
378
379 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
380 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
381 assert(cast<VPInstruction>(Term)->getOpcode() ==
383 "terminator must be a BranchOnCond");
384 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
385 Not->insertBefore(Term);
386 Term->setOperand(0, Not);
387 LatchVPBB->swapSuccessors();
388
389 return true;
390}
391
392/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
393static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
394 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
395 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
396
397 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
398 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
399 VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor();
400 assert(LatchExitVPB && "Latch expected to be left with a single successor");
401
402 // Create an empty region first and insert it between PreheaderVPBB and
403 // LatchExitVPB, taking care to preserve the original predecessor & successor
404 // order of blocks. Set region entry and exiting after both HeaderVPB and
405 // LatchVPBB have been disconnected from their predecessors/successors.
406 auto *R = Plan.createVPRegionBlock();
407 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
408 VPBlockUtils::disconnectBlocks(LatchVPBB, R);
409 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
410 R->setEntry(HeaderVPB);
411 R->setExiting(LatchVPBB);
412
413 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
414 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
415 VPBB->setParent(R);
416}
417
418// Add the necessary canonical IV and branch recipes required to control the
419// loop.
420static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
421 VPBasicBlock *LatchVPBB, Type *IdxTy,
422 DebugLoc DL) {
423 Value *StartIdx = ConstantInt::get(IdxTy, 0);
424 auto *StartV = Plan.getOrAddLiveIn(StartIdx);
425
426 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
427 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
428 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
429
430 // We are about to replace the branch to exit the region. Remove the original
431 // BranchOnCond, if there is any.
432 DebugLoc LatchDL = DL;
433 if (!LatchVPBB->empty() &&
434 match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) {
435 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
436 LatchVPBB->getTerminator()->eraseFromParent();
437 }
438
439 VPBuilder Builder(LatchVPBB);
440 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
441 // Initially the induction increment is guaranteed to not wrap, but that may
442 // change later, e.g. when tail-folding, when the flags need to be dropped.
443 auto *CanonicalIVIncrement = Builder.createOverflowingOp(
444 Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
445 "index.next");
446 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
447
448 // Add the BranchOnCount VPInstruction to the latch.
450 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
451 LatchDL);
452}
453
454/// Creates extracts for values in \p Plan defined in a loop region and used
455/// outside a loop region.
456static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
457 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
458 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
459 if (EB->getSinglePredecessor() != MiddleVPBB)
460 continue;
461
462 for (VPRecipeBase &R : EB->phis()) {
463 auto *ExitIRI = cast<VPIRPhi>(&R);
464 for (unsigned Idx = 0; Idx != ExitIRI->getNumIncoming(); ++Idx) {
465 VPRecipeBase *Inc = ExitIRI->getIncomingValue(Idx)->getDefiningRecipe();
466 if (!Inc)
467 continue;
468 assert(ExitIRI->getNumOperands() == 1 &&
469 ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
470 "exit values from early exits must be fixed when branch to "
471 "early-exit is added");
472 ExitIRI->extractLastLaneOfFirstOperand(B);
473 }
474 }
475 }
476}
477
478static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
479 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
480 VPDominatorTree VPDT;
481 VPDT.recalculate(Plan);
482
483 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
484 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
485 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
486
487 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
488 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
489
490 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
491 // The canonical LatchVPBB has the header block as last successor. If it has
492 // another successor, this successor is an exit block - insert middle block on
493 // its edge. Otherwise, add middle block as another successor retaining header
494 // as last.
495 if (LatchVPBB->getNumSuccessors() == 2) {
496 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
497 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
498 } else {
499 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
500 LatchVPBB->swapSuccessors();
501 }
502
503 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
504
505 // Create SCEV and VPValue for the trip count.
506 // We use the symbolic max backedge-taken-count, which works also when
507 // vectorizing loops with uncountable early exits.
508 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
509 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
510 "Invalid backedge-taken count");
511 ScalarEvolution &SE = *PSE.getSE();
512 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
513 InductionTy, TheLoop);
515
516 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
518
519 // The connection order corresponds to the operands of the conditional branch,
520 // with the middle block already connected to the exit block.
521 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
522 // Also connect the entry block to the scalar preheader.
523 // TODO: Also introduce a branch recipe together with the minimum trip count
524 // check.
525 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
526 Plan.getEntry()->swapSuccessors();
527
528 createExtractsForLiveOuts(Plan, MiddleVPBB);
529}
530
531std::unique_ptr<VPlan>
532VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
534 PlainCFGBuilder Builder(TheLoop, &LI);
535 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
536 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
537 return VPlan0;
538}
539
541 bool HasUncountableEarlyExit) {
542 auto *MiddleVPBB = cast<VPBasicBlock>(
544 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
545 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
546
547 // Disconnect all early exits from the loop leaving it with a single exit from
548 // the latch. Early exits that are countable are left for a scalar epilog. The
549 // condition of uncountable early exits (currently at most one is supported)
550 // is fused into the latch exit, and used to branch from middle block to the
551 // early exit destination.
552 [[maybe_unused]] bool HandledUncountableEarlyExit = false;
553 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
554 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
555 if (Pred == MiddleVPBB)
556 continue;
557 if (HasUncountableEarlyExit) {
558 assert(!HandledUncountableEarlyExit &&
559 "can handle exactly one uncountable early exit");
560 handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
561 cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
562 HandledUncountableEarlyExit = true;
563 } else {
564 for (VPRecipeBase &R : EB->phis())
565 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
566 }
567 cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
569 }
570 }
571
572 assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
573 "missed an uncountable exit that must be handled");
574}
575
577 bool RequiresScalarEpilogueCheck,
578 bool TailFolded) {
579 auto *MiddleVPBB = cast<VPBasicBlock>(
581 // If MiddleVPBB has a single successor then the original loop does not exit
582 // via the latch and the single successor must be the scalar preheader.
583 // There's no need to add a runtime check to MiddleVPBB.
584 if (MiddleVPBB->getNumSuccessors() == 1) {
585 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
586 "must have ScalarPH as single successor");
587 return;
588 }
589
590 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
591
592 // Add a check in the middle block to see if we have completed all of the
593 // iterations in the first vector loop.
594 //
595 // Three cases:
596 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
597 // condition to false.
598 // 2) If (N - N%VF) == N, then we *don't* need to run the
599 // remainder. Thus if tail is to be folded, we know we don't need to run
600 // the remainder and we can set the condition to true.
601 // 3) Otherwise, construct a runtime check.
602
603 // We use the same DebugLoc as the scalar loop latch terminator instead of
604 // the corresponding compare because they may have ended up with different
605 // line numbers and we want to avoid awkward line stepping while debugging.
606 // E.g., if the compare has got a line number inside the loop.
607 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
608 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
609 VPBuilder Builder(MiddleVPBB);
610 VPValue *Cmp;
611 if (!RequiresScalarEpilogueCheck)
612 Cmp = Plan.getFalse();
613 else if (TailFolded)
614 Cmp = Plan.getOrAddLiveIn(
616 else
617 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
618 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
619 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
620}
621
623 VPDominatorTree VPDT;
624 VPDT.recalculate(Plan);
625 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
626 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
627 createLoopRegion(Plan, HeaderVPB);
628
629 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
630 TopRegion->setName("vector loop");
631 TopRegion->getEntryBasicBlock()->setName("vector.body");
632}
633
634// Likelyhood of bypassing the vectorized loop due to a runtime check block,
635// including memory overlap checks block and wrapping/unit-stride checks block.
636static constexpr uint32_t CheckBypassWeights[] = {1, 127};
637
639 BasicBlock *CheckBlock,
640 bool AddBranchWeights) {
641 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
642 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
643 VPBlockBase *VectorPH = Plan.getVectorPreheader();
644 VPBlockBase *ScalarPH = Plan.getScalarPreheader();
645 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
646 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
647 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
648 CheckBlockVPBB->swapSuccessors();
649
650 // We just connected a new block to the scalar preheader. Update all
651 // VPPhis by adding an incoming value for it, replicating the last value.
652 unsigned NumPredecessors = ScalarPH->getNumPredecessors();
653 for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
654 assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
655 assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
656 "must have incoming values for all operands");
657 R.addOperand(R.getOperand(NumPredecessors - 2));
658 }
659
660 VPIRMetadata VPBranchWeights;
661 auto *Term = VPBuilder(CheckBlockVPBB)
663 Plan.getCanonicalIV()->getDebugLoc());
664 if (AddBranchWeights) {
665 MDBuilder MDB(Plan.getContext());
666 MDNode *BranchWeights =
667 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
668 Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
669 }
670}
671
673 VPlan &Plan, ElementCount VF, unsigned UF,
674 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
675 bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
677 // Generate code to check if the loop's trip count is less than VF * UF, or
678 // equal to it in case a scalar epilogue is required; this implies that the
679 // vector trip count is zero. This check also covers the case where adding one
680 // to the backedge-taken count overflowed leading to an incorrect trip count
681 // of zero. In this case we will also jump to the scalar loop.
682 CmpInst::Predicate CmpPred =
683 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
684 // If tail is to be folded, vector loop takes care of all iterations.
685 VPValue *TripCountVPV = Plan.getTripCount();
686 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, SE);
687 Type *TripCountTy = TripCount->getType();
688 auto GetMinTripCount = [&]() -> const SCEV * {
689 // Compute max(MinProfitableTripCount, UF * VF) and return it.
690 const SCEV *VFxUF =
691 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
692 if (UF * VF.getKnownMinValue() >=
693 MinProfitableTripCount.getKnownMinValue()) {
694 // TODO: SCEV should be able to simplify test.
695 return VFxUF;
696 }
697 const SCEV *MinProfitableTripCountSCEV =
698 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
699 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
700 };
701
702 VPBasicBlock *EntryVPBB = Plan.getEntry();
703 VPBuilder Builder(EntryVPBB);
704 VPValue *TripCountCheck = Plan.getFalse();
705 const SCEV *Step = GetMinTripCount();
706 if (TailFolded) {
707 if (CheckNeededWithTailFolding) {
708 // vscale is not necessarily a power-of-2, which means we cannot guarantee
709 // an overflow to zero when updating induction variables and so an
710 // additional overflow check is required before entering the vector loop.
711
712 // Get the maximum unsigned value for the type.
713 VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
714 TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
715 VPValue *DistanceToMax = Builder.createNaryOp(
716 Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
718
719 // Don't execute the vector loop if (UMax - n) < (VF * UF).
720 // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
721 // minProfitableTripCount).
722 TripCountCheck = Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax,
723 Builder.createExpandSCEV(Step), DL);
724 } else {
725 // TripCountCheck = false, folding tail implies positive vector trip
726 // count.
727 }
728 } else {
729 // TODO: Emit unconditional branch to vector preheader instead of
730 // conditional branch with known condition.
731 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
732 // Check if the trip count is < the step.
733 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
734 // TODO: Ensure step is at most the trip count when determining max VF and
735 // UF, w/o tail folding.
736 TripCountCheck = Plan.getTrue();
737 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
738 TripCount, Step)) {
739 // Generate the minimum iteration check only if we cannot prove the
740 // check is known to be true, or known to be false.
741 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
742 TripCountCheck = Builder.createICmp(
743 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
744 } // else step known to be < trip count, use TripCountCheck preset to false.
745 }
746 VPInstruction *Term =
747 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
749 MDBuilder MDB(Plan.getContext());
750 MDNode *BranchWeights = MDB.createBranchWeights(
751 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
752 Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
753 }
754}
755
757 auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
758 auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
759 RedPhiR->getBackedgeValue()->getDefiningRecipe());
760 if (!MinMaxR)
761 return nullptr;
762
763 auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
764 if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
765 !(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
766 return nullptr;
767
768#ifndef NDEBUG
769 Intrinsic::ID RdxIntrinsicId =
770 RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
771 : Intrinsic::minnum;
772 assert(((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
773 cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
774 RdxIntrinsicId) ||
775 (RepR && cast<IntrinsicInst>(RepR->getUnderlyingInstr())
776 ->getIntrinsicID() == RdxIntrinsicId)) &&
777 "Intrinsic did not match recurrence kind");
778#endif
779
780 if (MinMaxR->getOperand(0) == RedPhiR)
781 return MinMaxR->getOperand(1);
782
783 assert(MinMaxR->getOperand(1) == RedPhiR &&
784 "Reduction phi operand expected");
785 return MinMaxR->getOperand(0);
786 };
787
788 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
789 VPReductionPHIRecipe *RedPhiR = nullptr;
790 bool HasUnsupportedPhi = false;
791 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
792 if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
793 continue;
794 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
795 if (!Cur) {
796 // TODO: Also support fixed-order recurrence phis.
797 HasUnsupportedPhi = true;
798 continue;
799 }
800 // For now, only a single reduction is supported.
801 // TODO: Support multiple MaxNum/MinNum reductions and other reductions.
802 if (RedPhiR)
803 return false;
804 if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
805 Cur->getRecurrenceKind() != RecurKind::FMinNum) {
806 HasUnsupportedPhi = true;
807 continue;
808 }
809 RedPhiR = Cur;
810 }
811
812 if (!RedPhiR)
813 return true;
814
815 // We won't be able to resume execution in the scalar tail, if there are
816 // unsupported header phis or there is no scalar tail at all, due to
817 // tail-folding.
818 if (HasUnsupportedPhi || !Plan.hasScalarTail())
819 return false;
820
821 VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
822 if (!MinMaxOp)
823 return false;
824
825 RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
826 assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
827 "unsupported reduction");
828 (void)RedPhiRK;
829
830 /// Check if the vector loop of \p Plan can early exit and restart
831 /// execution of last vector iteration in the scalar loop. This requires all
832 /// recipes up to early exit point be side-effect free as they are
833 /// re-executed. Currently we check that the loop is free of any recipe that
834 /// may write to memory. Expected to operate on an early VPlan w/o nested
835 /// regions.
838 auto *VPBB = cast<VPBasicBlock>(VPB);
839 for (auto &R : *VPBB) {
840 if (R.mayWriteToMemory() &&
842 return false;
843 }
844 }
845
846 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
847 VPBuilder Builder(LatchVPBB->getTerminator());
848 auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
849 assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
850 "Unexpected terminator");
851 auto *IsLatchExitTaken =
852 Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
853 LatchExitingBranch->getOperand(1));
854
855 VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
856 VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
857 auto *AnyExitTaken =
858 Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
859 Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
860 LatchExitingBranch->eraseFromParent();
861
862 // If we exit early due to NaNs, compute the final reduction result based on
863 // the reduction phi at the beginning of the last vector iteration.
864 auto *RdxResult = find_singleton<VPSingleDefRecipe>(
865 RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
866 auto *VPI = dyn_cast<VPInstruction>(U);
867 if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
868 return VPI;
869 return nullptr;
870 });
871
872 auto *MiddleVPBB = Plan.getMiddleBlock();
873 Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
874 auto *NewSel =
875 Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
876 RdxResult->setOperand(1, NewSel);
877
878 auto *ScalarPH = Plan.getScalarPreheader();
879 // Update resume phis for inductions in the scalar preheader. If AnyNaN is
880 // true, the resume from the start of the last vector iteration via the
881 // canonical IV, otherwise from the original value.
882 for (auto &R : ScalarPH->phis()) {
883 auto *ResumeR = cast<VPPhi>(&R);
884 VPValue *VecV = ResumeR->getOperand(0);
885 if (VecV == RdxResult)
886 continue;
887 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
888 if (DerivedIV->getNumUsers() == 1 &&
889 DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
890 auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
891 &Plan.getVectorTripCount());
892 DerivedIV->moveAfter(&*Builder.getInsertPoint());
893 DerivedIV->setOperand(1, NewSel);
894 continue;
895 }
896 }
897 // Bail out and abandon the current, partially modified, VPlan if we
898 // encounter resume phi that cannot be updated yet.
899 if (VecV != &Plan.getVectorTripCount()) {
900 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
901 "FMaxNum/FMinNum reduction.\n");
902 return false;
903 }
904 auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
905 ResumeR->setOperand(0, NewSel);
906 }
907
908 auto *MiddleTerm = MiddleVPBB->getTerminator();
909 Builder.setInsertPoint(MiddleTerm);
910 VPValue *MiddleCond = MiddleTerm->getOperand(0);
911 VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
912 MiddleTerm->setOperand(0, NewCond);
913 return true;
914}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static constexpr uint32_t CheckBypassWeights[]
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
This file contains the declarations of the Vectorization Plan base classes:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:467
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:704
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:688
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:791
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
static DebugLoc getUnknown()
Definition: DebugLoc.h:162
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:203
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:38
Metadata node.
Definition: Metadata.h:1077
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
op_range operands()
Definition: User.h:292
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3639
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3674
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3727
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:236
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:667
const VPRecipeBase & back() const
Definition: VPlan.h:3688
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3705
bool empty() const
Definition: VPlan.h:3685
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition: VPlan.h:297
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:180
void setName(const Twine &newName)
Definition: VPlan.h:166
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:319
size_t getNumPredecessors() const
Definition: VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:288
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:204
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:279
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:215
void swapPredecessors()
Swap predecessors of the block.
Definition: VPlan.h:311
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:268
void setParent(VPRegionBlock *P)
Definition: VPlan.h:184
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:198
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlanUtils.h:119
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlanUtils.h:237
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlanUtils.h:175
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlanUtils.h:194
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3295
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3792
Helper to manage IR metadata for recipes.
Definition: VPlan.h:926
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:967
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:391
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:479
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition: VPlan.h:2302
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition: VPlan.h:2356
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3827
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:518
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:197
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:241
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:236
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:230
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:184
user_range users()
Definition: VPlanValue.h:134
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3930
LLVMContext & getContext() const
Definition: VPlan.h:4127
VPBasicBlock * getEntry()
Definition: VPlan.h:4029
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4270
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:4119
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:4125
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:4091
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition: VPlan.h:4196
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition: VPlan.h:4081
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1037
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition: VPlan.h:4098
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:4054
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4260
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1252
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition: VPlan.h:4202
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4181
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:4072
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4235
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:4077
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:4034
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition: VPlan.h:4313
LLVM Value Representation.
Definition: Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
@ Entry
Definition: COFF.h:862
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlanUtils.cpp:32
const SCEV * getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE)
Return the SCEV expression for V.
Definition: VPlanUtils.cpp:79
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:216
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1300
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
Definition: VPlanCFG.h:229
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
auto predecessors(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE)
Create a base VPlan0, serving as the common starting point for all later candidates.
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, ScalarEvolution &SE)
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)
Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB by.
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...