LLVM 22.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55namespace llvm {
57}
58
59/// @{
60/// Metadata attribute names
61const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
63 "llvm.loop.vectorize.followup_vectorized";
65 "llvm.loop.vectorize.followup_epilogue";
66/// @}
67
69
71 "vplan-print-in-dot-format", cl::Hidden,
72 cl::desc("Use dot format instead of plain text when dumping VPlans"));
73
74#define DEBUG_TYPE "loop-vectorize"
75
76#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
78 const VPBasicBlock *Parent = R.getParent();
79 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
80 R.print(OS, "", SlotTracker);
81 return OS;
82}
83#endif
84
86 const ElementCount &VF) const {
87 switch (LaneKind) {
89 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
90 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
91 Builder.getInt32(VF.getKnownMinValue() - Lane));
93 return Builder.getInt32(Lane);
94 }
95 llvm_unreachable("Unknown lane kind");
96}
97
98VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
99 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
100 if (Def)
101 Def->addDefinedValue(this);
102}
103
105 assert(Users.empty() && "trying to delete a VPValue with remaining users");
106 if (Def)
107 Def->removeDefinedValue(this);
108}
109
110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
113 R->print(OS, "", SlotTracker);
114 else
116}
117
118void VPValue::dump() const {
119 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
121 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
123 dbgs() << "\n";
124}
125
126void VPDef::dump() const {
127 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
129 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
130 print(dbgs(), "", SlotTracker);
131 dbgs() << "\n";
132}
133#endif
134
138
142
143// Get the top-most entry block of \p Start. This is the entry block of the
144// containing VPlan. This function is templated to support both const and non-const blocks
145template <typename T> static T *getPlanEntry(T *Start) {
146 T *Next = Start;
147 T *Current = Start;
148 while ((Next = Next->getParent()))
149 Current = Next;
150
151 SmallSetVector<T *, 8> WorkList;
152 WorkList.insert(Current);
153
154 for (unsigned i = 0; i < WorkList.size(); i++) {
155 T *Current = WorkList[i];
156 if (!Current->hasPredecessors())
157 return Current;
158 auto &Predecessors = Current->getPredecessors();
159 WorkList.insert_range(Predecessors);
160 }
161
162 llvm_unreachable("VPlan without any entry node without predecessors");
163}
164
165VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
166
167const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
168
169/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
176
183
184void VPBlockBase::setPlan(VPlan *ParentPlan) {
185 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
186 Plan = ParentPlan;
187}
188
189/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
191 const VPBlockBase *Block = this;
193 Block = Region->getExiting();
195}
196
203
205 if (!Successors.empty() || !Parent)
206 return this;
207 assert(Parent->getExiting() == this &&
208 "Block w/o successors not the exiting block of its parent.");
209 return Parent->getEnclosingBlockWithSuccessors();
210}
211
213 if (!Predecessors.empty() || !Parent)
214 return this;
215 assert(Parent->getEntry() == this &&
216 "Block w/o predecessors not the entry of its parent.");
217 return Parent->getEnclosingBlockWithPredecessors();
218}
219
221 const VPDominatorTree &VPDT) {
222 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
223 if (!VPBB)
224 return false;
225
226 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
227 // VPBB as its entry, i.e., free of predecessors.
228 if (auto *R = VPBB->getParent())
229 return !R->isReplicator() && !VPBB->hasPredecessors();
230
231 // A header dominates its second predecessor (the latch), with the other
232 // predecessor being the preheader
233 return VPB->getPredecessors().size() == 2 &&
234 VPDT.dominates(VPB, VPB->getPredecessors()[1]);
235}
236
238 const VPDominatorTree &VPDT) {
239 // A latch has a header as its second successor, with its other successor
240 // leaving the loop. A preheader OTOH has a header as its first (and only)
241 // successor.
242 return VPB->getNumSuccessors() == 2 &&
243 VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
244}
245
247 iterator It = begin();
248 while (It != end() && It->isPhi())
249 It++;
250 return It;
251}
252
260
262 if (Def->isLiveIn())
263 return Def->getLiveInIRValue();
264
265 if (hasScalarValue(Def, Lane))
266 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
267
268 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
270 return Data.VPV2Scalars[Def][0];
271 }
272
273 // Look through BuildVector to avoid redundant extracts.
274 // TODO: Remove once replicate regions are unrolled explicitly.
275 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
276 auto *BuildVector = cast<VPInstruction>(Def);
277 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
278 }
279
281 auto *VecPart = Data.VPV2Vector[Def];
282 if (!VecPart->getType()->isVectorTy()) {
283 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
284 return VecPart;
285 }
286 // TODO: Cache created scalar values.
287 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
288 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
289 // set(Def, Extract, Instance);
290 return Extract;
291}
292
293Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
294 if (NeedsScalar) {
295 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
297 (hasScalarValue(Def, VPLane(0)) &&
298 Data.VPV2Scalars[Def].size() == 1)) &&
299 "Trying to access a single scalar per part but has multiple scalars "
300 "per part.");
301 return get(Def, VPLane(0));
302 }
303
304 // If Values have been set for this Def return the one relevant for \p Part.
305 if (hasVectorValue(Def))
306 return Data.VPV2Vector[Def];
307
308 auto GetBroadcastInstrs = [this](Value *V) {
309 if (VF.isScalar())
310 return V;
311 // Broadcast the scalar into all locations in the vector.
312 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
313 return Shuf;
314 };
315
316 if (!hasScalarValue(Def, {0})) {
317 assert(Def->isLiveIn() && "expected a live-in");
318 Value *IRV = Def->getLiveInIRValue();
319 Value *B = GetBroadcastInstrs(IRV);
320 set(Def, B);
321 return B;
322 }
323
324 Value *ScalarValue = get(Def, VPLane(0));
325 // If we aren't vectorizing, we can just copy the scalar map values over
326 // to the vector map.
327 if (VF.isScalar()) {
328 set(Def, ScalarValue);
329 return ScalarValue;
330 }
331
332 bool IsSingleScalar = vputils::isSingleScalar(Def);
333
334 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
335 // Check if there is a scalar value for the selected lane.
336 if (!hasScalarValue(Def, LastLane)) {
337 // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
338 // VPExpandSCEVRecipes can also be a single scalar.
340 VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
341 "unexpected recipe found to be invariant");
342 IsSingleScalar = true;
343 LastLane = 0;
344 }
345
346 // We need to construct the vector value for a single-scalar value by
347 // broadcasting the scalar to all lanes.
348 // TODO: Replace by introducing Broadcast VPInstructions.
349 assert(IsSingleScalar && "must be a single-scalar at this point");
350 // Set the insert point after the last scalarized instruction or after the
351 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
352 // will directly follow the scalar definitions.
353 auto OldIP = Builder.saveIP();
354 auto *LastInst = cast<Instruction>(get(Def, LastLane));
355 auto NewIP = isa<PHINode>(LastInst)
356 ? LastInst->getParent()->getFirstNonPHIIt()
357 : std::next(BasicBlock::iterator(LastInst));
358 Builder.SetInsertPoint(&*NewIP);
359 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
360 set(Def, VectorValue);
361 Builder.restoreIP(OldIP);
362 return VectorValue;
363}
364
366 const DILocation *DIL = DL;
367 // When a FSDiscriminator is enabled, we don't need to add the multiply
368 // factors to the discriminators.
369 if (DIL &&
370 Builder.GetInsertBlock()
371 ->getParent()
372 ->shouldEmitDebugInfoForProfiling() &&
374 // FIXME: For scalable vectors, assume vscale=1.
375 unsigned UF = Plan->getUF();
376 auto NewDIL =
378 if (NewDIL)
379 Builder.SetCurrentDebugLocation(*NewDIL);
380 else
381 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
382 << DIL->getFilename() << " Line: " << DIL->getLine());
383 } else
384 Builder.SetCurrentDebugLocation(DL);
385}
386
388 Value *WideValue,
389 const VPLane &Lane) {
390 Value *ScalarInst = get(Def, Lane);
391 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
392 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
393 // We must handle each element of a vectorized struct type.
394 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
395 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
396 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
397 VectorValue =
398 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
399 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
400 }
401 } else {
402 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
403 }
404 return WideValue;
405}
406
407BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
408 auto &CFG = State.CFG;
409 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
410 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
411 BasicBlock *PrevBB = CFG.PrevBB;
412 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
413 PrevBB->getParent(), CFG.ExitBB);
414 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
415
416 return NewBB;
417}
418
420 auto &CFG = State.CFG;
421 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
422
423 // Register NewBB in its loop. In innermost loops its the same for all
424 // BB's.
425 Loop *ParentLoop = State.CurrentParentLoop;
426 // If this block has a sole successor that is an exit block or is an exit
427 // block itself then it needs adding to the same parent loop as the exit
428 // block.
429 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
430 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
431 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
432 ParentLoop = State.LI->getLoopFor(
433 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
434 }
435
436 if (ParentLoop && !State.LI->getLoopFor(NewBB))
437 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
438
440 if (VPBlockUtils::isHeader(this, State.VPDT)) {
441 // There's no block for the latch yet, connect to the preheader only.
442 Preds = {getPredecessors()[0]};
443 } else {
444 Preds = to_vector(getPredecessors());
445 }
446
447 // Hook up the new basic block to its predecessors.
448 for (VPBlockBase *PredVPBlock : Preds) {
449 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
450 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
451 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
452 "Predecessor basic-block not found building successor.");
453 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
454 auto *PredBBTerminator = PredBB->getTerminator();
455 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
456
457 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
458 if (isa<UnreachableInst>(PredBBTerminator)) {
459 assert(PredVPSuccessors.size() == 1 &&
460 "Predecessor ending w/o branch must have single successor.");
461 DebugLoc DL = PredBBTerminator->getDebugLoc();
462 PredBBTerminator->eraseFromParent();
463 auto *Br = BranchInst::Create(NewBB, PredBB);
464 Br->setDebugLoc(DL);
465 } else if (TermBr && !TermBr->isConditional()) {
466 TermBr->setSuccessor(0, NewBB);
467 } else {
468 // Set each forward successor here when it is created, excluding
469 // backedges. A backward successor is set when the branch is created.
470 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
471 // in the original IR, except when the predecessor is the entry block.
472 // This enables including SCEV and memory runtime check blocks in VPlan.
473 // TODO: Remove exception by modeling the terminator of entry block using
474 // BranchOnCond.
475 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
476 assert((TermBr && (!TermBr->getSuccessor(idx) ||
477 (isa<VPIRBasicBlock>(this) &&
478 (TermBr->getSuccessor(idx) == NewBB ||
479 PredVPBlock == getPlan()->getEntry())))) &&
480 "Trying to reset an existing successor block.");
481 TermBr->setSuccessor(idx, NewBB);
482 }
483 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
484 }
485}
486
489 "VPIRBasicBlock can have at most two successors at the moment!");
490 // Move completely disconnected blocks to their final position.
491 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
492 IRBB->moveAfter(State->CFG.PrevBB);
493 State->Builder.SetInsertPoint(IRBB->getTerminator());
494 State->CFG.PrevBB = IRBB;
495 State->CFG.VPBB2IRBB[this] = IRBB;
496 executeRecipes(State, IRBB);
497 // Create a branch instruction to terminate IRBB if one was not created yet
498 // and is needed.
499 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
500 auto *Br = State->Builder.CreateBr(IRBB);
501 Br->setOperand(0, nullptr);
502 IRBB->getTerminator()->eraseFromParent();
503 } else {
504 assert(
505 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
506 "other blocks must be terminated by a branch");
507 }
508
509 connectToPredecessors(*State);
510}
511
512VPIRBasicBlock *VPIRBasicBlock::clone() {
513 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
514 for (VPRecipeBase &R : Recipes)
515 NewBlock->appendRecipe(R.clone());
516 return NewBlock;
517}
518
520 bool Replica = bool(State->Lane);
521 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
522
523 if (VPBlockUtils::isHeader(this, State->VPDT)) {
524 // Create and register the new vector loop.
525 Loop *PrevParentLoop = State->CurrentParentLoop;
526 State->CurrentParentLoop = State->LI->AllocateLoop();
527
528 // Insert the new loop into the loop nest and register the new basic blocks
529 // before calling any utilities such as SCEV that require valid LoopInfo.
530 if (PrevParentLoop)
531 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
532 else
533 State->LI->addTopLevelLoop(State->CurrentParentLoop);
534 }
535
536 auto IsReplicateRegion = [](VPBlockBase *BB) {
538 assert((!R || R->isReplicator()) &&
539 "only replicate region blocks should remain");
540 return R;
541 };
542 // 1. Create an IR basic block.
543 if ((Replica && this == getParent()->getEntry()) ||
544 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
545 // Reuse the previous basic block if the current VPBB is either
546 // * the entry to a replicate region, or
547 // * the exit of a replicate region.
548 State->CFG.VPBB2IRBB[this] = NewBB;
549 } else {
550 NewBB = createEmptyBasicBlock(*State);
551
552 State->Builder.SetInsertPoint(NewBB);
553 // Temporarily terminate with unreachable until CFG is rewired.
554 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
555 State->Builder.SetInsertPoint(Terminator);
556
557 State->CFG.PrevBB = NewBB;
558 State->CFG.VPBB2IRBB[this] = NewBB;
559 connectToPredecessors(*State);
560 }
561
562 // 2. Fill the IR basic block with IR instructions.
563 executeRecipes(State, NewBB);
564
565 // If this block is a latch, update CurrentParentLoop.
566 if (VPBlockUtils::isLatch(this, State->VPDT))
567 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
568}
569
570VPBasicBlock *VPBasicBlock::clone() {
571 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
572 for (VPRecipeBase &R : *this)
573 NewBlock->appendRecipe(R.clone());
574 return NewBlock;
575}
576
578 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
579 << " in BB: " << BB->getName() << '\n');
580
581 State->CFG.PrevVPBB = this;
582
583 for (VPRecipeBase &Recipe : Recipes) {
584 State->setDebugLocFrom(Recipe.getDebugLoc());
585 Recipe.execute(*State);
586 }
587
588 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
589}
590
591VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
592 assert((SplitAt == end() || SplitAt->getParent() == this) &&
593 "can only split at a position in the same block");
594
595 // Create new empty block after the block to split.
596 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
598
599 // Finally, move the recipes starting at SplitAt to new block.
600 for (VPRecipeBase &ToMove :
601 make_early_inc_range(make_range(SplitAt, this->end())))
602 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
603
604 return SplitBlock;
605}
606
607/// Return the enclosing loop region for region \p P. The templated version is
608/// used to support both const and non-const block arguments.
609template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
610 if (P && P->isReplicator()) {
611 P = P->getParent();
612 // Multiple loop regions can be nested, but replicate regions can only be
613 // nested inside a loop region or must be outside any other region.
614 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
615 }
616 return P;
617}
618
622
626
627static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
628 if (VPBB->empty()) {
629 assert(
630 VPBB->getNumSuccessors() < 2 &&
631 "block with multiple successors doesn't have a recipe as terminator");
632 return false;
633 }
634
635 const VPRecipeBase *R = &VPBB->back();
636 bool IsSwitch = isa<VPInstruction>(R) &&
637 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
641 (void)IsCondBranch;
642 (void)IsSwitch;
643 if (VPBB->getNumSuccessors() == 2 ||
644 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
645 assert((IsCondBranch || IsSwitch) &&
646 "block with multiple successors not terminated by "
647 "conditional branch nor switch recipe");
648
649 return true;
650 }
651
652 if (VPBB->getNumSuccessors() > 2) {
653 assert(IsSwitch && "block with more than 2 successors not terminated by "
654 "a switch recipe");
655 return true;
656 }
657
658 assert(
659 !IsCondBranch &&
660 "block with 0 or 1 successors terminated by conditional branch recipe");
661 return false;
662}
663
665 if (hasConditionalTerminator(this))
666 return &back();
667 return nullptr;
668}
669
671 if (hasConditionalTerminator(this))
672 return &back();
673 return nullptr;
674}
675
677 return getParent() && getParent()->getExitingBasicBlock() == this;
678}
679
680#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685
686void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
687 if (getSuccessors().empty()) {
688 O << Indent << "No successors\n";
689 } else {
690 O << Indent << "Successor(s): ";
691 ListSeparator LS;
692 for (auto *Succ : getSuccessors())
693 O << LS << Succ->getName();
694 O << '\n';
695 }
696}
697
698void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
699 VPSlotTracker &SlotTracker) const {
700 O << Indent << getName() << ":\n";
701
702 auto RecipeIndent = Indent + " ";
703 for (const VPRecipeBase &Recipe : *this) {
704 Recipe.print(O, RecipeIndent, SlotTracker);
705 O << '\n';
706 }
707
708 printSuccessors(O, Indent);
709}
710#endif
711
712static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
713
714// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
715// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
716// Remapping of operands must be done separately. Returns a pair with the new
717// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
718// region, return nullptr for the exiting block.
719static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
721 VPBlockBase *Exiting = nullptr;
722 bool InRegion = Entry->getParent();
723 // First, clone blocks reachable from Entry.
724 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
725 VPBlockBase *NewBB = BB->clone();
726 Old2NewVPBlocks[BB] = NewBB;
727 if (InRegion && BB->getNumSuccessors() == 0) {
728 assert(!Exiting && "Multiple exiting blocks?");
729 Exiting = BB;
730 }
731 }
732 assert((!InRegion || Exiting) && "regions must have a single exiting block");
733
734 // Second, update the predecessors & successors of the cloned blocks.
735 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
736 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
738 for (VPBlockBase *Pred : BB->getPredecessors()) {
739 NewPreds.push_back(Old2NewVPBlocks[Pred]);
740 }
741 NewBB->setPredecessors(NewPreds);
743 for (VPBlockBase *Succ : BB->successors()) {
744 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
745 }
746 NewBB->setSuccessors(NewSuccs);
747 }
748
749#if !defined(NDEBUG)
750 // Verify that the order of predecessors and successors matches in the cloned
751 // version.
752 for (const auto &[OldBB, NewBB] :
754 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
755 for (const auto &[OldPred, NewPred] :
756 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
757 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
758
759 for (const auto &[OldSucc, NewSucc] :
760 zip(OldBB->successors(), NewBB->successors()))
761 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
762 }
763#endif
764
765 return std::make_pair(Old2NewVPBlocks[Entry],
766 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
767}
768
769VPRegionBlock *VPRegionBlock::clone() {
770 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
771 auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
772 getName(), isReplicator());
773 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
774 Block->setParent(NewRegion);
775 return NewRegion;
776}
777
780 "Loop regions should have been lowered to plain CFG");
781 assert(!State->Lane && "Replicating a Region with non-null instance.");
782 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
783
785 Entry);
786 State->Lane = VPLane(0);
787 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
788 State->Lane = VPLane(Lane, VPLane::Kind::First);
789 // Visit the VPBlocks connected to \p this, starting from it.
790 for (VPBlockBase *Block : RPOT) {
791 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
792 Block->execute(State);
793 }
794 }
795
796 // Exit replicating mode.
797 State->Lane.reset();
798}
799
802 for (VPRecipeBase &R : Recipes)
803 Cost += R.cost(VF, Ctx);
804 return Cost;
805}
806
807const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
808 const VPBlockBase *Pred = nullptr;
809 if (hasPredecessors()) {
810 Pred = getPredecessors()[Idx];
811 } else {
812 auto *Region = getParent();
813 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
814 "must be in the entry block of a non-replicate region");
815 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
816 "loop region has a single predecessor (preheader), its entry block "
817 "has 2 incoming blocks");
818
819 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
820 // region itself whose exiting block feeds the phi across the backedge.
821 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
822 }
823 return Pred->getExitingBasicBlock();
824}
825
827 if (!isReplicator()) {
830 Cost += Block->cost(VF, Ctx);
831 InstructionCost BackedgeCost =
832 ForceTargetInstructionCost.getNumOccurrences()
833 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
834 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
835 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
836 << ": vector loop backedge\n");
837 Cost += BackedgeCost;
838 return Cost;
839 }
840
841 // Compute the cost of a replicate region. Replicating isn't supported for
842 // scalable vectors, return an invalid cost for them.
843 // TODO: Discard scalable VPlans with replicate recipes earlier after
844 // construction.
845 if (VF.isScalable())
847
848 // First compute the cost of the conditionally executed recipes, followed by
849 // account for the branching cost, except if the mask is a header mask or
850 // uniform condition.
851 using namespace llvm::VPlanPatternMatch;
853 InstructionCost ThenCost = Then->cost(VF, Ctx);
854
855 // For the scalar case, we may not always execute the original predicated
856 // block, Thus, scale the block's cost by the probability of executing it.
857 if (VF.isScalar())
858 return ThenCost / getPredBlockCostDivisor(Ctx.CostKind);
859
860 return ThenCost;
861}
862
863#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
865 VPSlotTracker &SlotTracker) const {
866 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
867 auto NewIndent = Indent + " ";
868 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
869 O << '\n';
870 BlockBase->print(O, NewIndent, SlotTracker);
871 }
872 O << Indent << "}\n";
873
874 printSuccessors(O, Indent);
875}
876#endif
877
879 auto *Header = cast<VPBasicBlock>(getEntry());
880 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
881 assert(this == getPlan()->getVectorLoopRegion() &&
882 "Canonical IV must be in the entry of the top-level loop region");
883 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
884 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
885 CanIV->getDebugLoc(), "index");
886 CanIV->replaceAllUsesWith(ScalarR);
887 CanIV->eraseFromParent();
888 }
889
890 VPBlockBase *Preheader = getSinglePredecessor();
891 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
893 VPBlockUtils::disconnectBlocks(Preheader, this);
894 VPBlockUtils::disconnectBlocks(this, Middle);
895
896 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
897 VPB->setParent(getParent());
898
899 VPBlockUtils::connectBlocks(Preheader, Header);
900 VPBlockUtils::connectBlocks(ExitingLatch, Middle);
901 VPBlockUtils::connectBlocks(ExitingLatch, Header);
902}
903
904VPlan::VPlan(Loop *L) {
905 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
906 ScalarHeader = createVPIRBasicBlock(L->getHeader());
907
908 SmallVector<BasicBlock *> IRExitBlocks;
909 L->getUniqueExitBlocks(IRExitBlocks);
910 for (BasicBlock *EB : IRExitBlocks)
911 ExitBlocks.push_back(createVPIRBasicBlock(EB));
912}
913
915 VPValue DummyValue;
916
917 for (auto *VPB : CreatedBlocks) {
918 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
919 // Replace all operands of recipes and all VPValues defined in VPBB with
920 // DummyValue so the block can be deleted.
921 for (VPRecipeBase &R : *VPBB) {
922 for (auto *Def : R.definedValues())
923 Def->replaceAllUsesWith(&DummyValue);
924
925 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
926 R.setOperand(I, &DummyValue);
927 }
928 }
929 delete VPB;
930 }
931 for (VPValue *VPV : getLiveIns())
932 delete VPV;
933 if (BackedgeTakenCount)
934 delete BackedgeTakenCount;
935}
936
938 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
939 return VPIRBB->getIRBasicBlock() == IRBB;
940 });
941 assert(Iter != getExitBlocks().end() && "no exit block found");
942 return *Iter;
943}
944
946 return is_contained(ExitBlocks, VPBB);
947}
948
949/// Generate the code inside the preheader and body of the vectorized loop.
950/// Assumes a single pre-header basic-block was created for this. Introduce
951/// additional basic-blocks as needed, and fill them all.
953 // Initialize CFG state.
954 State->CFG.PrevVPBB = nullptr;
955 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
956
957 // Update VPDominatorTree since VPBasicBlock may be removed after State was
958 // constructed.
959 State->VPDT.recalculate(*this);
960
961 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
962 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
963 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
964 State->CFG.DTU.applyUpdates(
965 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
966
967 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
968 << ", UF=" << getUF() << '\n');
969 setName("Final VPlan");
970 LLVM_DEBUG(dump());
971
972 BasicBlock *ScalarPh = State->CFG.ExitBB;
973 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
974 if (ScalarPhVPBB->hasPredecessors()) {
975 // Disconnect scalar preheader and scalar header, as the dominator tree edge
976 // will be updated as part of VPlan execution. This allows keeping the DTU
977 // logic generic during VPlan execution.
978 State->CFG.DTU.applyUpdates(
979 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
980 } else {
981 Loop *OrigLoop =
982 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
983 // If the original loop is unreachable, we need to delete it.
984 auto Blocks = OrigLoop->getBlocksVector();
985 Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
986 for (auto *BB : Blocks)
987 State->LI->removeBlock(BB);
988 State->LI->erase(OrigLoop);
989 }
990
992 Entry);
993 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
994 // successor blocks including the middle, exit and scalar preheader blocks.
995 for (VPBlockBase *Block : RPOT)
996 Block->execute(State);
997
998 State->CFG.DTU.flush();
999
1000 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1001 if (!Header)
1002 return;
1003
1004 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
1005 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1006
1007 // Fix the latch value of canonical, reduction and first-order recurrences
1008 // phis in the vector loop.
1009 for (VPRecipeBase &R : Header->phis()) {
1010 // Skip phi-like recipes that generate their backedege values themselves.
1011 if (isa<VPWidenPHIRecipe>(&R))
1012 continue;
1013
1014 auto *PhiR = cast<VPSingleDefRecipe>(&R);
1015 // VPInstructions currently model scalar Phis only.
1016 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1018 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1019
1020 Value *Phi = State->get(PhiR, NeedsScalar);
1021 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1022 // not.
1023 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1024 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1025 }
1026}
1027
1029 // For now only return the cost of the vector loop region, ignoring any other
1030 // blocks, like the preheader or middle blocks, expect for checking them for
1031 // recipes with invalid costs.
1033
1034 // If the cost of the loop region is invalid or any recipe in the skeleton
1035 // outside loop regions are invalid return an invalid cost.
1038 [&VF, &Ctx](VPBasicBlock *VPBB) {
1039 return !VPBB->cost(VF, Ctx).isValid();
1040 }))
1042
1043 return Cost;
1044}
1045
1047 // TODO: Cache if possible.
1049 if (auto *R = dyn_cast<VPRegionBlock>(B))
1050 return R->isReplicator() ? nullptr : R;
1051 return nullptr;
1052}
1053
1056 if (auto *R = dyn_cast<VPRegionBlock>(B))
1057 return R->isReplicator() ? nullptr : R;
1058 return nullptr;
1059}
1060
1061#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1064
1065 if (VF.getNumUsers() > 0) {
1066 O << "\nLive-in ";
1067 VF.printAsOperand(O, SlotTracker);
1068 O << " = VF";
1069 }
1070
1071 if (VFxUF.getNumUsers() > 0) {
1072 O << "\nLive-in ";
1073 VFxUF.printAsOperand(O, SlotTracker);
1074 O << " = VF * UF";
1075 }
1076
1077 if (VectorTripCount.getNumUsers() > 0) {
1078 O << "\nLive-in ";
1079 VectorTripCount.printAsOperand(O, SlotTracker);
1080 O << " = vector-trip-count";
1081 }
1082
1083 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1084 O << "\nLive-in ";
1085 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1086 O << " = backedge-taken count";
1087 }
1088
1089 O << "\n";
1090 if (TripCount) {
1091 if (TripCount->isLiveIn())
1092 O << "Live-in ";
1093 TripCount->printAsOperand(O, SlotTracker);
1094 O << " = original trip-count";
1095 O << "\n";
1096 }
1097}
1098
1102
1103 O << "VPlan '" << getName() << "' {";
1104
1105 printLiveIns(O);
1106
1108 RPOT(getEntry());
1109 for (const VPBlockBase *Block : RPOT) {
1110 O << '\n';
1111 Block->print(O, "", SlotTracker);
1112 }
1113
1114 O << "}\n";
1115}
1116
1117std::string VPlan::getName() const {
1118 std::string Out;
1119 raw_string_ostream RSO(Out);
1120 RSO << Name << " for ";
1121 if (!VFs.empty()) {
1122 RSO << "VF={" << VFs[0];
1123 for (ElementCount VF : drop_begin(VFs))
1124 RSO << "," << VF;
1125 RSO << "},";
1126 }
1127
1128 if (UFs.empty()) {
1129 RSO << "UF>=1";
1130 } else {
1131 RSO << "UF={" << UFs[0];
1132 for (unsigned UF : drop_begin(UFs))
1133 RSO << "," << UF;
1134 RSO << "}";
1135 }
1136
1137 return Out;
1138}
1139
1142 VPlanPrinter Printer(O, *this);
1143 Printer.dump();
1144}
1145
1147void VPlan::dump() const { print(dbgs()); }
1148#endif
1149
1150static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1151 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1152 // Update the operands of all cloned recipes starting at NewEntry. This
1153 // traverses all reachable blocks. This is done in two steps, to handle cycles
1154 // in PHI recipes.
1156 OldDeepRPOT(Entry);
1158 NewDeepRPOT(NewEntry);
1159 // First, collect all mappings from old to new VPValues defined by cloned
1160 // recipes.
1161 for (const auto &[OldBB, NewBB] :
1164 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1165 "blocks must have the same number of recipes");
1166 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1167 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1168 "recipes must have the same number of operands");
1169 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1170 "recipes must define the same number of operands");
1171 for (const auto &[OldV, NewV] :
1172 zip(OldR.definedValues(), NewR.definedValues()))
1173 Old2NewVPValues[OldV] = NewV;
1174 }
1175 }
1176
1177 // Update all operands to use cloned VPValues.
1178 for (VPBasicBlock *NewBB :
1180 for (VPRecipeBase &NewR : *NewBB)
1181 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1182 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1183 NewR.setOperand(I, NewOp);
1184 }
1185 }
1186}
1187
1189 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1190 // Clone blocks.
1191 const auto &[NewEntry, __] = cloneFrom(Entry);
1192
1193 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1194 VPIRBasicBlock *NewScalarHeader = nullptr;
1195 if (getScalarHeader()->hasPredecessors()) {
1196 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1197 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1198 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1199 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1200 }));
1201 } else {
1202 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1203 }
1204 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1205 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1206 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1207 for (VPValue *OldLiveIn : getLiveIns()) {
1208 Old2NewVPValues[OldLiveIn] =
1209 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1210 }
1211 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1212 Old2NewVPValues[&VF] = &NewPlan->VF;
1213 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1214 if (BackedgeTakenCount) {
1215 NewPlan->BackedgeTakenCount = new VPValue();
1216 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1217 }
1218 if (TripCount && TripCount->isLiveIn())
1219 Old2NewVPValues[TripCount] =
1220 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1221 // else NewTripCount will be created and inserted into Old2NewVPValues when
1222 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1223
1224 remapOperands(Entry, NewEntry, Old2NewVPValues);
1225
1226 // Initialize remaining fields of cloned VPlan.
1227 NewPlan->VFs = VFs;
1228 NewPlan->UFs = UFs;
1229 // TODO: Adjust names.
1230 NewPlan->Name = Name;
1231 if (TripCount) {
1232 assert(Old2NewVPValues.contains(TripCount) &&
1233 "TripCount must have been added to Old2NewVPValues");
1234 NewPlan->TripCount = Old2NewVPValues[TripCount];
1235 }
1236
1237 // Transfer all cloned blocks (the second half of all current blocks) from
1238 // current to new VPlan.
1239 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1240 for (unsigned I :
1241 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1242 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1243 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1244
1245 // Update ExitBlocks of the new plan.
1246 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1247 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1248 VPB != NewScalarHeader)
1249 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1250 }
1251
1252 return NewPlan;
1253}
1254
1256 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1257 CreatedBlocks.push_back(VPIRBB);
1258 return VPIRBB;
1259}
1260
1262 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1263 for (Instruction &I :
1264 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1265 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1266 return VPIRBB;
1267}
1268
1269#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1270
1271Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1272 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1273 Twine(getOrCreateBID(Block));
1274}
1275
1276Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1277 const std::string &Name = Block->getName();
1278 if (!Name.empty())
1279 return Name;
1280 return "VPB" + Twine(getOrCreateBID(Block));
1281}
1282
1284 Depth = 1;
1285 bumpIndent(0);
1286 OS << "digraph VPlan {\n";
1287 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1288 if (!Plan.getName().empty())
1289 OS << "\\n" << DOT::EscapeString(Plan.getName());
1290
1291 {
1292 // Print live-ins.
1293 std::string Str;
1294 raw_string_ostream SS(Str);
1295 Plan.printLiveIns(SS);
1297 StringRef(Str).rtrim('\n').split(Lines, "\n");
1298 for (auto Line : Lines)
1299 OS << DOT::EscapeString(Line.str()) << "\\n";
1300 }
1301
1302 OS << "\"]\n";
1303 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1304 OS << "edge [fontname=Courier, fontsize=30]\n";
1305 OS << "compound=true\n";
1306
1307 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1308 dumpBlock(Block);
1309
1310 OS << "}\n";
1311}
1312
1313void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1315 dumpBasicBlock(BasicBlock);
1317 dumpRegion(Region);
1318 else
1319 llvm_unreachable("Unsupported kind of VPBlock.");
1320}
1321
1322void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1323 bool Hidden, const Twine &Label) {
1324 // Due to "dot" we print an edge between two regions as an edge between the
1325 // exiting basic block and the entry basic of the respective regions.
1326 const VPBlockBase *Tail = From->getExitingBasicBlock();
1327 const VPBlockBase *Head = To->getEntryBasicBlock();
1328 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1329 OS << " [ label=\"" << Label << '\"';
1330 if (Tail != From)
1331 OS << " ltail=" << getUID(From);
1332 if (Head != To)
1333 OS << " lhead=" << getUID(To);
1334 if (Hidden)
1335 OS << "; splines=none";
1336 OS << "]\n";
1337}
1338
1339void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1340 auto &Successors = Block->getSuccessors();
1341 if (Successors.size() == 1)
1342 drawEdge(Block, Successors.front(), false, "");
1343 else if (Successors.size() == 2) {
1344 drawEdge(Block, Successors.front(), false, "T");
1345 drawEdge(Block, Successors.back(), false, "F");
1346 } else {
1347 unsigned SuccessorNumber = 0;
1348 for (auto *Successor : Successors)
1349 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1350 }
1351}
1352
1353void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1354 // Implement dot-formatted dump by performing plain-text dump into the
1355 // temporary storage followed by some post-processing.
1356 OS << Indent << getUID(BasicBlock) << " [label =\n";
1357 bumpIndent(1);
1358 std::string Str;
1359 raw_string_ostream SS(Str);
1360 // Use no indentation as we need to wrap the lines into quotes ourselves.
1361 BasicBlock->print(SS, "", SlotTracker);
1362
1363 // We need to process each line of the output separately, so split
1364 // single-string plain-text dump.
1366 StringRef(Str).rtrim('\n').split(Lines, "\n");
1367
1368 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1369 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1370 };
1371
1372 // Don't need the "+" after the last line.
1373 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1374 EmitLine(Line, " +\n");
1375 EmitLine(Lines.back(), "\n");
1376
1377 bumpIndent(-1);
1378 OS << Indent << "]\n";
1379
1380 dumpEdges(BasicBlock);
1381}
1382
1383void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1384 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1385 bumpIndent(1);
1386 OS << Indent << "fontname=Courier\n"
1387 << Indent << "label=\""
1388 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1389 << DOT::EscapeString(Region->getName()) << "\"\n";
1390 // Dump the blocks of the region.
1391 assert(Region->getEntry() && "Region contains no inner blocks.");
1392 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1393 dumpBlock(Block);
1394 bumpIndent(-1);
1395 OS << Indent << "}\n";
1396 dumpEdges(Region);
1397}
1398
1399#endif
1400
1401/// Returns true if there is a vector loop region and \p VPV is defined in a
1402/// loop region.
1403static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1404 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1405 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1407}
1408
1413 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1414}
1415
1417 VPValue *New,
1418 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1419 // Note that this early exit is required for correctness; the implementation
1420 // below relies on the number of users for this VPValue to decrease, which
1421 // isn't the case if this == New.
1422 if (this == New)
1423 return;
1424
1425 for (unsigned J = 0; J < getNumUsers();) {
1426 VPUser *User = Users[J];
1427 bool RemovedUser = false;
1428 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1429 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1430 continue;
1431
1432 RemovedUser = true;
1433 User->setOperand(I, New);
1434 }
1435 // If a user got removed after updating the current user, the next user to
1436 // update will be moved to the current position, so we only need to
1437 // increment the index if the number of users did not change.
1438 if (!RemovedUser)
1439 J++;
1440 }
1441}
1442
1444 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1445 if (getOperand(Idx) == From)
1446 setOperand(Idx, To);
1447 }
1448}
1449
1450#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1452 OS << Tracker.getOrCreateName(this);
1453}
1454
1457 Op->printAsOperand(O, SlotTracker);
1458 });
1459}
1460#endif
1461
1462void VPSlotTracker::assignName(const VPValue *V) {
1463 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1464 auto *UV = V->getUnderlyingValue();
1465 auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe());
1466 if (!UV && !(VPI && !VPI->getName().empty())) {
1467 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1468 NextSlot++;
1469 return;
1470 }
1471
1472 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1473 // appending ".Number" to the name if there are multiple uses.
1474 std::string Name;
1475 if (UV)
1476 Name = getName(UV);
1477 else
1478 Name = VPI->getName();
1479
1480 assert(!Name.empty() && "Name cannot be empty.");
1481 StringRef Prefix = UV ? "ir<" : "vp<%";
1482 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1483
1484 // First assign the base name for V.
1485 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1486 // Integer or FP constants with different types will result in he same string
1487 // due to stripping types.
1488 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1489 return;
1490
1491 // If it is already used by C > 0 other VPValues, increase the version counter
1492 // C and use it for V.
1493 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1494 if (!UseInserted) {
1495 C->second++;
1496 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1497 }
1498}
1499
1500void VPSlotTracker::assignNames(const VPlan &Plan) {
1501 if (Plan.VF.getNumUsers() > 0)
1502 assignName(&Plan.VF);
1503 if (Plan.VFxUF.getNumUsers() > 0)
1504 assignName(&Plan.VFxUF);
1505 assignName(&Plan.VectorTripCount);
1506 if (Plan.BackedgeTakenCount)
1507 assignName(Plan.BackedgeTakenCount);
1508 for (VPValue *LI : Plan.getLiveIns())
1509 assignName(LI);
1510
1511 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1512 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1513 for (const VPBasicBlock *VPBB :
1515 assignNames(VPBB);
1516}
1517
1518void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1519 for (const VPRecipeBase &Recipe : *VPBB)
1520 for (VPValue *Def : Recipe.definedValues())
1521 assignName(Def);
1522}
1523
1524std::string VPSlotTracker::getName(const Value *V) {
1525 std::string Name;
1526 raw_string_ostream S(Name);
1527 if (V->hasName() || !isa<Instruction>(V)) {
1528 V->printAsOperand(S, false);
1529 return Name;
1530 }
1531
1532 if (!MST) {
1533 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1534 // instruction.
1535 auto *I = cast<Instruction>(V);
1536 // This check is required to support unit tests with incomplete IR.
1537 if (I->getParent()) {
1538 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1539 MST->incorporateFunction(*I->getFunction());
1540 } else {
1541 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1542 }
1543 }
1544 V->printAsOperand(S, false, *MST);
1545 return Name;
1546}
1547
1548std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1549 std::string Name = VPValue2Name.lookup(V);
1550 if (!Name.empty())
1551 return Name;
1552
1553 // If no name was assigned, no VPlan was provided when creating the slot
1554 // tracker or it is not reachable from the provided VPlan. This can happen,
1555 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1556 // in a debugger.
1557 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1558 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1559 // here.
1560 const VPRecipeBase *DefR = V->getDefiningRecipe();
1561 (void)DefR;
1562 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1563 "VPValue defined by a recipe in a VPlan?");
1564
1565 // Use the underlying value's name, if there is one.
1566 if (auto *UV = V->getUnderlyingValue()) {
1567 std::string Name;
1568 raw_string_ostream S(Name);
1569 UV->printAsOperand(S, false);
1570 return (Twine("ir<") + Name + ">").str();
1571 }
1572
1573 return "<badref>";
1574}
1575
1577 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1578 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1579 bool PredicateAtRangeStart = Predicate(Range.Start);
1580
1581 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1582 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1583 Range.End = TmpVF;
1584 break;
1585 }
1586
1587 return PredicateAtRangeStart;
1588}
1589
1590/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1591/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1592/// of VF's starting at a given VF and extending it as much as possible. Each
1593/// vectorization decision can potentially shorten this sub-range during
1594/// buildVPlan().
1596 ElementCount MaxVF) {
1597 auto MaxVFTimes2 = MaxVF * 2;
1598 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1599 VFRange SubRange = {VF, MaxVFTimes2};
1600 if (auto Plan = tryToBuildVPlan(SubRange)) {
1602 // Update the name of the latch of the top-level vector loop region region
1603 // after optimizations which includes block folding.
1604 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1605 VPlans.push_back(std::move(Plan));
1606 }
1607 VF = SubRange.End;
1608 }
1609}
1610
1612 assert(count_if(VPlans,
1613 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1614 1 &&
1615 "Multiple VPlans for VF.");
1616
1617 for (const VPlanPtr &Plan : VPlans) {
1618 if (Plan->hasVF(VF))
1619 return *Plan.get();
1620 }
1621 llvm_unreachable("No plan found!");
1622}
1623
1626 // Reserve first location for self reference to the LoopID metadata node.
1627 MDs.push_back(nullptr);
1628 bool IsUnrollMetadata = false;
1629 MDNode *LoopID = L->getLoopID();
1630 if (LoopID) {
1631 // First find existing loop unrolling disable metadata.
1632 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1633 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1634 if (MD) {
1635 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1636 if (!S)
1637 continue;
1638 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1639 continue;
1640 IsUnrollMetadata =
1641 S->getString().starts_with("llvm.loop.unroll.disable");
1642 }
1643 MDs.push_back(LoopID->getOperand(I));
1644 }
1645 }
1646
1647 if (!IsUnrollMetadata) {
1648 // Add runtime unroll disable metadata.
1649 LLVMContext &Context = L->getHeader()->getContext();
1650 SmallVector<Metadata *, 1> DisableOperands;
1651 DisableOperands.push_back(
1652 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1653 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1654 MDs.push_back(DisableNode);
1655 MDNode *NewLoopID = MDNode::get(Context, MDs);
1656 // Set operand 0 to refer to the loop id itself.
1657 NewLoopID->replaceOperandWith(0, NewLoopID);
1658 L->setLoopID(NewLoopID);
1659 }
1660}
1661
1663 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1664 bool VectorizingEpilogue, MDNode *OrigLoopID,
1665 std::optional<unsigned> OrigAverageTripCount,
1666 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1667 bool DisableRuntimeUnroll) {
1668 // Update the metadata of the scalar loop. Skip the update when vectorizing
1669 // the epilogue loop to ensure it is updated only once. Also skip the update
1670 // when the scalar loop became unreachable.
1671 if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1672 std::optional<MDNode *> RemainderLoopID =
1675 if (RemainderLoopID) {
1676 OrigLoop->setLoopID(*RemainderLoopID);
1677 } else {
1678 if (DisableRuntimeUnroll)
1680
1681 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1682 Hints.setAlreadyVectorized();
1683 }
1684 }
1685
1686 if (!VectorLoop)
1687 return;
1688
1689 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1690 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1692 VectorLoop->setLoopID(*VectorizedLoopID);
1693 } else {
1694 // Keep all loop hints from the original loop on the vector loop (we'll
1695 // replace the vectorizer-specific hints below).
1696 if (OrigLoopID)
1697 VectorLoop->setLoopID(OrigLoopID);
1698
1699 if (!VectorizingEpilogue) {
1700 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1701 Hints.setAlreadyVectorized();
1702 }
1703
1704 // Check if it's EVL-vectorized and mark the corresponding metadata.
1705 bool IsEVLVectorized =
1706 llvm::any_of(*HeaderVPBB, [](const VPRecipeBase &Recipe) {
1707 // Looking for the ExplictVectorLength VPInstruction.
1708 if (const auto *VI = dyn_cast<VPInstruction>(&Recipe))
1709 return VI->getOpcode() == VPInstruction::ExplicitVectorLength;
1710 return false;
1711 });
1712 if (IsEVLVectorized) {
1713 LLVMContext &Context = VectorLoop->getHeader()->getContext();
1714 MDNode *LoopID = VectorLoop->getLoopID();
1715 auto *IsEVLVectorizedMD = MDNode::get(
1716 Context,
1717 {MDString::get(Context, "llvm.loop.isvectorized.tailfoldingstyle"),
1718 MDString::get(Context, "evl")});
1719 MDNode *NewLoopID = makePostTransformationMetadata(Context, LoopID, {},
1720 {IsEVLVectorizedMD});
1721 VectorLoop->setLoopID(NewLoopID);
1722 }
1723 }
1725 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1726 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1728
1729 // Set/update profile weights for the vector and remainder loops as original
1730 // loop iterations are now distributed among them. Note that original loop
1731 // becomes the scalar remainder loop after vectorization.
1732 //
1733 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1734 // end up getting slightly roughened result but that should be OK since
1735 // profile is not inherently precise anyway. Note also possible bypass of
1736 // vector code caused by legality checks is ignored, assigning all the weight
1737 // to the vector loop, optimistically.
1738 //
1739 // For scalable vectorization we can't know at compile time how many
1740 // iterations of the loop are handled in one vector iteration, so instead
1741 // use the value of vscale used for tuning.
1742 if (!OrigAverageTripCount)
1743 return;
1744 // Calculate number of iterations in unrolled loop.
1745 unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1746 // Calculate number of iterations for remainder loop.
1747 unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1748
1749 if (HeaderVPBB) {
1750 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1751 OrigLoopInvocationWeight);
1752 }
1753 if (Plan.getScalarPreheader()->hasPredecessors()) {
1754 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1755 OrigLoopInvocationWeight);
1756 }
1757}
1758
1759#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1761 if (VPlans.empty()) {
1762 O << "LV: No VPlans built.\n";
1763 return;
1764 }
1765 for (const auto &Plan : VPlans)
1767 Plan->printDOT(O);
1768 else
1769 Plan->print(O);
1770}
1771#endif
1772
1775 if (!V->isLiveIn())
1776 return {};
1777
1778 return TTI::getOperandInfo(V->getLiveInIRValue());
1779}
1780
1783 if (VF.isScalar())
1784 return 0;
1785
1786 InstructionCost ScalarizationCost = 0;
1787 // Compute the cost of scalarizing the result if needed.
1788 if (!ResultTy->isVoidTy()) {
1789 for (Type *VectorTy :
1790 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1791 ScalarizationCost += TTI.getScalarizationOverhead(
1793 /*Insert=*/true,
1794 /*Extract=*/false, CostKind);
1795 }
1796 }
1797 // Compute the cost of scalarizing the operands, skipping ones that do not
1798 // require extraction/scalarization and do not incur any overhead.
1799 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1801 for (auto *Op : Operands) {
1803 !UniqueOperands.insert(Op).second)
1804 continue;
1805 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1806 }
1807 return ScalarizationCost +
1808 TTI.getOperandsScalarizationOverhead(Tys, CostKind);
1809}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
dxil pretty DXIL Metadata Pretty Printer
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1624
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:145
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:609
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:61
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1403
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:627
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:62
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1150
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:64
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Definition VPlan.cpp:719
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:124
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:187
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:156
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1611
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1662
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1595
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1576
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1760
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition LoopInfo.cpp:502
Metadata node.
Definition Metadata.h:1077
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1445
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1565
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1451
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:607
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:104
void insert_range(Range &&R)
Definition SetVector.h:193
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:356
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:370
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:812
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3755
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:3830
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3782
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:519
iterator end()
Definition VPlan.h:3792
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3790
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:570
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:800
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:807
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:246
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:419
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:619
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:591
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3770
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:577
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:698
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:676
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:664
const VPRecipeBase & back() const
Definition VPlan.h:3804
bool empty() const
Definition VPlan.h:3801
size_t size() const
Definition VPlan.h:3800
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:190
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:686
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:212
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
VPlan * getPlan()
Definition VPlan.cpp:165
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:184
const std::string & getName() const
Definition VPlan.h:164
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:204
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:228
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:131
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
Definition VPlan.cpp:237
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
Definition VPlan.cpp:220
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:187
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:206
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:126
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
Recipe to expand a SCEV expression.
Definition VPlan.h:3374
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3908
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:487
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:3932
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:512
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:85
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:394
VPBasicBlock * getParent()
Definition VPlan.h:415
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:3943
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:769
const VPBlockBase * getEntry() const
Definition VPlan.h:3979
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:878
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4011
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:826
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:864
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:778
const VPBlockBase * getExiting() const
Definition VPlan.h:3991
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3645
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1548
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1443
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1455
operand_range operands()
Definition VPlanValue.h:267
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:243
unsigned getNumOperands() const
Definition VPlanValue.h:237
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:238
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1409
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:135
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1451
friend class VPDef
Definition VPlanValue.h:49
Value * UnderlyingVal
Definition VPlanValue.h:61
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:118
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:98
virtual ~VPValue()
Definition VPlan.cpp:104
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:111
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1412
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1416
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition VPlanValue.h:65
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2108
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1283
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4046
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1141
friend class VPSlotTracker
Definition VPlan.h:4048
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1117
VPBasicBlock * getEntry()
Definition VPlan.h:4145
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition VPlan.h:4386
void setName(const Twine &newName)
Definition VPlan.h:4293
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:937
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:914
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:945
friend class VPlanPrinter
Definition VPlan.h:4047
unsigned getUF() const
Definition VPlan.h:4275
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1255
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4197
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1046
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1028
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4134
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4376
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1261
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1147
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4188
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:952
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4327
void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1100
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4193
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1062
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1188
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:130
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
Definition VPlanUtils.h:44
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:318
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2211
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
auto cast_or_null(const Y &Val)
Definition Casting.h:720
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
cl::opt< bool > EnableVPlanNativePath
Definition VPlan.cpp:56
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
unsigned getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind)
A helper function that returns how much we should divide the cost of a predicated block by.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1781
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1774
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:293
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:253
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:387
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:365
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...