63 "llvm.loop.vectorize.followup_vectorized";
65 "llvm.loop.vectorize.followup_epilogue";
72 cl::desc(
"Use dot format instead of plain text when dumping VPlans"));
74#define DEBUG_TYPE "loop-vectorize"
76#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
90 return Builder.CreateSub(
getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
93 return Builder.getInt32(Lane);
101 Def->addDefinedValue(
this);
105 assert(Users.empty() &&
"trying to delete a VPValue with remaining users");
107 Def->removeDefinedValue(
this);
110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
121 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
129 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() :
nullptr);
154 for (
unsigned i = 0; i < WorkList.
size(); i++) {
155 T *Current = WorkList[i];
156 if (!Current->hasPredecessors())
158 auto &Predecessors = Current->getPredecessors();
185 assert(ParentPlan->
getEntry() ==
this &&
"Can only set plan on its entry.");
205 if (!Successors.empty() || !Parent)
207 assert(Parent->getExiting() ==
this &&
208 "Block w/o successors not the exiting block of its parent.");
209 return Parent->getEnclosingBlockWithSuccessors();
213 if (!Predecessors.empty() || !Parent)
215 assert(Parent->getEntry() ==
this &&
216 "Block w/o predecessors not the entry of its parent.");
217 return Parent->getEnclosingBlockWithPredecessors();
228 if (
auto *R = VPBB->getParent())
229 return !R->isReplicator() && !VPBB->hasPredecessors();
248 while (It !=
end() && It->isPhi())
263 return Def->getLiveInIRValue();
266 return Data.VPV2Scalars[Def][
Lane.mapToCacheIndex(
VF)];
270 return Data.VPV2Scalars[Def][0];
277 return get(BuildVector->getOperand(
Lane.getKnownLane()),
true);
281 auto *VecPart =
Data.VPV2Vector[Def];
282 if (!VecPart->getType()->isVectorTy()) {
283 assert(
Lane.isFirstLane() &&
"cannot get lane > 0 for scalar");
288 auto *Extract =
Builder.CreateExtractElement(VecPart, LaneV);
298 Data.VPV2Scalars[Def].size() == 1)) &&
299 "Trying to access a single scalar per part but has multiple scalars "
306 return Data.VPV2Vector[Def];
308 auto GetBroadcastInstrs = [
this](
Value *V) {
317 assert(Def->isLiveIn() &&
"expected a live-in");
318 Value *IRV = Def->getLiveInIRValue();
319 Value *
B = GetBroadcastInstrs(IRV);
328 set(Def, ScalarValue);
334 VPLane LastLane(IsSingleScalar ? 0 :
VF.getFixedValue() - 1);
341 "unexpected recipe found to be invariant");
342 IsSingleScalar =
true;
349 assert(IsSingleScalar &&
"must be a single-scalar at this point");
356 ? LastInst->getParent()->getFirstNonPHIIt()
358 Builder.SetInsertPoint(&*NewIP);
359 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
360 set(Def, VectorValue);
370 Builder.GetInsertBlock()
372 ->shouldEmitDebugInfoForProfiling() &&
375 unsigned UF = Plan->getUF();
379 Builder.SetCurrentDebugLocation(*NewDIL);
382 << DIL->getFilename() <<
" Line: " << DIL->getLine());
384 Builder.SetCurrentDebugLocation(
DL);
394 for (
unsigned I = 0, E = StructTy->getNumElements();
I != E;
I++) {
395 Value *ScalarValue =
Builder.CreateExtractValue(ScalarInst,
I);
396 Value *VectorValue =
Builder.CreateExtractValue(WideValue,
I);
398 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
399 WideValue =
Builder.CreateInsertValue(WideValue, VectorValue,
I);
402 WideValue =
Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
408 auto &
CFG = State.CFG;
420 auto &
CFG = State.CFG;
425 Loop *ParentLoop = State.CurrentParentLoop;
430 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB :
this;
431 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
432 ParentLoop = State.LI->getLoopFor(
436 if (ParentLoop && !State.LI->getLoopFor(NewBB))
449 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
451 assert(
CFG.VPBB2IRBB.contains(PredVPBB) &&
452 "Predecessor basic-block not found building successor.");
459 assert(PredVPSuccessors.size() == 1 &&
460 "Predecessor ending w/o branch must have single successor.");
461 DebugLoc DL = PredBBTerminator->getDebugLoc();
462 PredBBTerminator->eraseFromParent();
465 }
else if (TermBr && !TermBr->isConditional()) {
466 TermBr->setSuccessor(0, NewBB);
475 unsigned idx = PredVPSuccessors.front() ==
this ? 0 : 1;
476 assert((TermBr && (!TermBr->getSuccessor(idx) ||
478 (TermBr->getSuccessor(idx) == NewBB ||
479 PredVPBlock ==
getPlan()->getEntry())))) &&
480 "Trying to reset an existing successor block.");
481 TermBr->setSuccessor(idx, NewBB);
489 "VPIRBasicBlock can have at most two successors at the moment!");
492 IRBB->moveAfter(State->CFG.PrevBB);
493 State->Builder.SetInsertPoint(IRBB->getTerminator());
494 State->CFG.PrevBB = IRBB;
495 State->CFG.VPBB2IRBB[
this] = IRBB;
500 auto *Br = State->Builder.CreateBr(IRBB);
501 Br->setOperand(0,
nullptr);
502 IRBB->getTerminator()->eraseFromParent();
506 "other blocks must be terminated by a branch");
520 bool Replica =
bool(State->Lane);
525 Loop *PrevParentLoop = State->CurrentParentLoop;
526 State->CurrentParentLoop = State->LI->AllocateLoop();
533 State->LI->addTopLevelLoop(State->CurrentParentLoop);
538 assert((!R || R->isReplicator()) &&
539 "only replicate region blocks should remain");
543 if ((Replica &&
this ==
getParent()->getEntry()) ||
548 State->CFG.VPBB2IRBB[
this] = NewBB;
550 NewBB = createEmptyBasicBlock(*State);
552 State->Builder.SetInsertPoint(NewBB);
555 State->Builder.SetInsertPoint(Terminator);
557 State->CFG.PrevBB = NewBB;
558 State->CFG.VPBB2IRBB[
this] = NewBB;
567 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
579 <<
" in BB: " << BB->
getName() <<
'\n');
581 State->CFG.PrevVPBB =
this;
584 State->setDebugLocFrom(Recipe.getDebugLoc());
585 Recipe.execute(*State);
592 assert((SplitAt ==
end() || SplitAt->getParent() ==
this) &&
593 "can only split at a position in the same block");
610 if (
P &&
P->isReplicator()) {
614 assert((!
P || !
P->isReplicator()) &&
"unexpected nested replicate regions");
631 "block with multiple successors doesn't have a recipe as terminator");
646 "block with multiple successors not terminated by "
647 "conditional branch nor switch recipe");
653 assert(IsSwitch &&
"block with more than 2 successors not terminated by "
660 "block with 0 or 1 successors terminated by conditional branch recipe");
680#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
688 O << Indent <<
"No successors\n";
690 O << Indent <<
"Successor(s): ";
693 O << LS << Succ->getName();
700 O << Indent <<
getName() <<
":\n";
702 auto RecipeIndent = Indent +
" ";
726 Old2NewVPBlocks[BB] = NewBB;
727 if (InRegion && BB->getNumSuccessors() == 0) {
728 assert(!Exiting &&
"Multiple exiting blocks?");
732 assert((!InRegion || Exiting) &&
"regions must have a single exiting block");
739 NewPreds.
push_back(Old2NewVPBlocks[Pred]);
744 NewSuccs.
push_back(Old2NewVPBlocks[Succ]);
752 for (
const auto &[OldBB, NewBB] :
755 for (
const auto &[OldPred, NewPred] :
756 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
757 assert(NewPred == Old2NewVPBlocks[OldPred] &&
"Different predecessors");
759 for (
const auto &[OldSucc, NewSucc] :
760 zip(OldBB->successors(), NewBB->successors()))
761 assert(NewSucc == Old2NewVPBlocks[OldSucc] &&
"Different successors");
765 return std::make_pair(Old2NewVPBlocks[Entry],
766 Exiting ? Old2NewVPBlocks[Exiting] :
nullptr);
774 Block->setParent(NewRegion);
780 "Loop regions should have been lowered to plain CFG");
781 assert(!State->Lane &&
"Replicating a Region with non-null instance.");
782 assert(!State->VF.isScalable() &&
"VF is assumed to be non scalable.");
787 for (
unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
792 Block->execute(State);
803 Cost += R.cost(VF, Ctx);
814 "must be in the entry block of a non-replicate region");
816 "loop region has a single predecessor (preheader), its entry block "
817 "has 2 incoming blocks");
821 Pred = Idx == 0 ?
Region->getSinglePredecessor() :
Region;
823 return Pred->getExitingBasicBlock();
834 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
835 LLVM_DEBUG(
dbgs() <<
"Cost of " << BackedgeCost <<
" for VF " << VF
836 <<
": vector loop backedge\n");
837 Cost += BackedgeCost;
863#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
867 auto NewIndent = Indent +
" ";
872 O << Indent <<
"}\n";
882 "Canonical IV must be in the entry of the top-level loop region");
884 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
885 CanIV->getDebugLoc(),
"index");
887 CanIV->eraseFromParent();
909 L->getUniqueExitBlocks(IRExitBlocks);
917 for (
auto *VPB : CreatedBlocks) {
922 for (
auto *Def : R.definedValues())
923 Def->replaceAllUsesWith(&DummyValue);
925 for (
unsigned I = 0, E = R.getNumOperands();
I != E;
I++)
926 R.setOperand(
I, &DummyValue);
933 if (BackedgeTakenCount)
934 delete BackedgeTakenCount;
954 State->CFG.PrevVPBB =
nullptr;
955 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
959 State->VPDT.recalculate(*
this);
962 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
964 State->CFG.DTU.applyUpdates(
968 <<
", UF=" <<
getUF() <<
'\n');
976 State->CFG.DTU.applyUpdates(
984 Block->execute(State);
986 State->CFG.DTU.flush();
993 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1008 Value *Phi = State->get(PhiR, NeedsScalar);
1011 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1038 return R->isReplicator() ? nullptr : R;
1045 return R->isReplicator() ? nullptr : R;
1049#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1053 if (VF.getNumUsers() > 0) {
1059 if (VFxUF.getNumUsers() > 0) {
1065 if (VectorTripCount.getNumUsers() > 0) {
1068 O <<
" = vector-trip-count";
1071 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1073 BackedgeTakenCount->printAsOperand(O,
SlotTracker);
1074 O <<
" = backedge-taken count";
1079 if (TripCount->isLiveIn())
1082 O <<
" = original trip-count";
1091 O <<
"VPlan '" <<
getName() <<
"' {";
1108 RSO << Name <<
" for ";
1110 RSO <<
"VF={" << VFs[0];
1119 RSO <<
"UF={" << UFs[0];
1146 NewDeepRPOT(NewEntry);
1149 for (
const auto &[OldBB, NewBB] :
1152 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().
size() &&
1153 "blocks must have the same number of recipes");
1154 for (
const auto &[OldR, NewR] :
zip(*OldBB, *NewBB)) {
1155 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1156 "recipes must have the same number of operands");
1157 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1158 "recipes must define the same number of operands");
1159 for (
const auto &[OldV, NewV] :
1160 zip(OldR.definedValues(), NewR.definedValues()))
1161 Old2NewVPValues[OldV] = NewV;
1169 for (
unsigned I = 0,
E = NewR.getNumOperands();
I !=
E; ++
I) {
1171 NewR.setOperand(
I, NewOp);
1177 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1179 const auto &[NewEntry, __] =
cloneFrom(Entry);
1187 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1196 Old2NewVPValues[OldLiveIn] =
1197 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1199 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1200 Old2NewVPValues[&VF] = &NewPlan->VF;
1201 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1202 if (BackedgeTakenCount) {
1203 NewPlan->BackedgeTakenCount =
new VPValue();
1204 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1206 if (TripCount && TripCount->isLiveIn())
1207 Old2NewVPValues[TripCount] =
1208 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1218 NewPlan->Name = Name;
1221 "TripCount must have been added to Old2NewVPValues");
1222 NewPlan->TripCount = Old2NewVPValues[TripCount];
1227 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1229 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1230 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[
I]);
1231 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1236 VPB != NewScalarHeader)
1245 CreatedBlocks.push_back(VPIRBB);
1257#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1265 const std::string &Name =
Block->getName();
1274 OS <<
"digraph VPlan {\n";
1275 OS <<
"graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1276 if (!Plan.getName().empty())
1283 Plan.printLiveIns(SS);
1286 for (
auto Line : Lines)
1291 OS <<
"node [shape=rect, fontname=Courier, fontsize=30]\n";
1292 OS <<
"edge [fontname=Courier, fontsize=30]\n";
1293 OS <<
"compound=true\n";
1311 bool Hidden,
const Twine &Label) {
1316 OS << Indent << getUID(
Tail) <<
" -> " << getUID(Head);
1317 OS <<
" [ label=\"" << Label <<
'\"';
1319 OS <<
" ltail=" << getUID(From);
1321 OS <<
" lhead=" << getUID(To);
1323 OS <<
"; splines=none";
1328 auto &Successors =
Block->getSuccessors();
1329 if (Successors.size() == 1)
1330 drawEdge(
Block, Successors.front(),
false,
"");
1331 else if (Successors.size() == 2) {
1332 drawEdge(
Block, Successors.front(),
false,
"T");
1333 drawEdge(
Block, Successors.back(),
false,
"F");
1335 unsigned SuccessorNumber = 0;
1344 OS << Indent << getUID(BasicBlock) <<
" [label =\n";
1347 raw_string_ostream
SS(Str);
1354 StringRef(Str).rtrim(
'\n').split(Lines,
"\n");
1356 auto EmitLine = [&](StringRef
Line, StringRef Suffix) {
1362 EmitLine(Line,
" +\n");
1363 EmitLine(
Lines.back(),
"\n");
1366 OS << Indent <<
"]\n";
1368 dumpEdges(BasicBlock);
1372 OS << Indent <<
"subgraph " << getUID(Region) <<
" {\n";
1374 OS << Indent <<
"fontname=Courier\n"
1375 << Indent <<
"label=\""
1379 assert(
Region->getEntry() &&
"Region contains no inner blocks.");
1383 OS << Indent <<
"}\n";
1393 return DefR && (!DefR->
getParent()->getPlan()->getVectorLoopRegion() ||
1415 bool RemovedUser =
false;
1438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1450void VPSlotTracker::assignName(
const VPValue *V) {
1451 assert(!VPValue2Name.contains(V) &&
"VPValue already has a name!");
1452 auto *UV = V->getUnderlyingValue();
1454 if (!UV && !(VPI && !VPI->getName().empty())) {
1455 VPValue2Name[V] = (
Twine(
"vp<%") +
Twine(NextSlot) +
">").str();
1466 Name = VPI->getName();
1468 assert(!Name.empty() &&
"Name cannot be empty.");
1470 std::string BaseName = (
Twine(Prefix) + Name +
Twine(
">")).str();
1473 const auto &[
A,
_] = VPValue2Name.try_emplace(V, BaseName);
1481 const auto &[
C, UseInserted] = BaseName2Version.
try_emplace(BaseName, 0);
1484 A->second = (BaseName +
Twine(
".") +
Twine(
C->second)).str();
1488void VPSlotTracker::assignNames(
const VPlan &Plan) {
1490 assignName(&Plan.VF);
1492 assignName(&Plan.VFxUF);
1493 assignName(&Plan.VectorTripCount);
1494 if (Plan.BackedgeTakenCount)
1495 assignName(Plan.BackedgeTakenCount);
1499 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1500 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.
getEntry()));
1501 for (
const VPBasicBlock *VPBB :
1506void VPSlotTracker::assignNames(
const VPBasicBlock *VPBB) {
1507 for (
const VPRecipeBase &Recipe : *VPBB)
1508 for (VPValue *Def : Recipe.definedValues())
1512std::string VPSlotTracker::getName(
const Value *V) {
1514 raw_string_ostream S(Name);
1516 V->printAsOperand(S,
false);
1525 if (
I->getParent()) {
1526 MST = std::make_unique<ModuleSlotTracker>(
I->getModule());
1527 MST->incorporateFunction(*
I->getFunction());
1529 MST = std::make_unique<ModuleSlotTracker>(
nullptr);
1532 V->printAsOperand(S,
false, *MST);
1537 std::string Name = VPValue2Name.lookup(V);
1551 "VPValue defined by a recipe in a VPlan?");
1554 if (
auto *UV = V->getUnderlyingValue()) {
1557 UV->printAsOperand(S,
false);
1558 return (
Twine(
"ir<") + Name +
">").str();
1566 assert(!
Range.isEmpty() &&
"Trying to test an empty VF range.");
1567 bool PredicateAtRangeStart = Predicate(
Range.Start);
1570 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1575 return PredicateAtRangeStart;
1585 auto MaxVFTimes2 = MaxVF * 2;
1587 VFRange SubRange = {VF, MaxVFTimes2};
1588 if (
auto Plan = tryToBuildVPlan(SubRange)) {
1593 VPlans.push_back(std::move(Plan));
1601 [VF](
const VPlanPtr &Plan) {
return Plan->hasVF(VF); }) ==
1603 "Multiple VPlans for VF.");
1605 for (
const VPlanPtr &Plan : VPlans) {
1606 if (Plan->hasVF(VF))
1616 bool IsUnrollMetadata =
false;
1617 MDNode *LoopID = L->getLoopID();
1626 if (S->getString().starts_with(
"llvm.loop.unroll.runtime.disable"))
1629 S->getString().starts_with(
"llvm.loop.unroll.disable");
1635 if (!IsUnrollMetadata) {
1637 LLVMContext &Context = L->getHeader()->getContext();
1640 MDString::get(Context,
"llvm.loop.unroll.runtime.disable"));
1646 L->setLoopID(NewLoopID);
1652 unsigned EstimatedVFxUF,
bool DisableRuntimeUnroll) {
1653 MDNode *LID = OrigLoop->getLoopID();
1656 if (!VectorizingEpilogue) {
1659 if (RemainderLoopID) {
1660 OrigLoop->setLoopID(*RemainderLoopID);
1662 if (DisableRuntimeUnroll)
1666 Hints.setAlreadyVectorized();
1673 if (std::optional<MDNode *> VectorizedLoopID =
1676 VectorLoop->
setLoopID(*VectorizedLoopID);
1683 if (!VectorizingEpilogue) {
1685 Hints.setAlreadyVectorized();
1689 bool IsEVLVectorized =
1696 if (IsEVLVectorized) {
1701 {
MDString::get(Context,
"llvm.loop.isvectorized.tailfoldingstyle"),
1704 {IsEVLVectorizedMD});
1709 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1729#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1731 if (VPlans.empty()) {
1732 O <<
"LV: No VPlans built.\n";
1735 for (
const auto &Plan : VPlans)
1759 for (
Type *VectorTy :
1761 ScalarizationCost +=
TTI.getScalarizationOverhead(
1777 return ScalarizationCost +
1778 TTI.getOperandsScalarizationOverhead(Tys,
CostKind);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
dxil pretty DXIL Metadata Pretty Printer
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file defines the SmallVector class.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static void addRuntimeUnrollDisableMetaData(Loop *L)
static T * getPlanEntry(T *Start)
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
const char LLVMLoopVectorizeFollowupAll[]
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
const char LLVMLoopVectorizeFollowupVectorized[]
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
const char LLVMLoopVectorizeFollowupEpilogue[]
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
static constexpr UpdateKind Delete
static constexpr UpdateKind Insert
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isScalar() const
Exactly one element.
Common base class shared among various IRBuilders.
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
A helper class to return the specified delimiter string after the first invocation of operator String...
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, bool VectorizingEpilogue, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
unsigned getNumOperands() const
Return number of MDNode operands.
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVoidTy() const
Return true if this is 'void'.
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
iterator begin()
Recipe iterator methods.
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
bool isExiting() const
Returns true if the block is exiting it's parent region.
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
void setName(const Twine &newName)
size_t getNumSuccessors() const
iterator_range< VPBlockBase ** > successors()
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
size_t getNumPredecessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
VPBlockBase * getEnclosingBlockWithPredecessors()
const VPBlocksTy & getPredecessors() const
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
const std::string & getName() const
VPBlockBase * getSinglePredecessor() const
const VPBlocksTy & getHierarchicalSuccessors()
VPBlockBase(const unsigned char SC, const std::string &N)
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void dump() const
Dump the VPDef to stderr (for debugging).
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
Recipe to expand a SCEV expression.
A special type of VPBasicBlock that wraps an existing IR basic block.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
BasicBlock * getIRBasicBlock() const
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
const VPBlockBase * getEntry() const
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
const VPBlockBase * getExiting() const
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
void dump() const
Dump the value to stderr (for debugging).
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
VPDef * Def
Pointer to the VPDef that defines this VPValue.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
LLVM_DUMP_METHOD void dump()
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
friend class VPSlotTracker
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
VPBasicBlock * getEntry()
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
void setName(const Twine &newName)
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
LLVM_ABI_FOR_TEST ~VPlan()
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
friend class VPlanPrinter
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
void setEntry(VPBasicBlock *VPBB)
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
void print(raw_ostream &O) const
Print this VPlan to O.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnCount, Op0_t, Op1_t > m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond, Op0_t > m_BranchOnCond(const Op0_t &Op0)
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF)
Set weights for UnrolledLoop and RemainderLoop based on weights for OrigLoop and the following distri...
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI llvm::MDNode * makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID, llvm::ArrayRef< llvm::StringRef > RemovePrefixes, llvm::ArrayRef< llvm::MDNode * > AddAttrs)
Create a new LoopID after the loop has been transformed.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< bool > EnableVPlanNativePath
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
unsigned getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind)
A helper function that returns how much we should divide the cost of a predicated block by.
std::unique_ptr< VPlan > VPlanPtr
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI