LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanAnalysis.h"
28#include "VPlanValue.h"
29#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/Twine.h"
34#include "llvm/ADT/ilist.h"
35#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <algorithm>
44#include <cassert>
45#include <cstddef>
46#include <string>
47
48namespace llvm {
49
50class BasicBlock;
51class DominatorTree;
52class InnerLoopVectorizer;
53class IRBuilderBase;
54struct VPTransformState;
55class raw_ostream;
56class RecurrenceDescriptor;
57class SCEV;
58class Type;
59class VPBasicBlock;
60class VPBuilder;
61class VPDominatorTree;
62class VPRegionBlock;
63class VPlan;
64class VPLane;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is exiting this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
265 return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
266 }
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
271 void setOneSuccessor(VPBlockBase *Successor) {
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessor of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348 /// Return true if it is legal to hoist instructions into this block.
350 // There are currently no constraints that prevent an instruction to be
351 // hoisted into a VPBlockBase.
352 return true;
353 }
354
355#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
356 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
357 OS << getName();
358 }
359
360 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
361 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
362 /// consecutive numbers.
363 ///
364 /// Note that the numbering is applied to the whole VPlan, so printing
365 /// individual blocks is consistent with the whole VPlan printing.
366 virtual void print(raw_ostream &O, const Twine &Indent,
367 VPSlotTracker &SlotTracker) const = 0;
368
369 /// Print plain-text dump of this VPlan to \p O.
370 void print(raw_ostream &O) const;
371
372 /// Print the successors of this block to \p O, prefixing all lines with \p
373 /// Indent.
374 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
375
376 /// Dump this VPBlockBase to dbgs().
377 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
378#endif
379
380 /// Clone the current block and its recipes without updating the operands of
381 /// the cloned recipes, including all blocks in the single-entry single-exit
382 /// region for VPRegionBlocks.
383 virtual VPBlockBase *clone() = 0;
384};
385
386/// VPRecipeBase is a base class modeling a sequence of one or more output IR
387/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
388/// and is responsible for deleting its defined values. Single-value
389/// recipes must inherit from VPSingleDef instead of inheriting from both
390/// VPRecipeBase and VPValue separately.
392 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
393 public VPDef,
394 public VPUser {
395 friend VPBasicBlock;
396 friend class VPBlockUtils;
397
398 /// Each VPRecipe belongs to a single VPBasicBlock.
399 VPBasicBlock *Parent = nullptr;
400
401 /// The debug location for the recipe.
402 DebugLoc DL;
403
404public:
406 DebugLoc DL = DebugLoc::getUnknown())
407 : VPDef(SC), VPUser(Operands), DL(DL) {}
408
409 virtual ~VPRecipeBase() = default;
410
411 /// Clone the current recipe.
412 virtual VPRecipeBase *clone() = 0;
413
414 /// \return the VPBasicBlock which this VPRecipe belongs to.
415 VPBasicBlock *getParent() { return Parent; }
416 const VPBasicBlock *getParent() const { return Parent; }
417
418 /// The method which generates the output IR instructions that correspond to
419 /// this VPRecipe, thereby "executing" the VPlan.
420 virtual void execute(VPTransformState &State) = 0;
421
422 /// Return the cost of this recipe, taking into account if the cost
423 /// computation should be skipped and the ForceTargetInstructionCost flag.
424 /// Also takes care of printing the cost for debugging.
426
427 /// Insert an unlinked recipe into a basic block immediately before
428 /// the specified recipe.
429 void insertBefore(VPRecipeBase *InsertPos);
430 /// Insert an unlinked recipe into \p BB immediately before the insertion
431 /// point \p IP;
432 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
433
434 /// Insert an unlinked Recipe into a basic block immediately after
435 /// the specified Recipe.
436 void insertAfter(VPRecipeBase *InsertPos);
437
438 /// Unlink this recipe from its current VPBasicBlock and insert it into
439 /// the VPBasicBlock that MovePos lives in, right after MovePos.
440 void moveAfter(VPRecipeBase *MovePos);
441
442 /// Unlink this recipe and insert into BB before I.
443 ///
444 /// \pre I is a valid iterator into BB.
445 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
446
447 /// This method unlinks 'this' from the containing basic block, but does not
448 /// delete it.
449 void removeFromParent();
450
451 /// This method unlinks 'this' from the containing basic block and deletes it.
452 ///
453 /// \returns an iterator pointing to the element after the erased one
454 iplist<VPRecipeBase>::iterator eraseFromParent();
455
456 /// Method to support type inquiry through isa, cast, and dyn_cast.
457 static inline bool classof(const VPDef *D) {
458 // All VPDefs are also VPRecipeBases.
459 return true;
460 }
461
462 static inline bool classof(const VPUser *U) { return true; }
463
464 /// Returns true if the recipe may have side-effects.
465 bool mayHaveSideEffects() const;
466
467 /// Returns true for PHI-like recipes.
468 bool isPhi() const;
469
470 /// Returns true if the recipe may read from memory.
471 bool mayReadFromMemory() const;
472
473 /// Returns true if the recipe may write to memory.
474 bool mayWriteToMemory() const;
475
476 /// Returns true if the recipe may read from or write to memory.
477 bool mayReadOrWriteMemory() const {
478 return mayReadFromMemory() || mayWriteToMemory();
479 }
480
481 /// Returns the debug location of the recipe.
482 DebugLoc getDebugLoc() const { return DL; }
483
484 /// Return true if the recipe is a scalar cast.
485 bool isScalarCast() const;
486
487 /// Set the recipe's debug location to \p NewDL.
488 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
489
490protected:
491 /// Compute the cost of this recipe either using a recipe's specialized
492 /// implementation or using the legacy cost model and the underlying
493 /// instructions.
494 virtual InstructionCost computeCost(ElementCount VF,
495 VPCostContext &Ctx) const;
496};
497
// Helper macro to define the common classof implementations for recipes. It
// expands to classof overloads taking VPDef, VPValue, VPUser, VPRecipeBase and
// VPSingleDefRecipe pointers, each of which compares the recipe's VPDef ID
// against VPDefID. The argument is parenthesized at every use so that an
// expression argument cannot change the meaning of the comparison.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == (VPDefID);                                       \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == (VPDefID);                                  \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == (VPDefID);                                  \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == (VPDefID);                                       \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == (VPDefID);                                       \
  }
517
518/// VPSingleDef is a base class for recipes for modeling a sequence of one or
519/// more output IR that define a single result VPValue.
520/// Note that VPRecipeBase must be inherited from before VPValue.
521class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
522public:
523 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
525 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
526
527 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
529 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
530
531 static inline bool classof(const VPRecipeBase *R) {
532 switch (R->getVPDefID()) {
533 case VPRecipeBase::VPDerivedIVSC:
534 case VPRecipeBase::VPEVLBasedIVPHISC:
535 case VPRecipeBase::VPExpandSCEVSC:
536 case VPRecipeBase::VPExpressionSC:
537 case VPRecipeBase::VPInstructionSC:
538 case VPRecipeBase::VPReductionEVLSC:
539 case VPRecipeBase::VPReductionSC:
540 case VPRecipeBase::VPReplicateSC:
541 case VPRecipeBase::VPScalarIVStepsSC:
542 case VPRecipeBase::VPVectorPointerSC:
543 case VPRecipeBase::VPVectorEndPointerSC:
544 case VPRecipeBase::VPWidenCallSC:
545 case VPRecipeBase::VPWidenCanonicalIVSC:
546 case VPRecipeBase::VPWidenCastSC:
547 case VPRecipeBase::VPWidenGEPSC:
548 case VPRecipeBase::VPWidenIntrinsicSC:
549 case VPRecipeBase::VPWidenSC:
550 case VPRecipeBase::VPWidenSelectSC:
551 case VPRecipeBase::VPBlendSC:
552 case VPRecipeBase::VPPredInstPHISC:
553 case VPRecipeBase::VPCanonicalIVPHISC:
554 case VPRecipeBase::VPActiveLaneMaskPHISC:
555 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
556 case VPRecipeBase::VPWidenPHISC:
557 case VPRecipeBase::VPWidenIntOrFpInductionSC:
558 case VPRecipeBase::VPWidenPointerInductionSC:
559 case VPRecipeBase::VPReductionPHISC:
560 case VPRecipeBase::VPPartialReductionSC:
561 return true;
562 case VPRecipeBase::VPBranchOnMaskSC:
563 case VPRecipeBase::VPInterleaveEVLSC:
564 case VPRecipeBase::VPInterleaveSC:
565 case VPRecipeBase::VPIRInstructionSC:
566 case VPRecipeBase::VPWidenLoadEVLSC:
567 case VPRecipeBase::VPWidenLoadSC:
568 case VPRecipeBase::VPWidenStoreEVLSC:
569 case VPRecipeBase::VPWidenStoreSC:
570 case VPRecipeBase::VPHistogramSC:
571 // TODO: Widened stores don't define a value, but widened loads do. Split
572 // the recipes to be able to make widened loads VPSingleDefRecipes.
573 return false;
574 }
575 llvm_unreachable("Unhandled VPDefID");
576 }
577
578 static inline bool classof(const VPUser *U) {
579 auto *R = dyn_cast<VPRecipeBase>(U);
580 return R && classof(R);
581 }
582
583 virtual VPSingleDefRecipe *clone() override = 0;
584
585 /// Returns the underlying instruction.
587 return cast<Instruction>(getUnderlyingValue());
588 }
590 return cast<Instruction>(getUnderlyingValue());
591 }
592
593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
594 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
595 LLVM_DUMP_METHOD void dump() const;
596#endif
597};
598
599/// Class to record and manage LLVM IR flags.
601 enum class OperationType : unsigned char {
602 Cmp,
603 OverflowingBinOp,
604 Trunc,
605 DisjointOp,
606 PossiblyExactOp,
607 GEPOp,
608 FPMathOp,
609 NonNegOp,
610 Other
611 };
612
613public:
614 struct WrapFlagsTy {
615 char HasNUW : 1;
616 char HasNSW : 1;
617
619 };
620
622 char HasNUW : 1;
623 char HasNSW : 1;
624
626 };
627
629 char IsDisjoint : 1;
631 };
632
634 char NonNeg : 1;
635 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
636 };
637
638private:
639 struct ExactFlagsTy {
640 char IsExact : 1;
641 };
642 struct FastMathFlagsTy {
643 char AllowReassoc : 1;
644 char NoNaNs : 1;
645 char NoInfs : 1;
646 char NoSignedZeros : 1;
647 char AllowReciprocal : 1;
648 char AllowContract : 1;
649 char ApproxFunc : 1;
650
651 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
652 };
653
654 OperationType OpType;
655
656 union {
661 ExactFlagsTy ExactFlags;
664 FastMathFlagsTy FMFs;
665 unsigned AllFlags;
666 };
667
668public:
669 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
670
672 if (auto *Op = dyn_cast<CmpInst>(&I)) {
673 OpType = OperationType::Cmp;
674 CmpPredicate = Op->getPredicate();
675 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
676 OpType = OperationType::DisjointOp;
677 DisjointFlags.IsDisjoint = Op->isDisjoint();
678 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
679 OpType = OperationType::OverflowingBinOp;
680 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
681 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
682 OpType = OperationType::Trunc;
683 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
684 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
685 OpType = OperationType::PossiblyExactOp;
686 ExactFlags.IsExact = Op->isExact();
687 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
688 OpType = OperationType::GEPOp;
689 GEPFlags = GEP->getNoWrapFlags();
690 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
691 OpType = OperationType::NonNegOp;
692 NonNegFlags.NonNeg = PNNI->hasNonNeg();
693 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
694 OpType = OperationType::FPMathOp;
695 FMFs = Op->getFastMathFlags();
696 } else {
697 OpType = OperationType::Other;
698 AllFlags = 0;
699 }
700 }
701
703 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
704
706 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
707
708 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
709
711 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
712
714 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
715
717 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
718
719public:
721 OpType = Other.OpType;
722 AllFlags = Other.AllFlags;
723 }
724
725 /// Drop all poison-generating flags.
727 // NOTE: This needs to be kept in-sync with
728 // Instruction::dropPoisonGeneratingFlags.
729 switch (OpType) {
730 case OperationType::OverflowingBinOp:
731 WrapFlags.HasNUW = false;
732 WrapFlags.HasNSW = false;
733 break;
734 case OperationType::Trunc:
735 TruncFlags.HasNUW = false;
736 TruncFlags.HasNSW = false;
737 break;
738 case OperationType::DisjointOp:
740 break;
741 case OperationType::PossiblyExactOp:
742 ExactFlags.IsExact = false;
743 break;
744 case OperationType::GEPOp:
746 break;
747 case OperationType::FPMathOp:
748 FMFs.NoNaNs = false;
749 FMFs.NoInfs = false;
750 break;
751 case OperationType::NonNegOp:
752 NonNegFlags.NonNeg = false;
753 break;
754 case OperationType::Cmp:
755 case OperationType::Other:
756 break;
757 }
758 }
759
760 /// Apply the IR flags to \p I.
761 void applyFlags(Instruction &I) const {
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
765 I.setHasNoSignedWrap(WrapFlags.HasNSW);
766 break;
767 case OperationType::Trunc:
768 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
769 I.setHasNoSignedWrap(TruncFlags.HasNSW);
770 break;
771 case OperationType::DisjointOp:
772 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
773 break;
774 case OperationType::PossiblyExactOp:
775 I.setIsExact(ExactFlags.IsExact);
776 break;
777 case OperationType::GEPOp:
778 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
779 break;
780 case OperationType::FPMathOp:
781 I.setHasAllowReassoc(FMFs.AllowReassoc);
782 I.setHasNoNaNs(FMFs.NoNaNs);
783 I.setHasNoInfs(FMFs.NoInfs);
784 I.setHasNoSignedZeros(FMFs.NoSignedZeros);
785 I.setHasAllowReciprocal(FMFs.AllowReciprocal);
786 I.setHasAllowContract(FMFs.AllowContract);
787 I.setHasApproxFunc(FMFs.ApproxFunc);
788 break;
789 case OperationType::NonNegOp:
790 I.setNonNeg(NonNegFlags.NonNeg);
791 break;
792 case OperationType::Cmp:
793 case OperationType::Other:
794 break;
795 }
796 }
797
799 assert(OpType == OperationType::Cmp &&
800 "recipe doesn't have a compare predicate");
801 return CmpPredicate;
802 }
803
805 assert(OpType == OperationType::Cmp &&
806 "recipe doesn't have a compare predicate");
807 CmpPredicate = Pred;
808 }
809
811
812 /// Returns true if the recipe has a comparison predicate.
813 bool hasPredicate() const { return OpType == OperationType::Cmp; }
814
815 /// Returns true if the recipe has fast-math flags.
816 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
817
819
820 /// Returns true if the recipe has non-negative flag.
821 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
822
823 bool isNonNeg() const {
824 assert(OpType == OperationType::NonNegOp &&
825 "recipe doesn't have a NNEG flag");
826 return NonNegFlags.NonNeg;
827 }
828
829 bool hasNoUnsignedWrap() const {
830 switch (OpType) {
831 case OperationType::OverflowingBinOp:
832 return WrapFlags.HasNUW;
833 case OperationType::Trunc:
834 return TruncFlags.HasNUW;
835 default:
836 llvm_unreachable("recipe doesn't have a NUW flag");
837 }
838 }
839
840 bool hasNoSignedWrap() const {
841 switch (OpType) {
842 case OperationType::OverflowingBinOp:
843 return WrapFlags.HasNSW;
844 case OperationType::Trunc:
845 return TruncFlags.HasNSW;
846 default:
847 llvm_unreachable("recipe doesn't have a NSW flag");
848 }
849 }
850
851 bool isDisjoint() const {
852 assert(OpType == OperationType::DisjointOp &&
853 "recipe cannot have a disjoing flag");
855 }
856
857#if !defined(NDEBUG)
858 /// Returns true if the set flags are valid for \p Opcode.
859 bool flagsValidForOpcode(unsigned Opcode) const;
860#endif
861
862#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
863 void printFlags(raw_ostream &O) const;
864#endif
865};
866
867/// A pure-virtual common base class for recipes defining a single VPValue and
868/// using IR flags.
870 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
873
874 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
875 Instruction &I)
877
878 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
879 const VPIRFlags &Flags,
881 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
882
883 static inline bool classof(const VPRecipeBase *R) {
884 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
885 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
886 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
887 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
888 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
889 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
890 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
891 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
892 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
893 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
894 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
895 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
896 }
897
898 static inline bool classof(const VPUser *U) {
899 auto *R = dyn_cast<VPRecipeBase>(U);
900 return R && classof(R);
901 }
902
903 static inline bool classof(const VPValue *V) {
904 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
905 return R && classof(R);
906 }
907
908 static inline bool classof(const VPSingleDefRecipe *U) {
909 auto *R = dyn_cast<VPRecipeBase>(U);
910 return R && classof(R);
911 }
912
913 void execute(VPTransformState &State) override = 0;
914
915 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
916 std::optional<InstructionCost>
917 getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF,
918 VPCostContext &Ctx) const;
919};
920
921/// Helper to access the operand that contains the unroll part for this recipe
922/// after unrolling.
923template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
924protected:
925 /// Return the VPValue operand containing the unroll part or null if there is
926 /// no such operand.
927 VPValue *getUnrollPartOperand(const VPUser &U) const;
928
929 /// Return the unroll part.
930 unsigned getUnrollPart(const VPUser &U) const;
931};
932
933/// Helper to manage IR metadata for recipes. It filters out metadata that
934/// cannot be propagated.
937
938public:
940
941 /// Adds metadata that can be preserved from the original instruction
942 /// \p I.
944
945 /// Adds metadata that can be preserved from the original instruction
946 /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
948
949 /// Copy constructor for cloning.
951
953 Metadata = Other.Metadata;
954 return *this;
955 }
956
957 /// Add all metadata to \p I.
958 void applyMetadata(Instruction &I) const;
959
960 /// Add metadata with kind \p Kind and \p Node.
961 void addMetadata(unsigned Kind, MDNode *Node) {
962 Metadata.emplace_back(Kind, Node);
963 }
964
965 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
966 /// nodes that are common to both.
967 void intersect(const VPIRMetadata &MD);
968};
969
970/// This is a concrete Recipe that models a single VPlan-level instruction.
971 /// While, like any Recipe, it may generate a sequence of IR instructions when
972/// executed, these instructions would always form a single-def expression as
973/// the VPInstruction is also a single def-use vertex.
975 public VPIRMetadata,
976 public VPUnrollPartAccessor<1> {
977 friend class VPlanSlp;
978
979public:
980 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
981 enum {
982 FirstOrderRecurrenceSplice =
983 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
984 // values of a first-order recurrence.
988 // Creates a mask where each lane is active (true) whilst the current
989 // counter (first operand + index) is less than the second operand. i.e.
990 // mask[i] = icmp ult (op0 + i), op1
991 // The size of the mask returned is VF * Multiplier (UF, third op).
995 // Increment the canonical IV separately for each unrolled part.
1000 /// Given operands of (the same) struct type, creates a struct of fixed-
1001 /// width vectors each containing a struct field of all operands. The
1002 /// number of operands matches the element count of every vector.
1004 /// Creates a fixed-width vector containing all operands. The number of
1005 /// operands matches the vector element count.
1007 /// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
1008 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1012 // Extracts the last lane from its operand if it is a vector, or the last
1013 // part if scalar. In the latter case, the recipe will be removed during
1014 // unrolling.
1016 // Extracts the second-to-last lane from its operand or the second-to-last
1017 // part if it is scalar. In the latter case, the recipe will be removed
1018 // during unrolling.
1020 LogicalAnd, // Non-poison propagating logical And.
1021 // Add an offset in bytes (second operand) to a base pointer (first
1022 // operand). Only generates scalar values (either for the first lane only or
1023 // for all lanes, depending on its uses).
1025 // Add a vector offset in bytes (second operand) to a scalar base pointer
1026 // (first operand).
1028 // Returns a scalar boolean value, which is true if any lane of its
1029 // (boolean) vector operands is true. It produces the reduced value across
1030 // all unrolled iterations. Unrolling will add all copies of its original
1031 // operand as additional operands. AnyOf is poison-safe as all operands
1032 // will be frozen.
1034 // Calculates the first active lane index of the vector predicate operands.
1035 // It produces the lane index across all unrolled iterations. Unrolling will
1036 // add all copies of its original operand as additional operands.
1038
1039 // The opcodes below are used for VPInstructionWithType.
1040 //
1041 /// Scale the first operand (vector step) by the second operand
1042 /// (scalar-step). Casts both operands to the result type if needed.
1044 /// Start vector for reductions with 3 operands: the original start value,
1045 /// the identity value for the reduction and an integer indicating the
1046 /// scaling factor.
1048 // Creates a step vector starting from 0 to VF with a step of 1.
1050 /// Extracts a single lane (first operand) from a set of vector operands.
1051 /// The lane specifies an index into a vector formed by combining all vector
1052 /// operands (all operands after the first one).
1054 /// Explicit user for the resume phi of the canonical induction in the main
1055 /// VPlan, used by the epilogue vector loop.
1057 /// Returns the value for vscale.
1059 };
1060
1061private:
1062 typedef unsigned char OpcodeTy;
1063 OpcodeTy Opcode;
1064
1065 /// An optional name that can be used for the generated IR instruction.
1066 const std::string Name;
1067
1068 /// Returns true if this VPInstruction generates scalar values for all lanes.
1069 /// Most VPInstructions generate a single value per part, either vector or
1070 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1071 /// values per all lanes, stemming from an original ingredient. This method
1072 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1073 /// underlying ingredient.
1074 bool doesGeneratePerAllLanes() const;
1075
1076 /// Returns true if we can generate a scalar for the first lane only if
1077 /// needed.
1078 bool canGenerateScalarForFirstLane() const;
1079
1080 /// Utility method serving execute(): generates a single vector instance of
1081 /// the modeled instruction. \returns the generated value. In some cases an
1082 /// existing value is returned rather than a generated one.
1083 Value *generate(VPTransformState &State);
1084
1085 /// Utility methods serving execute(): generates a scalar single instance of
1086 /// the modeled instruction for a given lane. \returns the scalar generated
1087 /// value for lane \p Lane.
1088 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1089
1090#if !defined(NDEBUG)
1091 /// Return the number of operands determined by the opcode of the
1092 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1093 /// directly by the opcode.
1094 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1095#endif
1096
1097public:
1099 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
1100 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1101 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
1102
1104 const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown(),
1105 const Twine &Name = "");
1106
1107 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1108
1109 VPInstruction *clone() override {
1111 auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name);
1112 if (getUnderlyingValue())
1113 New->setUnderlyingValue(getUnderlyingInstr());
1114 return New;
1115 }
1116
1117 unsigned getOpcode() const { return Opcode; }
1118
1119 /// Generate the instruction.
1120 /// TODO: We currently execute only per-part unless a specific instance is
1121 /// provided.
1122 void execute(VPTransformState &State) override;
1123
1124 /// Return the cost of this VPInstruction.
1125 InstructionCost computeCost(ElementCount VF,
1126 VPCostContext &Ctx) const override;
1127
1128#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1129 /// Print the VPInstruction to \p O.
1130 void print(raw_ostream &O, const Twine &Indent,
1131 VPSlotTracker &SlotTracker) const override;
1132
1133 /// Print the VPInstruction to dbgs() (for debugging).
1134 LLVM_DUMP_METHOD void dump() const;
1135#endif
1136
1137 bool hasResult() const {
1138 // CallInst may or may not have a result, depending on the called function.
1139 // Conservatively return calls have results for now.
1140 switch (getOpcode()) {
1141 case Instruction::Ret:
1142 case Instruction::Br:
1143 case Instruction::Store:
1144 case Instruction::Switch:
1145 case Instruction::IndirectBr:
1146 case Instruction::Resume:
1147 case Instruction::CatchRet:
1148 case Instruction::Unreachable:
1149 case Instruction::Fence:
1150 case Instruction::AtomicRMW:
1151 case VPInstruction::BranchOnCond:
1152 case VPInstruction::BranchOnCount:
1153 return false;
1154 default:
1155 return true;
1156 }
1157 }
1158
1159 /// Returns true if the underlying opcode may read from or write to memory.
1160 bool opcodeMayReadOrWriteFromMemory() const;
1161
1162 /// Returns true if the recipe only uses the first lane of operand \p Op.
1163 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1164
1165 /// Returns true if the recipe only uses the first part of operand \p Op.
1166 bool onlyFirstPartUsed(const VPValue *Op) const override;
1167
1168 /// Returns true if this VPInstruction produces a scalar value from a vector,
1169 /// e.g. by performing a reduction or extracting a lane.
1170 bool isVectorToScalar() const;
1171
1172 /// Returns true if this VPInstruction's operands are single scalars and the
1173 /// result is also a single scalar.
1174 bool isSingleScalar() const;
1175
1176 /// Returns the symbolic name assigned to the VPInstruction.
1177 StringRef getName() const { return Name; }
1178};
1179
1180/// A specialization of VPInstruction augmenting it with a dedicated result
1181/// type, to be used when the opcode and operands of the VPInstruction don't
1182/// directly determine the result type. Note that there is no separate VPDef ID
1183/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1184/// distinguished purely by the opcode.
1186 /// Scalar result type produced by the recipe.
1187 Type *ResultTy;
1188
1189public:
1191 Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
1192 const Twine &Name = "")
1193 : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {}
1194
1195 static inline bool classof(const VPRecipeBase *R) {
1196 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1197 // type information.
1198 if (R->isScalarCast())
1199 return true;
1200 auto *VPI = dyn_cast<VPInstruction>(R);
1201 if (!VPI)
1202 return false;
1203 switch (VPI->getOpcode()) {
1207 return true;
1208 default:
1209 return false;
1210 }
1211 }
1212
1213 static inline bool classof(const VPUser *R) {
1214 return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
1215 }
1216
1217 VPInstruction *clone() override {
1219 auto *New =
1221 getDebugLoc(), getName());
1222 New->setUnderlyingValue(getUnderlyingValue());
1223 return New;
1224 }
1225
1226 void execute(VPTransformState &State) override;
1227
1228 /// Return the cost of this VPInstruction.
1230 VPCostContext &Ctx) const override {
1231 // TODO: Compute accurate cost after retiring the legacy cost model.
1232 return 0;
1233 }
1234
1235 Type *getResultType() const { return ResultTy; }
1236
1237#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1238 /// Print the recipe.
1239 void print(raw_ostream &O, const Twine &Indent,
1240 VPSlotTracker &SlotTracker) const override;
1241#endif
1242};
1243
1244/// Helper type to provide functions to access incoming values and blocks for
1245/// phi-like recipes.
1247protected:
1248 /// Return a VPRecipeBase* to the current object.
1249 virtual const VPRecipeBase *getAsRecipe() const = 0;
1250
1251public:
1252 virtual ~VPPhiAccessors() = default;
1253
1254 /// Returns the incoming VPValue with index \p Idx.
1255 VPValue *getIncomingValue(unsigned Idx) const {
1256 return getAsRecipe()->getOperand(Idx);
1257 }
1258
1259 /// Returns the incoming block with index \p Idx.
1260 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1261
1262 /// Returns the number of incoming values, also number of incoming blocks.
1263 virtual unsigned getNumIncoming() const {
1264 return getAsRecipe()->getNumOperands();
1265 }
1266
1267 /// Returns an iterator range over the incoming values.
1269 return make_range(getAsRecipe()->op_begin(),
1270 getAsRecipe()->op_begin() + getNumIncoming());
1271 }
1272
1274 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1275
1276 /// Returns an iterator range over the incoming blocks.
1278 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1279 return getIncomingBlock(Idx);
1280 };
1281 return map_range(index_range(0, getNumIncoming()), GetBlock);
1282 }
1283
1284 /// Returns an iterator range over pairs of incoming values and corresponding
1285 /// incoming blocks.
1290 }
1291
1292 /// Removes the incoming value for \p IncomingBlock, which must be a
1293 /// predecessor.
1294 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1295
1296#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1297 /// Print the recipe.
1299#endif
1300};
1301
1305
1306 static inline bool classof(const VPUser *U) {
1307 auto *VPI = dyn_cast<VPInstruction>(U);
1308 return VPI && VPI->getOpcode() == Instruction::PHI;
1309 }
1310
1311 static inline bool classof(const VPValue *V) {
1312 auto *VPI = dyn_cast<VPInstruction>(V);
1313 return VPI && VPI->getOpcode() == Instruction::PHI;
1314 }
1315
1316 static inline bool classof(const VPSingleDefRecipe *SDR) {
1317 auto *VPI = dyn_cast<VPInstruction>(SDR);
1318 return VPI && VPI->getOpcode() == Instruction::PHI;
1319 }
1320
1321 VPPhi *clone() override {
1322 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1323 PhiR->setUnderlyingValue(getUnderlyingValue());
1324 return PhiR;
1325 }
1326
1327 void execute(VPTransformState &State) override;
1328
1329#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1330 /// Print the recipe.
1331 void print(raw_ostream &O, const Twine &Indent,
1332 VPSlotTracker &SlotTracker) const override;
1333#endif
1334
1335protected:
1336 const VPRecipeBase *getAsRecipe() const override { return this; }
1337};
1338
1339 /// A recipe to wrap an original IR instruction not to be modified during
1340 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1341 /// Except for PHIs, VPIRInstructions cannot have any operands.
1343 Instruction &I;
1344
1345protected:
1346 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1347 /// subclasses may need to be created, e.g. VPIRPhi.
1349 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1350
1351public:
1352 ~VPIRInstruction() override = default;
1353
1354 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1355 /// VPIRInstruction.
1357
1358 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1359
1361 auto *R = create(I);
1362 for (auto *Op : operands())
1363 R->addOperand(Op);
1364 return R;
1365 }
1366
1367 void execute(VPTransformState &State) override;
1368
1369 /// Return the cost of this VPIRInstruction.
1371 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1372
1373 Instruction &getInstruction() const { return I; }
1374
1375#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1376 /// Print the recipe.
1377 void print(raw_ostream &O, const Twine &Indent,
1378 VPSlotTracker &SlotTracker) const override;
1379#endif
1380
1381 bool usesScalars(const VPValue *Op) const override {
1383 "Op must be an operand of the recipe");
1384 return true;
1385 }
1386
1387 bool onlyFirstPartUsed(const VPValue *Op) const override {
1389 "Op must be an operand of the recipe");
1390 return true;
1391 }
1392
1393 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1395 "Op must be an operand of the recipe");
1396 return true;
1397 }
1398
1399 /// Update the recipe's first operand to the last lane of the operand using \p
1400 /// Builder. Must only be used for VPIRInstructions with at least one operand
1401 /// wrapping a PHINode.
1403};
1404
1405/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1406/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1407/// allowed, and it is used to add a new incoming value for the single
1408/// predecessor VPBB.
1410 public VPPhiAccessors {
1412
1413 static inline bool classof(const VPRecipeBase *U) {
1414 auto *R = dyn_cast<VPIRInstruction>(U);
1415 return R && isa<PHINode>(R->getInstruction());
1416 }
1417
1418 PHINode &getIRPhi() { return cast<PHINode>(getInstruction()); }
1419
1420 void execute(VPTransformState &State) override;
1421
1422#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1423 /// Print the recipe.
1424 void print(raw_ostream &O, const Twine &Indent,
1425 VPSlotTracker &SlotTracker) const override;
1426#endif
1427
1428protected:
1429 const VPRecipeBase *getAsRecipe() const override { return this; }
1430};
1431
1432/// VPWidenRecipe is a recipe for producing a widened instruction using the
1433/// opcode and operands of the recipe. This recipe covers most of the
1434/// traditional vectorization cases where each recipe transforms into a
1435/// vectorized version of itself.
1437 public VPIRMetadata {
1438 unsigned Opcode;
1439
1440public:
1442 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1443 DebugLoc DL)
1444 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1445 VPIRMetadata(Metadata), Opcode(Opcode) {}
1446
1449 Opcode(I.getOpcode()) {}
1450
1451 ~VPWidenRecipe() override = default;
1452
1453 VPWidenRecipe *clone() override {
1454 auto *R =
1455 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1456 R->setUnderlyingValue(getUnderlyingValue());
1457 return R;
1458 }
1459
1460 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1461
1462 /// Produce a widened instruction using the opcode and operands of the recipe,
1463 /// processing State.VF elements.
1464 void execute(VPTransformState &State) override;
1465
1466 /// Return the cost of this VPWidenRecipe.
1467 InstructionCost computeCost(ElementCount VF,
1468 VPCostContext &Ctx) const override;
1469
1470 unsigned getOpcode() const { return Opcode; }
1471
1472#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1473 /// Print the recipe.
1474 void print(raw_ostream &O, const Twine &Indent,
1475 VPSlotTracker &SlotTracker) const override;
1476#endif
1477};
1478
1479/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1481 /// Cast instruction opcode.
1482 Instruction::CastOps Opcode;
1483
1484 /// Result type for the cast.
1485 Type *ResultTy;
1486
1487public:
1489 CastInst &UI)
1490 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI),
1491 Opcode(Opcode), ResultTy(ResultTy) {
1492 assert(UI.getOpcode() == Opcode &&
1493 "opcode of underlying cast doesn't match");
1494 }
1495
1497 const VPIRFlags &Flags = {},
1499 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1500 VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) {
1501 assert(flagsValidForOpcode(Opcode) &&
1502 "Set flags not supported for the provided opcode");
1503 }
1504
1505 ~VPWidenCastRecipe() override = default;
1506
1508 if (auto *UV = getUnderlyingValue())
1509 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1510 *cast<CastInst>(UV));
1511
1512 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1513 }
1514
1515 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1516
1517 /// Produce widened copies of the cast.
1518 void execute(VPTransformState &State) override;
1519
1520 /// Return the cost of this VPWidenCastRecipe.
1522 VPCostContext &Ctx) const override;
1523
1524#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1525 /// Print the recipe.
1526 void print(raw_ostream &O, const Twine &Indent,
1527 VPSlotTracker &SlotTracker) const override;
1528#endif
1529
1530 Instruction::CastOps getOpcode() const { return Opcode; }
1531
1532 /// Returns the result type of the cast.
1533 Type *getResultType() const { return ResultTy; }
1534};
1535
1536/// A recipe for widening vector intrinsics.
1538 /// ID of the vector intrinsic to widen.
1539 Intrinsic::ID VectorIntrinsicID;
1540
1541 /// Scalar return type of the intrinsic.
1542 Type *ResultTy;
1543
1544 /// True if the intrinsic may read from memory.
1545 bool MayReadFromMemory;
1546
1547 /// True if the intrinsic may write to memory.
1548 bool MayWriteToMemory;
1549
1550 /// True if the intrinsic may have side-effects.
1551 bool MayHaveSideEffects;
1552
1553public:
1555 ArrayRef<VPValue *> CallArguments, Type *Ty,
1557 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1558 VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1559 MayReadFromMemory(CI.mayReadFromMemory()),
1560 MayWriteToMemory(CI.mayWriteToMemory()),
1561 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1562
1564 ArrayRef<VPValue *> CallArguments, Type *Ty,
1566 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1567 VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1568 LLVMContext &Ctx = Ty->getContext();
1569 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1570 MemoryEffects ME = Attrs.getMemoryEffects();
1571 MayReadFromMemory = !ME.onlyWritesMemory();
1572 MayWriteToMemory = !ME.onlyReadsMemory();
1573 MayHaveSideEffects = MayWriteToMemory ||
1574 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1575 !Attrs.hasAttribute(Attribute::WillReturn);
1576 }
1577
1578 ~VPWidenIntrinsicRecipe() override = default;
1579
1581 if (Value *CI = getUnderlyingValue())
1582 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1583 operands(), ResultTy, getDebugLoc());
1584 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1585 getDebugLoc());
1586 }
1587
1588 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1589
1590 /// Produce a widened version of the vector intrinsic.
1591 void execute(VPTransformState &State) override;
1592
1593 /// Return the cost of this vector intrinsic.
1595 VPCostContext &Ctx) const override;
1596
1597 /// Return the ID of the intrinsic.
1598 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1599
1600 /// Return the scalar return type of the intrinsic.
1601 Type *getResultType() const { return ResultTy; }
1602
1603 /// Return the name of the intrinsic as a string.
1605
1606 /// Returns true if the intrinsic may read from memory.
1607 bool mayReadFromMemory() const { return MayReadFromMemory; }
1608
1609 /// Returns true if the intrinsic may write to memory.
1610 bool mayWriteToMemory() const { return MayWriteToMemory; }
1611
1612 /// Returns true if the intrinsic may have side-effects.
1613 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1614
1615#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1616 /// Print the recipe.
1617 void print(raw_ostream &O, const Twine &Indent,
1618 VPSlotTracker &SlotTracker) const override;
1619#endif
1620
1621 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1622};
1623
1624/// A recipe for widening Call instructions using library calls.
1626 public VPIRMetadata {
1627 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1628 /// between a given VF and the chosen vectorized variant, so there will be a
1629 /// different VPlan for each VF with a valid variant.
1630 Function *Variant;
1631
1632public:
1634 ArrayRef<VPValue *> CallArguments,
1635 DebugLoc DL = DebugLoc::getUnknown())
1636 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1637 *cast<Instruction>(UV)),
1638 VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
1639 assert(
1640 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1641 "last operand must be the called function");
1642 }
1643
1644 ~VPWidenCallRecipe() override = default;
1645
1647 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1648 getDebugLoc());
1649 }
1650
1651 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1652
1653 /// Produce a widened version of the call instruction.
1654 void execute(VPTransformState &State) override;
1655
1656 /// Return the cost of this VPWidenCallRecipe.
1657 InstructionCost computeCost(ElementCount VF,
1658 VPCostContext &Ctx) const override;
1659
1661 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1662 }
1663
1664 operand_range args() { return drop_end(operands()); }
1665 const_operand_range args() const { return drop_end(operands()); }
1666
1667#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1668 /// Print the recipe.
1669 void print(raw_ostream &O, const Twine &Indent,
1670 VPSlotTracker &SlotTracker) const override;
1671#endif
1672};
1673
1674/// A recipe representing a sequence of load -> update -> store as part of
1675/// a histogram operation. This means there may be aliasing between vector
1676/// lanes, which is handled by the llvm.experimental.vector.histogram family
1677/// of intrinsics. The only update operations currently supported are
1678/// 'add' and 'sub' where the other term is loop-invariant.
1680 /// Opcode of the update operation, currently either add or sub.
1681 unsigned Opcode;
1682
1683public:
1684 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1686 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1687
1688 ~VPHistogramRecipe() override = default;
1689
1691 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1692 }
1693
1694 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1695
1696 /// Produce a vectorized histogram operation.
1697 void execute(VPTransformState &State) override;
1698
1699 /// Return the cost of this VPHistogramRecipe.
1701 VPCostContext &Ctx) const override;
1702
1703 unsigned getOpcode() const { return Opcode; }
1704
1705 /// Return the mask operand if one was provided, or a null pointer if all
1706 /// lanes should be executed unconditionally.
1707 VPValue *getMask() const {
1708 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1709 }
1710
1711#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1712 /// Print the recipe
1713 void print(raw_ostream &O, const Twine &Indent,
1714 VPSlotTracker &SlotTracker) const override;
1715#endif
1716};
1717
1718/// A recipe for widening select instructions.
1720 public VPIRMetadata {
1722 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
1723 VPIRMetadata(I) {}
1724
1725 ~VPWidenSelectRecipe() override = default;
1726
1728 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1729 operands());
1730 }
1731
1732 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1733
1734 /// Produce a widened version of the select instruction.
1735 void execute(VPTransformState &State) override;
1736
1737 /// Return the cost of this VPWidenSelectRecipe.
1738 InstructionCost computeCost(ElementCount VF,
1739 VPCostContext &Ctx) const override;
1740
1741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1742 /// Print the recipe.
1743 void print(raw_ostream &O, const Twine &Indent,
1744 VPSlotTracker &SlotTracker) const override;
1745#endif
1746
1747 unsigned getOpcode() const { return Instruction::Select; }
1748
1749 VPValue *getCond() const {
1750 return getOperand(0);
1751 }
1752
1753 bool isInvariantCond() const {
1754 return getCond()->isDefinedOutsideLoopRegions();
1755 }
1756
1757 /// Returns true if the recipe only uses the first lane of operand \p Op.
1758 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1759 assert(is_contained(operands(), Op) &&
1760 "Op must be an operand of the recipe");
1761 return Op == getCond() && isInvariantCond();
1762 }
1763};
1764
1765/// A recipe for handling GEP instructions.
1767 bool isPointerLoopInvariant() const {
1768 return getOperand(0)->isDefinedOutsideLoopRegions();
1769 }
1770
1771 bool isIndexLoopInvariant(unsigned I) const {
1772 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1773 }
1774
1775 bool areAllOperandsInvariant() const {
1776 return all_of(operands(), [](VPValue *Op) {
1777 return Op->isDefinedOutsideLoopRegions();
1778 });
1779 }
1780
1781public:
1783 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {
1785 (void)Metadata;
1787 assert(Metadata.empty() && "unexpected metadata on GEP");
1788 }
1789
1790 ~VPWidenGEPRecipe() override = default;
1791
1793 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1794 operands());
1795 }
1796
1797 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1798
1799 /// Generate the gep nodes.
1800 void execute(VPTransformState &State) override;
1801
1802 /// Return the cost of this VPWidenGEPRecipe.
1804 VPCostContext &Ctx) const override {
1805 // TODO: Compute accurate cost after retiring the legacy cost model.
1806 return 0;
1807 }
1808
1809#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1810 /// Print the recipe.
1811 void print(raw_ostream &O, const Twine &Indent,
1812 VPSlotTracker &SlotTracker) const override;
1813#endif
1814
1815 /// Returns true if the recipe only uses the first lane of operand \p Op.
1816 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1817 assert(is_contained(operands(), Op) &&
1818 "Op must be an operand of the recipe");
1819 if (Op == getOperand(0))
1820 return isPointerLoopInvariant();
1821 else
1822 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1823 }
1824};
1825
1826/// A recipe to compute a pointer to the last element of each part of a widened
1827/// memory access for widened memory accesses of IndexedTy. Used for
1828/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1830 public VPUnrollPartAccessor<2> {
1831 Type *IndexedTy;
1832
1833 /// The constant stride of the pointer computed by this recipe, expressed in
1834 /// units of IndexedTy.
1835 int64_t Stride;
1836
1837public:
1839 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1840 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1841 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1842 IndexedTy(IndexedTy), Stride(Stride) {
1843 assert(Stride < 0 && "Stride must be negative");
1844 }
1845
1846 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1847
1849 const VPValue *getVFValue() const { return getOperand(1); }
1850
1851 void execute(VPTransformState &State) override;
1852
1853 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1855 "Op must be an operand of the recipe");
1856 return true;
1857 }
1858
1859 /// Return the cost of this VPVectorEndPointerRecipe.
1861 VPCostContext &Ctx) const override {
1862 // TODO: Compute accurate cost after retiring the legacy cost model.
1863 return 0;
1864 }
1865
1866 /// Returns true if the recipe only uses the first part of operand \p Op.
1867 bool onlyFirstPartUsed(const VPValue *Op) const override {
1869 "Op must be an operand of the recipe");
1870 assert(getNumOperands() <= 2 && "must have at most two operands");
1871 return true;
1872 }
1873
1875 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1876 Stride, getGEPNoWrapFlags(),
1877 getDebugLoc());
1878 }
1879
1880#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1881 /// Print the recipe.
1882 void print(raw_ostream &O, const Twine &Indent,
1883 VPSlotTracker &SlotTracker) const override;
1884#endif
1885};
1886
1887 /// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1889 public VPUnrollPartAccessor<1> {
1890 Type *IndexedTy;
1891
1892public:
1894 DebugLoc DL)
1895 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1896 GEPFlags, DL),
1897 IndexedTy(IndexedTy) {}
1898
1899 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1900
1901 void execute(VPTransformState &State) override;
1902
1903 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1905 "Op must be an operand of the recipe");
1906 return true;
1907 }
1908
1909 /// Returns true if the recipe only uses the first part of operand \p Op.
1910 bool onlyFirstPartUsed(const VPValue *Op) const override {
1912 "Op must be an operand of the recipe");
1913 assert(getNumOperands() <= 2 && "must have at most two operands");
1914 return true;
1915 }
1916
1918 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1920 }
1921
1922 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
1923 /// this is only accurate after the VPlan has been unrolled.
1924 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
1925
1926 /// Return the cost of this VPVectorPointerRecipe.
1928 VPCostContext &Ctx) const override {
1929 // TODO: Compute accurate cost after retiring the legacy cost model.
1930 return 0;
1931 }
1932
1933#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1934 /// Print the recipe.
1935 void print(raw_ostream &O, const Twine &Indent,
1936 VPSlotTracker &SlotTracker) const override;
1937#endif
1938};
1939
1940/// A pure virtual base class for all recipes modeling header phis, including
1941/// phis for first order recurrences, pointer inductions and reductions. The
1942/// start value is the first operand of the recipe and the incoming value from
1943/// the backedge is the second operand.
1944///
1945/// Inductions are modeled using the following sub-classes:
1946/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1947/// starting at a specified value (zero for the main vector loop, the resume
1948/// value for the epilogue vector loop) and stepping by 1. The induction
1949/// controls exiting of the vector loop by comparing against the vector trip
1950/// count. Produces a single scalar PHI for the induction value per
1951/// iteration.
1952/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1953/// floating point inductions with arbitrary start and step values. Produces
1954/// a vector PHI per-part.
1955/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1956/// value of an IV with different start and step values. Produces a single
1957 /// scalar value per iteration.
1958/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1959/// canonical or derived induction.
1960/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1961/// pointer induction. Produces either a vector PHI per-part or scalar values
1962/// per-lane based on the canonical induction.
1964 public VPPhiAccessors {
1965protected:
/// Create a header phi recipe of kind \p VPDefID for \p UnderlyingInstr with
/// debug location \p DL. Only \p Start is added as an operand here, making
/// the start value operand 0.
1966 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1967 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
1968 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
1969 UnderlyingInstr, DL) {}
1970
1971 const VPRecipeBase *getAsRecipe() const override { return this; }
1972
1973public:
1974 ~VPHeaderPHIRecipe() override = default;
1975
1976 /// Method to support type inquiry through isa, cast, and dyn_cast.
1977 static inline bool classof(const VPRecipeBase *B) {
1978 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1979 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1980 }
/// Type inquiry for VPValues: \p V matches only if it has a defining recipe
/// and that recipe's ID lies within the header-phi ID range.
1981 static inline bool classof(const VPValue *V) {
1982 auto *B = V->getDefiningRecipe();
1983 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1984 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1985 }
1986
1987 /// Generate the phi nodes.
1988 void execute(VPTransformState &State) override = 0;
1989
1990 /// Return the cost of this header phi recipe.
1991 InstructionCost computeCost(ElementCount VF,
1992 VPCostContext &Ctx) const override;
1993
1994#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1995 /// Print the recipe.
1996 void print(raw_ostream &O, const Twine &Indent,
1997 VPSlotTracker &SlotTracker) const override = 0;
1998#endif
1999
2000 /// Returns the start value of the phi, if one is set.
2002 return getNumOperands() == 0 ? nullptr : getOperand(0);
2003 }
2005 return getNumOperands() == 0 ? nullptr : getOperand(0);
2006 }
2007
2008 /// Update the start value of the recipe.
2009 void setStartValue(VPValue *V) { setOperand(0, V); }
2010
2011 /// Returns the incoming value from the loop backedge.
2013 return getOperand(1);
2014 }
2015
2016 /// Update the incoming value from the loop backedge.
2017 void setBackedgeValue(VPValue *V) { setOperand(1, V); }
2018
2019 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2020 /// to be a recipe.
2022 return *getBackedgeValue()->getDefiningRecipe();
2023 }
2024};
2025
2026/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2027/// VPWidenPointerInductionRecipe), providing shared functionality, including
2028/// retrieving the step value, induction descriptor and original phi node.
2030 const InductionDescriptor &IndDesc;
2031
2032public:
/// Create a widened induction recipe of kind \p Kind for the phi \p IV.
/// \p Start is forwarded to VPHeaderPHIRecipe and \p Step is appended right
/// after it, which is why getStepValue() reads operand 1. \p IndDesc is kept
/// by reference, so it has to outlive the recipe.
2033 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2034 VPValue *Step, const InductionDescriptor &IndDesc,
2035 DebugLoc DL)
2036 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2037 addOperand(Step);
2038 }
2039
/// Method to support type inquiry through isa, cast, and dyn_cast: \p R is a
/// widened induction if it is the int/FP or the pointer induction recipe.
2040 static inline bool classof(const VPRecipeBase *R) {
2041 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2042 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2043 }
2044
/// Type inquiry for VPValues; a value without a defining recipe never matches.
2045 static inline bool classof(const VPValue *V) {
2046 auto *R = V->getDefiningRecipe();
2047 return R && classof(R);
2048 }
2049
/// Type inquiry for callers that already hold a VPHeaderPHIRecipe; defers to
/// the VPRecipeBase overload.
2050 static inline bool classof(const VPHeaderPHIRecipe *R) {
2051 return classof(static_cast<const VPRecipeBase *>(R));
2052 }
2053
2054 virtual void execute(VPTransformState &State) override = 0;
2055
2056 /// Returns the step value of the induction.
2058 const VPValue *getStepValue() const { return getOperand(1); }
2059
2060 /// Update the step value of the recipe.
2061 void setStepValue(VPValue *V) { setOperand(1, V); }
2062
2064 const VPValue *getVFValue() const { return getOperand(2); }
2065
2066 /// Returns the number of incoming values, also number of incoming blocks.
2067 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2068 /// incoming value, its start value.
2069 unsigned getNumIncoming() const override { return 1; }
2070
2071 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2072
2073 /// Returns the induction descriptor for the recipe.
2074 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2075
2077 // TODO: All operands of base recipe must exist and be at same index in
2078 // derived recipe.
2080 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2081 }
2082
2084 // TODO: All operands of base recipe must exist and be at same index in
2085 // derived recipe.
2087 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2088 }
2089
2090 /// Returns true if the recipe only uses the first lane of operand \p Op.
2091 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2093 "Op must be an operand of the recipe");
2094 // The recipe creates its own wide start value, so it only requests the
2095 // first lane of the operand.
2096 // TODO: Remove once creating the start value is modeled separately.
2097 return Op == getStartValue() || Op == getStepValue();
2098 }
2099};
2100
2101/// A recipe for handling phi nodes of integer and floating-point inductions,
2102/// producing their vector values. This is an abstract recipe and must be
2103/// converted to concrete recipes before executing.
2105 TruncInst *Trunc;
2106
2107 // If this recipe is unrolled it will have 2 additional operands.
2108 bool isUnrolled() const { return getNumOperands() == 5; }
2109
2110public:
2112 VPValue *VF, const InductionDescriptor &IndDesc,
2113 DebugLoc DL)
2114 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2115 Step, IndDesc, DL),
2116 Trunc(nullptr) {
2117 addOperand(VF);
2118 }
2119
2121 VPValue *VF, const InductionDescriptor &IndDesc,
2122 TruncInst *Trunc, DebugLoc DL)
2123 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2124 Step, IndDesc, DL),
2125 Trunc(Trunc) {
2126 addOperand(VF);
2128 (void)Metadata;
2129 if (Trunc)
2131 assert(Metadata.empty() && "unexpected metadata on Trunc");
2132 }
2133
2135
2140 }
2141
2142 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2143
/// Never valid to call: this recipe is abstract and must have been converted
/// to concrete recipes beforehand, so reaching this point is a bug.
2144 void execute(VPTransformState &State) override {
2145 llvm_unreachable("cannot execute this recipe, should be expanded via "
2146 "expandVPWidenIntOrFpInductionRecipe");
2147 }
2148
2149#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2150 /// Print the recipe.
2151 void print(raw_ostream &O, const Twine &Indent,
2152 VPSlotTracker &SlotTracker) const override;
2153#endif
2154
2156 // If the recipe has been unrolled return the VPValue for the induction
2157 // increment.
2158 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2159 }
2160
2161 /// Returns the number of incoming values, also number of incoming blocks.
2162 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2163 /// incoming value, its start value.
2164 unsigned getNumIncoming() const override { return 1; }
2165
2166 /// Returns the truncate instruction this recipe was created with, or nullptr
2167 /// if it was created without one.
2168 TruncInst *getTruncInst() { return Trunc; }
2169 const TruncInst *getTruncInst() const { return Trunc; }
2170
2171 /// Returns true if the induction is canonical, i.e. starting at 0 and
2172 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2173 /// same type as the canonical induction.
2174 bool isCanonical() const;
2175
2176 /// Returns the scalar type of the induction.
2178 return Trunc ? Trunc->getType()
2180 }
2181
2182 /// Returns the VPValue representing the value of this induction at
2183 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2184 /// take place.
2186 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2187 }
2188};
2189
2191 bool IsScalarAfterVectorization;
2192
2193public:
2194 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2195 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2196 /// VF*UF.
2198 VPValue *NumUnrolledElems,
2199 const InductionDescriptor &IndDesc,
2200 bool IsScalarAfterVectorization, DebugLoc DL)
2201 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2202 Step, IndDesc, DL),
2203 IsScalarAfterVectorization(IsScalarAfterVectorization) {
2204 addOperand(NumUnrolledElems);
2205 }
2206
2208
2211 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2212 getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
2213 getDebugLoc());
2214 }
2215
2216 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2217
2218 /// Never valid to call: the recipe is expected to have been expanded (see the
/// message below) before execution, so reaching this point is a bug.
2219 void execute(VPTransformState &State) override {
2220 llvm_unreachable("cannot execute this recipe, should be expanded via "
2221 "expandVPWidenPointerInduction");
2222 };
2223
2224 /// Returns true if only scalar values will be generated.
2225 bool onlyScalarsGenerated(bool IsScalable);
2226
2227#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2228 /// Print the recipe.
2229 void print(raw_ostream &O, const Twine &Indent,
2230 VPSlotTracker &SlotTracker) const override;
2231#endif
2232};
2233
2234/// A recipe for widened phis. Incoming values are operands of the recipe and
2235/// their operand index corresponds to the incoming predecessor block. If the
2236/// recipe is placed in an entry block to a (non-replicate) region, it must have
2237/// exactly 2 incoming values, the first from the predecessor of the region and
2238/// the second from the exiting block of the region.
2240 public VPPhiAccessors {
2241 /// Name to use for the generated IR instruction for the widened phi.
2242 std::string Name;
2243
2244protected:
2245 const VPRecipeBase *getAsRecipe() const override { return this; }
2246
2247public:
2248 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2249 /// debug location \p DL. \p Start is optional: when it is null the recipe is
/// created without any operands. \p Name is copied and kept as the name to
/// use for the generated IR instruction.
2250 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2251 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2252 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL),
2253 Name(Name.str()) {
2254 if (Start)
2255 addOperand(Start);
2256 }
2257
2259 auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()),
2260 getOperand(0), getDebugLoc(), Name);
2261 for (VPValue *Op : llvm::drop_begin(operands()))
2262 C->addOperand(Op);
2263 return C;
2264 }
2265
2266 ~VPWidenPHIRecipe() override = default;
2267
2268 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2269
2270 /// Generate the phi/select nodes.
2271 void execute(VPTransformState &State) override;
2272
2273#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2274 /// Print the recipe.
2275 void print(raw_ostream &O, const Twine &Indent,
2276 VPSlotTracker &SlotTracker) const override;
2277#endif
2278};
2279
2280/// A recipe for handling first-order recurrence phis. The start value is the
2281/// first operand of the recipe and the incoming value from the backedge is the
2282/// second operand.
2285 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2286
2287 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2288
2291 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2292 }
2293
2294 void execute(VPTransformState &State) override;
2295
2296 /// Return the cost of this first-order recurrence phi recipe.
2298 VPCostContext &Ctx) const override;
2299
2300#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2301 /// Print the recipe.
2302 void print(raw_ostream &O, const Twine &Indent,
2303 VPSlotTracker &SlotTracker) const override;
2304#endif
2305
2306 /// Returns true if the recipe only uses the first lane of operand \p Op.
2307 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2309 "Op must be an operand of the recipe");
2310 return Op == getStartValue();
2311 }
2312};
2313
2314/// A recipe for handling reduction phis. The start value is the first operand
2315/// of the recipe and the incoming value from the backedge is the second
2316/// operand.
2318 public VPUnrollPartAccessor<2> {
2319 /// The recurrence kind of the reduction.
2320 const RecurKind Kind;
2321
2322 /// The phi is part of an in-loop reduction.
2323 bool IsInLoop;
2324
2325 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2326 bool IsOrdered;
2327
2328 /// When expanding the reduction PHI, the plan's VF element count is divided
2329 /// by this factor to form the reduction phi's VF.
2330 unsigned VFScaleFactor = 1;
2331
2332public:
2333 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2335 bool IsInLoop = false, bool IsOrdered = false,
2336 unsigned VFScaleFactor = 1)
2337 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2338 IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2339 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2340 }
2341
2342 ~VPReductionPHIRecipe() override = default;
2343
2345 auto *R = new VPReductionPHIRecipe(
2346 dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
2347 *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
2348 R->addOperand(getBackedgeValue());
2349 return R;
2350 }
2351
2352 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2353
2354 /// Generate the phi/select nodes.
2355 void execute(VPTransformState &State) override;
2356
2357 /// Get the factor that the VF of this recipe's output should be scaled by.
2358 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2359
2360#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2361 /// Print the recipe.
2362 void print(raw_ostream &O, const Twine &Indent,
2363 VPSlotTracker &SlotTracker) const override;
2364#endif
2365
2366 /// Returns the number of incoming values, also number of incoming blocks.
2367 /// A reduction phi has two incoming values: its start value and the value
2368 /// from the loop backedge.
2369 unsigned getNumIncoming() const override { return 2; }
2370
2371 /// Returns the recurrence kind of the reduction.
2372 RecurKind getRecurrenceKind() const { return Kind; }
2373
2374 /// Returns true, if the phi is part of an ordered reduction.
2375 bool isOrdered() const { return IsOrdered; }
2376
2377 /// Returns true, if the phi is part of an in-loop reduction.
2378 bool isInLoop() const { return IsInLoop; }
2379
2380 /// Returns true if the recipe only uses the first lane of operand \p Op.
2381 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2383 "Op must be an operand of the recipe");
2384 return isOrdered() || isInLoop();
2385 }
2386};
2387
2388/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2389/// instructions.
2391public:
2392 /// The blend operation is a User of the incoming values and of their
2393 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2394 /// be omitted (implied by passing an odd number of operands) in which case
2395 /// all other incoming values are merged into it.
2397 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2398 assert(Operands.size() > 0 && "Expected at least one operand!");
2399 }
2400
/// Create a copy of this blend with the same underlying phi (which may be
/// null), the same operands and the same debug location.
2401 VPBlendRecipe *clone() override {
2402 return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()),
2403 operands(), getDebugLoc());
2404 }
2405
2406 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2407
2408 /// A normalized blend is one that has an odd number of operands, whereby the
2409 /// first operand does not have an associated mask.
2410 bool isNormalized() const { return getNumOperands() % 2; }
2411
2412 /// Return the number of incoming values, taking into account when normalized
2413 /// the first incoming value will have no mask.
2414 unsigned getNumIncomingValues() const {
2415 return (getNumOperands() + isNormalized()) / 2;
2416 }
2417
2418 /// Return incoming value number \p Idx.
2419 VPValue *getIncomingValue(unsigned Idx) const {
// Operands are laid out as [I0, M0, I1, M1, ...], so value Idx normally sits
// at 2 * Idx. A normalized blend omits M0, which shifts every later value
// down by one slot.
2420 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2421 }
2422
2423 /// Return mask number \p Idx. Index 0 is only valid for blends that are not
/// normalized, since a normalized blend has no mask for its first value.
2424 VPValue *getMask(unsigned Idx) const {
2425 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
// With M0 present each mask directly follows its value, at 2 * Idx + 1.
// Without M0 the masks of the later values sit at 2 * Idx.
2426 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2427 }
2428
/// Never valid to call: blends are expected to have been replaced (see the
/// message below) before the plan is executed.
2429 void execute(VPTransformState &State) override {
2430 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2431 }
2432
2433 /// Return the cost of this VPBlendRecipe.
2434 InstructionCost computeCost(ElementCount VF,
2435 VPCostContext &Ctx) const override;
2436
2437#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2438 /// Print the recipe.
2439 void print(raw_ostream &O, const Twine &Indent,
2440 VPSlotTracker &SlotTracker) const override;
2441#endif
2442
2443 /// Returns true if the recipe only uses the first lane of operand \p Op.
2444 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2445 assert(is_contained(operands(), Op) &&
2446 "Op must be an operand of the recipe");
2447 // The blend only needs the first lane if all of its users do. The query
2448 // recurses through blend recipes only and must stop at header phis at the latest.
2449 return all_of(users(),
2450 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2451 }
2452};
2453
2454/// A common base class for interleaved memory operations.
2455/// An Interleaved memory operation is a memory access method that combines
2456/// multiple strided loads/stores into a single wide load/store with shuffles.
2457/// The first operand is the start address. The optional operands are, in order,
2458/// the stored values and the mask.
2460 public VPIRMetadata {
2462
2463 /// Indicates if the interleave group is in a conditional block and requires a
2464 /// mask.
2465 bool HasMask = false;
2466
2467 /// Indicates if gaps between members of the group need to be masked out or if
2468 /// unused gaps can be loaded speculatively.
2469 bool NeedsMaskForGaps = false;
2470
2471protected:
2472 VPInterleaveBase(const unsigned char SC,
2475 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2476 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2477 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2478 NeedsMaskForGaps(NeedsMaskForGaps) {
2479 // TODO: extend the masked interleaved-group support to reversed access.
2480 assert((!Mask || !IG->isReverse()) &&
2481 "Reversed masked interleave-group not supported.");
2482 for (unsigned I = 0; I < IG->getFactor(); ++I)
2483 if (Instruction *Inst = IG->getMember(I)) {
2484 if (Inst->getType()->isVoidTy())
2485 continue;
2486 new VPValue(Inst, this);
2487 }
2488
2489 for (auto *SV : StoredValues)
2490 addOperand(SV);
2491 if (Mask) {
2492 HasMask = true;
2493 addOperand(Mask);
2494 }
2495 }
2496
2497public:
2498 VPInterleaveBase *clone() override = 0;
2499
/// Method to support type inquiry through isa, cast, and dyn_cast: matches
/// both the plain and the EVL interleave recipe.
2500 static inline bool classof(const VPRecipeBase *R) {
2501 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2502 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2503 }
2504
/// Type inquiry for VPUsers; a user that is not a recipe never matches.
2505 static inline bool classof(const VPUser *U) {
2506 auto *R = dyn_cast<VPRecipeBase>(U);
2507 return R && classof(R);
2508 }
2509
2510 /// Return the address accessed by this recipe.
2511 VPValue *getAddr() const {
2512 return getOperand(0); // Address is the 1st, mandatory operand.
2513 }
2514
2515 /// Return the mask used by this recipe. Note that a full mask is represented
2516 /// by a nullptr.
2517 VPValue *getMask() const {
2518 // Mask is optional and the last operand.
2519 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2520 }
2521
2522 /// Return true if the access needs a mask because of the gaps.
2523 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2524
2526
2527 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2528
/// Never valid to call on the base class; the concrete interleave recipes
/// provide their own execute().
2529 void execute(VPTransformState &State) override {
2530 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2531 }
2532
2533 /// Return the cost of this recipe.
2534 InstructionCost computeCost(ElementCount VF,
2535 VPCostContext &Ctx) const override;
2536
2537 /// Returns true if the recipe only uses the first lane of operand \p Op.
2538 virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
2539
2540 /// Returns the number of stored operands of this interleave group. Returns 0
2541 /// for load interleave groups.
2542 virtual unsigned getNumStoreOperands() const = 0;
2543
2544 /// Return the VPValues stored by this interleave group. If it is a load
2545 /// interleave group, return an empty ArrayRef.
2547 return ArrayRef<VPValue *>(op_end() -
2548 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2549 getNumStoreOperands());
2550 }
2551};
2552
2553/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2554/// or stores into one wide load/store and shuffles. The first operand of a
2555/// VPInterleave recipe is the address, followed by the stored values, followed
2556/// by an optional mask.
2558public:
2560 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2561 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2562 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2563 NeedsMaskForGaps, MD, DL) {}
2564
2565 ~VPInterleaveRecipe() override = default;
2566
2568 return new VPInterleaveRecipe(getInterleaveGroup(), getAddr(),
2569 getStoredValues(), getMask(),
2570 needsMaskForGaps(), *this, getDebugLoc());
2571 }
2572
2573 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2574
2575 /// Generate the wide load or store, and shuffles.
2576 void execute(VPTransformState &State) override;
2577
2578#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2579 /// Print the recipe.
2580 void print(raw_ostream &O, const Twine &Indent,
2581 VPSlotTracker &SlotTracker) const override;
2582#endif
2583
/// Returns true if the recipe only uses the first lane of operand \p Op. That
/// holds for the address, unless the same value is also one of the stored
/// values.
2584 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2585 assert(is_contained(operands(), Op) &&
2586 "Op must be an operand of the recipe");
2587 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2588 }
2589
/// Returns the number of stored values: every operand except the address
/// and, if present, the trailing mask.
2590 unsigned getNumStoreOperands() const override {
2591 return getNumOperands() - (getMask() ? 2 : 1);
2592 }
2593};
2594
2595/// A recipe for interleaved memory operations with vector-predication
2596/// intrinsics. The first operand is the address, the second operand is the
2597/// explicit vector length. Stored values and mask are optional operands.
2599public:
2601 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2602 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2603 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2604 R.getDebugLoc()) {
2605 assert(!getInterleaveGroup()->isReverse() &&
2606 "Reversed interleave-group with tail folding is not supported.");
2607 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2608 "supported for scalable vector.");
2609 }
2610
2611 ~VPInterleaveEVLRecipe() override = default;
2612
2614 llvm_unreachable("cloning not implemented yet");
2615 }
2616
2617 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2618
2619 /// The VPValue of the explicit vector length.
2620 VPValue *getEVL() const { return getOperand(1); }
2621
2622 /// Generate the wide load or store, and shuffles.
2623 void execute(VPTransformState &State) override;
2624
2625#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2626 /// Print the recipe.
2627 void print(raw_ostream &O, const Twine &Indent,
2628 VPSlotTracker &SlotTracker) const override;
2629#endif
2630
2631 /// The recipe only uses the first lane of the address, and EVL operand.
2632 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2633 assert(is_contained(operands(), Op) &&
2634 "Op must be an operand of the recipe");
2635 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2636 Op == getEVL();
2637 }
2638
/// Returns the number of stored values: every operand except the address,
/// the EVL and, if present, the trailing mask.
2639 unsigned getNumStoreOperands() const override {
2640 return getNumOperands() - (getMask() ? 3 : 2);
2641 }
2642};
2643
2644/// A recipe to represent inloop reduction operations, performing a reduction on
2645/// a vector operand into a scalar value, and adding the result to a chain.
2646/// The Operands are {ChainOp, VecOp, [Condition]}.
2648 /// The recurrence kind for the reduction in question.
2649 RecurKind RdxKind;
2650 bool IsOrdered;
2651 /// Whether the reduction is conditional.
2652 bool IsConditional = false;
2653
2654protected:
2655 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2658 bool IsOrdered, DebugLoc DL)
2659 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2660 IsOrdered(IsOrdered) {
2661 if (CondOp) {
2662 IsConditional = true;
2663 addOperand(CondOp);
2664 }
2665 setUnderlyingValue(I);
2666 }
2667
2668public:
2670 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2671 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2672 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2673 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2674 IsOrdered, DL) {}
2675
2677 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2678 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2679 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2680 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2681 IsOrdered, DL) {}
2682
2683 ~VPReductionRecipe() override = default;
2684
2686 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2687 getUnderlyingInstr(), getChainOp(), getVecOp(),
2688 getCondOp(), IsOrdered, getDebugLoc());
2689 }
2690
/// Method to support type inquiry through isa, cast, and dyn_cast: matches
/// recipes whose ID is VPReductionSC or VPReductionEVLSC.
2691 static inline bool classof(const VPRecipeBase *R) {
2692 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2693 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2694 }
2695
/// Type inquiry for VPUsers; a user that is not a recipe never matches.
2696 static inline bool classof(const VPUser *U) {
2697 auto *R = dyn_cast<VPRecipeBase>(U);
2698 return R && classof(R);
2699 }
2700
2701 /// Generate the reduction in the loop.
2702 void execute(VPTransformState &State) override;
2703
2704 /// Return the cost of VPReductionRecipe.
2705 InstructionCost computeCost(ElementCount VF,
2706 VPCostContext &Ctx) const override;
2707
2708#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2709 /// Print the recipe.
2710 void print(raw_ostream &O, const Twine &Indent,
2711 VPSlotTracker &SlotTracker) const override;
2712#endif
2713
2714 /// Return the recurrence kind for the in-loop reduction.
2715 RecurKind getRecurrenceKind() const { return RdxKind; }
2716 /// Return true if the in-loop reduction is ordered.
2717 bool isOrdered() const { return IsOrdered; };
2718 /// Return true if the in-loop reduction is conditional.
2719 bool isConditional() const { return IsConditional; };
2720 /// The VPValue of the scalar Chain being accumulated.
2721 VPValue *getChainOp() const { return getOperand(0); }
2722 /// The VPValue of the vector value to be reduced.
2723 VPValue *getVecOp() const { return getOperand(1); }
2724 /// The VPValue of the condition for the block.
2726 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2727 }
2728};
2729
2730/// A recipe for forming partial reductions. In the loop, an accumulator and
2731/// vector operand are added together and passed to the next iteration as the
2732/// next accumulator. After the loop body, the accumulator is reduced to a
2733/// scalar value.
2735 unsigned Opcode;
2736
2737 /// The divisor by which the VF of this recipe's output should be divided
2738 /// during execution.
2739 unsigned VFScaleFactor;
2740
2741public:
2743 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2744 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2745 VFScaleFactor, ReductionInst) {}
2746 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2747 VPValue *Cond, unsigned ScaleFactor,
2748 Instruction *ReductionInst = nullptr)
2749 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2750 FastMathFlags(), ReductionInst,
2751 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2752 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2753 [[maybe_unused]] auto *AccumulatorRecipe =
2755 assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2756 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2757 "Unexpected operand order for partial reduction recipe");
2758 }
2759 ~VPPartialReductionRecipe() override = default;
2760
2762 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2763 getCondOp(), VFScaleFactor,
2765 }
2766
2767 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2768
2769 /// Generate the reduction in the loop.
2770 void execute(VPTransformState &State) override;
2771
2772 /// Return the cost of this VPPartialReductionRecipe.
2774 VPCostContext &Ctx) const override;
2775
2776 /// Get the binary op's opcode.
2777 unsigned getOpcode() const { return Opcode; }
2778
2779 /// Get the factor that the VF of this recipe's output should be scaled by.
2780 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2781
2782#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2783 /// Print the recipe.
2784 void print(raw_ostream &O, const Twine &Indent,
2785 VPSlotTracker &SlotTracker) const override;
2786#endif
2787};
2788
2789/// A recipe to represent inloop reduction operations with vector-predication
2790/// intrinsics, performing a reduction on a vector operand with the explicit
2791/// vector length (EVL) into a scalar value, and adding the result to a chain.
2792/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2794public:
2796 DebugLoc DL = DebugLoc::getUnknown())
2798 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2799 R.getFastMathFlags(),
2800 cast_or_null<Instruction>(R.getUnderlyingValue()),
2801 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2802 R.isOrdered(), DL) {}
2803
2804 ~VPReductionEVLRecipe() override = default;
2805
2807 llvm_unreachable("cloning not implemented yet");
2808 }
2809
2810 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2811
2812 /// Generate the reduction in the loop.
2813 void execute(VPTransformState &State) override;
2814
2815#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2816 /// Print the recipe.
2817 void print(raw_ostream &O, const Twine &Indent,
2818 VPSlotTracker &SlotTracker) const override;
2819#endif
2820
2821 /// The VPValue of the explicit vector length.
2822 VPValue *getEVL() const { return getOperand(2); }
2823
2824 /// Returns true if the recipe only uses the first lane of operand \p Op.
2825 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2826 assert(is_contained(operands(), Op) &&
2827 "Op must be an operand of the recipe");
2828 return Op == getEVL();
2829 }
2830};
2831
2832/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2833/// copies of the original scalar type, one per lane, instead of producing a
2834/// single copy of widened type for all lanes. If the instruction is known to be
2835/// a single scalar, only one copy, per lane zero, will be generated.
2837 public VPIRMetadata {
2838 /// Indicator if only a single replica, for lane zero, is needed instead of
/// one replica per lane.
2839 bool IsSingleScalar;
2840
2841 /// Indicator if the replicas are also predicated.
2842 bool IsPredicated;
2843
2844public:
2846 bool IsSingleScalar, VPValue *Mask = nullptr,
2848 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2849 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2850 IsPredicated(Mask) {
2851 if (Mask)
2852 addOperand(Mask);
2853 }
2854
2855 ~VPReplicateRecipe() override = default;
2856
2858 auto *Copy =
2859 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
2860 isPredicated() ? getMask() : nullptr, *this);
2861 Copy->transferFlags(*this);
2862 return Copy;
2863 }
2864
2865 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2866
2867 /// Generate replicas of the desired Ingredient. Replicas will be generated
2868 /// for all parts and lanes unless a specific part and lane are specified in
2869 /// the \p State.
2870 void execute(VPTransformState &State) override;
2871
2872 /// Return the cost of this VPReplicateRecipe.
2873 InstructionCost computeCost(ElementCount VF,
2874 VPCostContext &Ctx) const override;
2875
2876#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2877 /// Print the recipe.
2878 void print(raw_ostream &O, const Twine &Indent,
2879 VPSlotTracker &SlotTracker) const override;
2880#endif
2881
2882 bool isSingleScalar() const { return IsSingleScalar; }
2883
2884 bool isPredicated() const { return IsPredicated; }
2885
2886 /// Returns true if the recipe only uses the first lane of operand \p Op. The result is the same for every operand: true iff this recipe is a single scalar.
2887 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2888 assert(is_contained(operands(), Op) &&
2889 "Op must be an operand of the recipe");
2890 return isSingleScalar();
2891 }
2892
2893 /// Returns true if the recipe uses scalars of operand \p Op. Always true here; \p Op must be an operand of this recipe (checked by an assertion).
2894 bool usesScalars(const VPValue *Op) const override {
2895 assert(is_contained(operands(), Op) &&
2896 "Op must be an operand of the recipe");
2897 return true;
2898 }
2899
2900 /// Returns true if the recipe is used by a widened recipe via an intervening
2901 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2902 /// in a vector.
2903 bool shouldPack() const;
2904
2905 /// Return the mask of a predicated VPReplicateRecipe.
2907 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2908 return getOperand(getNumOperands() - 1);
2909 }
2910
2911 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2912};
2913
2914/// A recipe for generating conditional branches on the bits of a mask.
2916public:
2918 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
2919
2921 return new VPBranchOnMaskRecipe(getOperand(0), getDebugLoc());
2922 }
2923
2924 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2925
2926 /// Generate the extraction of the appropriate bit from the block mask and the
2927 /// conditional branch.
2928 void execute(VPTransformState &State) override;
2929
2930 /// Return the cost of this VPBranchOnMaskRecipe.
2931 InstructionCost computeCost(ElementCount VF,
2932 VPCostContext &Ctx) const override;
2933
2934#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2935 /// Print the recipe to \p O: \p Indent, then the text "BRANCH-ON-MASK ", then the recipe's operands.
2936 void print(raw_ostream &O, const Twine &Indent,
2937 VPSlotTracker &SlotTracker) const override {
2938 O << Indent << "BRANCH-ON-MASK ";
2939 printOperands(O, SlotTracker);
2940 }
2941#endif
2942
2943 /// Returns true if the recipe uses scalars of operand \p Op. Always true here; \p Op must be an operand of this recipe (checked by an assertion).
2944 bool usesScalars(const VPValue *Op) const override {
2945 assert(is_contained(operands(), Op) &&
2946 "Op must be an operand of the recipe");
2947 return true;
2948 }
2949};
2950
2951/// A recipe to combine multiple recipes into a single 'expression' recipe,
2952/// which should be considered a single entity for cost-modeling and transforms.
2953/// The recipe needs to be 'decomposed', i.e. replaced by its individual
2954/// expression recipes, before execute. The individual expression recipes are
2955/// completely disconnected from the def-use graph of other recipes not part of
2956/// the expression. Def-use edges between pairs of expression recipes remain
2957/// intact, whereas every edge between an expression recipe and a recipe outside
2958/// the expression is elevated to connect the non-expression recipe with the
2959/// VPExpressionRecipe itself.
2961 /// Recipes included in this VPExpressionRecipe.
2962 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
2963
2964 /// Temporary VPValues used for external operands of the expression, i.e.
2965 /// operands not defined by recipes in the expression.
2966 SmallVector<VPValue *> LiveInPlaceholders;
2967
2968 enum class ExpressionTypes {
2969 /// Represents an in-loop extended reduction operation, performing a
2970 /// reduction on an extended vector operand into a scalar value, and adding
2971 /// the result to a chain.
2972 ExtendedReduction,
2973 /// Represents an in-loop multiply-accumulate reduction, multiplying the
2974 /// extended vector operands, performing a reduction.add on the result, and
2975 /// adding the scalar result to a chain.
2976 ExtMulAccReduction,
2977 /// Represents an in-loop multiply-accumulate reduction, multiplying the
2978 /// vector operands, performing a reduction.add on the result, and adding
2979 /// the scalar result to a chain.
2980 MulAccReduction,
2981 };
2982
2983 /// Type of the expression.
2984 ExpressionTypes ExpressionType;
2985
2986 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
2987 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
2988 /// in the expression) are replaced by temporary VPValues and the original
2989 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
2990 /// as needed (excluding last) to ensure they are only used by other recipes
2991 /// in the expression.
2992 VPExpressionRecipe(ExpressionTypes ExpressionType,
2993 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
2994
2995public:
2997 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
2999 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3002 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3003 {Ext0, Ext1, Mul, Red}) {}
3004
3006 for (auto *R : reverse(ExpressionRecipes))
3007 delete R;
3008 for (VPValue *T : LiveInPlaceholders)
3009 delete T;
3010 }
3011
3012 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3013
3015 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3016 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3017 for (auto *R : ExpressionRecipes)
3018 NewExpressiondRecipes.push_back(R->clone());
3019 for (auto *New : NewExpressiondRecipes) {
3020 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3021 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3022 // Update placeholder operands in the cloned recipe to use the external
3023 // operands, to be internalized when the cloned expression is constructed.
3024 for (const auto &[Placeholder, OutsideOp] :
3025 zip(LiveInPlaceholders, operands()))
3026 New->replaceUsesOfWith(Placeholder, OutsideOp);
3027 }
3028 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3029 }
3030
3031 /// Return the VPValue to use to infer the result type of the recipe.
3033 unsigned OpIdx =
3034 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3035 : 1;
3036 return getOperand(getNumOperands() - OpIdx);
3037 }
3038
3039 /// Insert the recipes of the expression back into the VPlan, directly before
3040 /// the current recipe. Leaves the expression recipe empty, which must be
3041 /// removed before codegen.
3042 void decompose();
3043
3044 /// Code generation entry point; must not be called, because this recipe is abstract and has to be removed from the plan before execute.
3045 void execute(VPTransformState &State) override {
3046 llvm_unreachable("recipe must be removed before execute");
3047 }
3048
3050 VPCostContext &Ctx) const override;
3051
3052#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3053 /// Print the recipe.
3054 void print(raw_ostream &O, const Twine &Indent,
3055 VPSlotTracker &SlotTracker) const override;
3056#endif
3057
3058 /// Returns true if this expression contains recipes that may read from or
3059 /// write to memory.
3060 bool mayReadOrWriteMemory() const;
3061
3062 /// Returns true if this expression contains recipes that may have side
3063 /// effects.
3064 bool mayHaveSideEffects() const;
3065};
3066
3067/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3068/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3069/// order to merge values that are set under such a branch and feed their uses.
3070/// The phi nodes can be scalar or vector depending on the users of the value.
3071/// This recipe works in concert with VPBranchOnMaskRecipe.
3073public:
3074 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3075 /// nodes after merging back from a Branch-on-Mask.
3077 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3078 ~VPPredInstPHIRecipe() override = default;
3079
3081 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3082 }
3083
3084 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3085
3086 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3087 /// retain SSA form.
3088 void execute(VPTransformState &State) override;
3089
3090 /// Return the cost of this VPPredInstPHIRecipe.
3092 VPCostContext &Ctx) const override {
3093 // TODO: Compute accurate cost after retiring the legacy cost model.
3094 return 0;
3095 }
3096
3097#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3098 /// Print the recipe.
3099 void print(raw_ostream &O, const Twine &Indent,
3100 VPSlotTracker &SlotTracker) const override;
3101#endif
3102
3103 /// Returns true if the recipe uses scalars of operand \p Op. Always true here; \p Op must be an operand of this recipe (checked by an assertion).
3104 bool usesScalars(const VPValue *Op) const override {
3105 assert(is_contained(operands(), Op) &&
3106 "Op must be an operand of the recipe");
3107 return true;
3108 }
3109};
3110
3111/// A common base class for widening memory operations. An optional mask can be
3112/// provided as the last operand.
3114 public VPIRMetadata {
3115protected:
3117
3118 /// Whether the accessed addresses are consecutive.
3120
3121 /// Whether the consecutive accessed addresses are in reverse order.
3123
3124 /// Whether the memory access is masked.
3125 bool IsMasked = false;
3126
3127 void setMask(VPValue *Mask) { // Attach an optional mask; a mask may be set at most once.
3128 assert(!IsMasked && "cannot re-set mask");
3129 if (!Mask) // A null mask leaves the recipe unmasked; nothing is added.
3130 return;
3131 addOperand(Mask); // Appended after the existing operands, so the mask is the last operand.
3132 IsMasked = true;
3133 }
3134
3135 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3136 std::initializer_list<VPValue *> Operands,
3137 bool Consecutive, bool Reverse,
3139 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3140 Consecutive(Consecutive), Reverse(Reverse) {
3141 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3142 }
3143
3144public:
3146 llvm_unreachable("cloning not supported");
3147 }
3148
3149 static inline bool classof(const VPRecipeBase *R) { // True iff R's VPDefID is one of the four widened load/store IDs (plain or EVL).
3150 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3151 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3152 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3153 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3154 }
3155
3156 static inline bool classof(const VPUser *U) { // A VPUser qualifies only if it is a VPRecipeBase accepted by the VPRecipeBase overload of classof.
3157 auto *R = dyn_cast<VPRecipeBase>(U);
3158 return R && classof(R);
3159 }
3160
3161 /// Return whether the loaded-from / stored-to addresses are consecutive.
3162 bool isConsecutive() const { return Consecutive; }
3163
3164 /// Return whether the consecutive loaded/stored addresses are in reverse
3165 /// order.
3166 bool isReverse() const { return Reverse; }
3167
3168 /// Return the address accessed by this recipe.
3169 VPValue *getAddr() const { return getOperand(0); }
3170
3171 /// Returns true if the recipe is masked.
3172 bool isMasked() const { return IsMasked; }
3173
3174 /// Return the mask used by this recipe, or nullptr if the recipe is not
3175 /// masked. Note that a full mask is represented by a nullptr.
3176 VPValue *getMask() const {
3177 // The mask is optional and, when present, is always the last operand.
3178 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3179 }
3180
3181 /// Generate the wide load/store. Unreachable in this base class, which is not meant to be instantiated directly.
3182 void execute(VPTransformState &State) override {
3183 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3184 }
3185
3186 /// Return the cost of this VPWidenMemoryRecipe.
3187 InstructionCost computeCost(ElementCount VF,
3188 VPCostContext &Ctx) const override;
3189
3190 Instruction &getIngredient() const { return Ingredient; }
3191};
3192
3193/// A recipe for widening load operations, using the address to load from and an
3194/// optional mask.
3196 public VPValue {
3198 bool Consecutive, bool Reverse,
3200 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3201 Reverse, Metadata, DL),
3202 VPValue(this, &Load) {
3203 setMask(Mask);
3204 }
3205
3207 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
3208 getMask(), Consecutive, Reverse, *this,
3209 getDebugLoc());
3210 }
3211
3212 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3213
3214 /// Generate a wide load or gather.
3215 void execute(VPTransformState &State) override;
3216
3217#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3218 /// Print the recipe.
3219 void print(raw_ostream &O, const Twine &Indent,
3220 VPSlotTracker &SlotTracker) const override;
3221#endif
3222
3223 /// Returns true if the recipe only uses the first lane of operand \p Op.
3224 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3225 assert(is_contained(operands(), Op) &&
3226 "Op must be an operand of the recipe");
3227 // Widened, consecutive load operations only demand the first lane of
3228 // their address.
3229 return Op == getAddr() && isConsecutive();
3230 }
3231};
3232
3233/// A recipe for widening load operations with vector-predication intrinsics,
3234/// using the address to load from, the explicit vector length and an optional
3235/// mask.
3236struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3238 VPValue *Mask)
3239 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3240 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3241 L.getDebugLoc()),
3242 VPValue(this, &getIngredient()) {
3243 setMask(Mask);
3244 }
3245
3246 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3247
3248 /// Return the EVL operand.
3249 VPValue *getEVL() const { return getOperand(1); }
3250
3251 /// Generate the wide load or gather.
3252 void execute(VPTransformState &State) override;
3253
3254 /// Return the cost of this VPWidenLoadEVLRecipe.
3256 VPCostContext &Ctx) const override;
3257
3258#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3259 /// Print the recipe.
3260 void print(raw_ostream &O, const Twine &Indent,
3261 VPSlotTracker &SlotTracker) const override;
3262#endif
3263
3264 /// Returns true if the recipe only uses the first lane of operand \p Op.
3265 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3267 "Op must be an operand of the recipe");
3268 // Widened loads only demand the first lane of EVL and consecutive loads
3269 // only demand the first lane of their address.
3270 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3271 }
3272};
3273
3274/// A recipe for widening store operations, using the stored value, the address
3275/// to store to and an optional mask.
3278 VPValue *Mask, bool Consecutive, bool Reverse,
3280 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3281 Consecutive, Reverse, Metadata, DL) {
3282 setMask(Mask);
3283 }
3284
3286 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3287 getStoredValue(), getMask(), Consecutive,
3288 Reverse, *this, getDebugLoc());
3289 }
3290
3291 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3292
3293 /// Return the value stored by this recipe.
3294 VPValue *getStoredValue() const { return getOperand(1); }
3295
3296 /// Generate a wide store or scatter.
3297 void execute(VPTransformState &State) override;
3298
3299#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3300 /// Print the recipe.
3301 void print(raw_ostream &O, const Twine &Indent,
3302 VPSlotTracker &SlotTracker) const override;
3303#endif
3304
3305 /// Returns true if the recipe only uses the first lane of operand \p Op.
3306 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3307 assert(is_contained(operands(), Op) &&
3308 "Op must be an operand of the recipe");
3309 // Widened, consecutive stores only demand the first lane of their address,
3310 // unless the same operand is also stored.
3311 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3312 }
3313};
3314
3315/// A recipe for widening store operations with vector-predication intrinsics,
3316/// using the value to store, the address to store to, the explicit vector
3317/// length and an optional mask.
3320 VPValue *Mask)
3321 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3322 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3323 S.isReverse(), S, S.getDebugLoc()) {
3324 setMask(Mask);
3325 }
3326
3327 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3328
3329 /// Return the value stored by this recipe.
3330 VPValue *getStoredValue() const { return getOperand(1); }
3331
3332 /// Return the EVL operand.
3333 VPValue *getEVL() const { return getOperand(2); }
3334
3335 /// Generate the wide store or scatter.
3336 void execute(VPTransformState &State) override;
3337
3338 /// Return the cost of this VPWidenStoreEVLRecipe.
3340 VPCostContext &Ctx) const override;
3341
3342#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3343 /// Print the recipe.
3344 void print(raw_ostream &O, const Twine &Indent,
3345 VPSlotTracker &SlotTracker) const override;
3346#endif
3347
3348 /// Returns true if the recipe only uses the first lane of operand \p Op.
3349 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3351 "Op must be an operand of the recipe");
3352 if (Op == getEVL()) {
3353 assert(getStoredValue() != Op && "unexpected store of EVL");
3354 return true;
3355 }
3356 // Widened, consecutive memory operations only demand the first lane of
3357 // their address, unless the same operand is also stored. The latter can
3358 // happen with opaque pointers.
3359 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3360 }
3361};
3362
3363/// Recipe to expand a SCEV expression.
3365 const SCEV *Expr;
3366
3367public:
3369 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3370
3371 ~VPExpandSCEVRecipe() override = default;
3372
3373 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3374
3375 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3376
3377 void execute(VPTransformState &State) override { // Must never run: SCEV expressions have to be expanded before final execute.
3378 llvm_unreachable("SCEV expressions must be expanded before final execute");
3379 }
3380
3381 /// Return the cost of this VPExpandSCEVRecipe.
3383 VPCostContext &Ctx) const override {
3384 // TODO: Compute accurate cost after retiring the legacy cost model.
3385 return 0;
3386 }
3387
3388#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3389 /// Print the recipe.
3390 void print(raw_ostream &O, const Twine &Indent,
3391 VPSlotTracker &SlotTracker) const override;
3392#endif
3393
3394 const SCEV *getSCEV() const { return Expr; }
3395};
3396
3397 /// Canonical scalar induction phi of the vector loop, starting at the specified
3398 /// start value (either 0 or the resume value when vectorizing the epilogue
3399 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3400 /// canonical induction variable.
3402public:
3404 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3405
3406 ~VPCanonicalIVPHIRecipe() override = default;
3407
3409 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3410 R->addOperand(getBackedgeValue());
3411 return R;
3412 }
3413
3414 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3415
3416 void execute(VPTransformState &State) override { // Must never run: this recipe is expected to be replaced by a scalar phi recipe first.
3417 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3418 "scalar phi recipe");
3419 }
3420
3421#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3422 /// Print the recipe.
3423 void print(raw_ostream &O, const Twine &Indent,
3424 VPSlotTracker &SlotTracker) const override;
3425#endif
3426
3427 /// Returns the scalar type of the induction.
3429 return getStartValue()->getLiveInIRValue()->getType();
3430 }
3431
3432 /// Returns true if the recipe only uses the first lane of operand \p Op.
3433 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3435 "Op must be an operand of the recipe");
3436 return true;
3437 }
3438
3439 /// Returns true if the recipe only uses the first part of operand \p Op.
3440 bool onlyFirstPartUsed(const VPValue *Op) const override {
3442 "Op must be an operand of the recipe");
3443 return true;
3444 }
3445
3446 /// Return the cost of this VPCanonicalIVPHIRecipe.
3448 VPCostContext &Ctx) const override {
3449 // For now, match the behavior of the legacy cost model.
3450 return 0;
3451 }
3452};
3453
3454/// A recipe for generating the active lane mask for the vector loop that is
3455/// used to predicate the vector operations.
3456/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3457/// remove VPActiveLaneMaskPHIRecipe.
3459public:
3461 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3462 DL) {}
3463
3464 ~VPActiveLaneMaskPHIRecipe() override = default;
3465
3468 if (getNumOperands() == 2)
3469 R->addOperand(getOperand(1));
3470 return R;
3471 }
3472
3473 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3474
3475 /// Generate the active lane mask phi of the vector loop.
3476 void execute(VPTransformState &State) override;
3477
3478#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3479 /// Print the recipe.
3480 void print(raw_ostream &O, const Twine &Indent,
3481 VPSlotTracker &SlotTracker) const override;
3482#endif
3483};
3484
3485/// A recipe for generating the phi node for the current index of elements,
3486/// adjusted in accordance with EVL value. It starts at the start value of the
3487/// canonical induction and gets incremented by EVL in each iteration of the
3488/// vector loop.
3490public:
3492 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3493
3494 ~VPEVLBasedIVPHIRecipe() override = default;
3495
3497 llvm_unreachable("cloning not implemented yet");
3498 }
3499
3500 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3501
3502 void execute(VPTransformState &State) override { // Must never run: this recipe is expected to be replaced by a scalar phi recipe first.
3503 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3504 "scalar phi recipe");
3505 }
3506
3507 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3509 VPCostContext &Ctx) const override {
3510 // For now, match the behavior of the legacy cost model.
3511 return 0;
3512 }
3513
3514 /// Returns true if the recipe only uses the first lane of operand \p Op.
3515 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3517 "Op must be an operand of the recipe");
3518 return true;
3519 }
3520
3521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3522 /// Print the recipe.
3523 void print(raw_ostream &O, const Twine &Indent,
3524 VPSlotTracker &SlotTracker) const override;
3525#endif
3526};
3527
3528/// A Recipe for widening the canonical induction variable of the vector loop.
3530 public VPUnrollPartAccessor<1> {
3531public:
3533 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3534
3535 ~VPWidenCanonicalIVRecipe() override = default;
3536
3538 return new VPWidenCanonicalIVRecipe(
3539 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3540 }
3541
3542 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3543
3544 /// Generate a canonical vector induction variable of the vector loop, with
3545 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3546 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3547 void execute(VPTransformState &State) override;
3548
3549 /// Return the cost of this VPWidenCanonicalIVRecipe.
3551 VPCostContext &Ctx) const override {
3552 // TODO: Compute accurate cost after retiring the legacy cost model.
3553 return 0;
3554 }
3555
3556#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3557 /// Print the recipe.
3558 void print(raw_ostream &O, const Twine &Indent,
3559 VPSlotTracker &SlotTracker) const override;
3560#endif
3561};
3562
3563 /// A recipe for converting the input value \p IV to the corresponding
3564 /// value of an IV with different start and step values, using Start + IV *
3565 /// Step.
3567 /// Kind of the induction.
3569 /// If not nullptr, the floating point induction binary operator. Must be set
3570 /// for floating point inductions.
3571 const FPMathOperator *FPBinOp;
3572
3573 /// Name to use for the generated IR instruction for the derived IV.
3574 std::string Name;
3575
3576public:
3578 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3579 const Twine &Name = "")
3581 IndDesc.getKind(),
3582 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3583 Start, CanonicalIV, Step, Name) {}
3584
3586 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3587 VPValue *Step, const Twine &Name = "")
3588 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3589 FPBinOp(FPBinOp), Name(Name.str()) {}
3590
3591 ~VPDerivedIVRecipe() override = default;
3592
3594 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3595 getStepValue());
3596 }
3597
3598 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3599
3600 /// Generate the transformed value of the induction at offset StartValue (1st
3601 /// operand) + IV (2nd operand) * StepValue (3rd operand).
3602 void execute(VPTransformState &State) override;
3603
3604 /// Return the cost of this VPDerivedIVRecipe.
3606 VPCostContext &Ctx) const override {
3607 // TODO: Compute accurate cost after retiring the legacy cost model.
3608 return 0;
3609 }
3610
3611#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3612 /// Print the recipe.
3613 void print(raw_ostream &O, const Twine &Indent,
3614 VPSlotTracker &SlotTracker) const override;
3615#endif
3616
3618 return getStartValue()->getLiveInIRValue()->getType();
3619 }
3620
3621 VPValue *getStartValue() const { return getOperand(0); }
3622 VPValue *getStepValue() const { return getOperand(2); }
3623
3624 /// Returns true if the recipe only uses the first lane of operand \p Op.
3625 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3627 "Op must be an operand of the recipe");
3628 return true;
3629 }
3630};
3631
3632/// A recipe for handling phi nodes of integer and floating-point inductions,
3633/// producing their scalar values.
3635 public VPUnrollPartAccessor<3> {
3636 Instruction::BinaryOps InductionOpcode;
3637
3638public:
3641 DebugLoc DL)
3642 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3643 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3644 InductionOpcode(Opcode) {}
3645
3647 VPValue *Step, VPValue *VF,
3648 DebugLoc DL = DebugLoc::getUnknown())
3650 IV, Step, VF, IndDesc.getInductionOpcode(),
3651 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3652 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3653 : FastMathFlags(),
3654 DL) {}
3655
3656 ~VPScalarIVStepsRecipe() override = default;
3657
3659 return new VPScalarIVStepsRecipe(
3660 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3661 hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
3662 getDebugLoc());
3663 }
3664
3665 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3666 /// this is only accurate after the VPlan has been unrolled.
3667 bool isPart0() const { return getUnrollPart(*this) == 0; }
3668
3669 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3670
3671 /// Generate the scalarized versions of the phi node as needed by their users.
3672 void execute(VPTransformState &State) override;
3673
3674 /// Return the cost of this VPScalarIVStepsRecipe.
3676 VPCostContext &Ctx) const override {
3677 // TODO: Compute accurate cost after retiring the legacy cost model.
3678 return 0;
3679 }
3680
3681#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3682 /// Print the recipe.
3683 void print(raw_ostream &O, const Twine &Indent,
3684 VPSlotTracker &SlotTracker) const override;
3685#endif
3686
3687 VPValue *getStepValue() const { return getOperand(1); }
3688
3689 /// Returns true if the recipe only uses the first lane of operand \p Op. Always true here; \p Op must be an operand of this recipe (checked by an assertion).
3690 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3691 assert(is_contained(operands(), Op) &&
3692 "Op must be an operand of the recipe");
3693 return true;
3694 }
3695};
3696
3697/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3698/// types implementing VPPhiAccessors. Used by isa<> & co.
3700 static inline bool isPossible(const VPRecipeBase *f) { // The cast is possible iff f is one of the recipe kinds listed in the isa<> below.
3701 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3702 return isa<VPIRPhi, VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPhi>(f);
3703 }
3704};
3705/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3706/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3707template <typename SrcTy>
3708struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3709
3711
3712 /// doCast is used by cast<>. Selects the concrete phi recipe class from the VPDefID of \p R and casts to it; any ID without an explicit case is cast to VPHeaderPHIRecipe.
3713 static inline VPPhiAccessors *doCast(SrcTy R) {
3714 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * { // The lambda yields a pointer-to-const; the const_cast strips the const from its result.
3715 switch (R->getVPDefID()) {
3716 case VPDef::VPInstructionSC:
3717 return cast<VPPhi>(R);
3718 case VPDef::VPIRInstructionSC:
3719 return cast<VPIRPhi>(R);
3720 case VPDef::VPWidenPHISC:
3721 return cast<VPWidenPHIRecipe>(R);
3722 default:
3723 return cast<VPHeaderPHIRecipe>(R);
3724 }
3725 }());
3726 }
3727
3728 /// doCastIfPossible is used by dyn_cast<>. Returns nullptr when isPossible rejects \p f, otherwise the result of doCast.
3729 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3730 if (!Self::isPossible(f))
3731 return nullptr;
3732 return doCast(f);
3733 }
3734};
3735template <>
3738template <>
3741
3742/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3743/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3744/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3746 friend class VPlan;
3747
3748 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3749 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3750 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3751 if (Recipe)
3752 appendRecipe(Recipe);
3753 }
3754
3755public:
3757
3758protected:
3759 /// The VPRecipes held in the order of output instructions to generate.
3761
3762 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3763 : VPBlockBase(BlockSC, Name.str()) {}
3764
3765public:
3766 ~VPBasicBlock() override { // Empty the recipe list, removing recipes from the back one at a time.
3767 while (!Recipes.empty())
3768 Recipes.pop_back();
3769 }
3770
3771 /// Instruction iterators...
3776
3777 //===--------------------------------------------------------------------===//
3778 /// Recipe iterator methods
3779 ///
3780 inline iterator begin() { return Recipes.begin(); }
3781 inline const_iterator begin() const { return Recipes.begin(); }
3782 inline iterator end() { return Recipes.end(); }
3783 inline const_iterator end() const { return Recipes.end(); }
3784
3785 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3786 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3787 inline reverse_iterator rend() { return Recipes.rend(); }
3788 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3789
3790 inline size_t size() const { return Recipes.size(); }
3791 inline bool empty() const { return Recipes.empty(); }
3792 inline const VPRecipeBase &front() const { return Recipes.front(); }
3793 inline VPRecipeBase &front() { return Recipes.front(); }
3794 inline const VPRecipeBase &back() const { return Recipes.back(); }
3795 inline VPRecipeBase &back() { return Recipes.back(); }
3796
3797 /// Returns a reference to the list of recipes.
3798 RecipeListTy &getRecipeList() { return Recipes; }
3799
3800 /// Returns a pointer to a member of the recipe list.
3802 return &VPBasicBlock::Recipes;
3803 }
3804
3805 /// Method to support type inquiry through isa, cast, and dyn_cast. Accepts blocks whose ID is VPBasicBlockSC or VPIRBasicBlockSC.
3806 static inline bool classof(const VPBlockBase *V) {
3807 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3808 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3809 }
3810
3811 void insert(VPRecipeBase *Recipe, iterator InsertPt) { // Insert \p Recipe into this block's recipe list at \p InsertPt.
3812 assert(Recipe && "No recipe to append.");
3813 assert(!Recipe->Parent && "Recipe already in VPlan"); // The recipe must not already have a parent block.
3814 Recipe->Parent = this; // Record this block as the recipe's parent.
3815 Recipes.insert(InsertPt, Recipe);
3816 }
3817
3818 /// Augment the existing recipes of a VPBasicBlock with an additional
3819 /// \p Recipe as the last recipe.
3820 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3821
3822 /// The method which generates the output IR instructions that correspond to
3823 /// this VPBasicBlock, thereby "executing" the VPlan.
3824 void execute(VPTransformState *State) override;
3825
3826 /// Return the cost of this VPBasicBlock.
3827 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3828
3829 /// Return the position of the first non-phi node recipe in the block.
3830 iterator getFirstNonPhi();
3831
3832 /// Returns an iterator range over the PHI-like recipes in the block.
3834 return make_range(begin(), getFirstNonPhi());
3835 }
3836
3837 /// Split current block at \p SplitAt by inserting a new block between the
3838 /// current block and its successors and moving all recipes starting at
3839 /// SplitAt to the new block. Returns the new block.
3840 VPBasicBlock *splitAt(iterator SplitAt);
3841
3842 VPRegionBlock *getEnclosingLoopRegion();
3843 const VPRegionBlock *getEnclosingLoopRegion() const;
3844
3845#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3846 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3847 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3848 ///
3849 /// Note that the numbering is applied to the whole VPlan, so printing
3850 /// individual blocks is consistent with the whole VPlan printing.
3851 void print(raw_ostream &O, const Twine &Indent,
3852 VPSlotTracker &SlotTracker) const override;
3853 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3854#endif
3855
3856 /// If the block has multiple successors, return the branch recipe terminating
3857 /// the block. If there is no successor or only a single one, return nullptr.
3858 VPRecipeBase *getTerminator();
3859 const VPRecipeBase *getTerminator() const;
3860
3861 /// Returns true if the block is exiting its parent region.
3862 bool isExiting() const;
3863
3864 /// Clone the current block and its recipes, without updating the operands of
3865 /// the cloned recipes.
3866 VPBasicBlock *clone() override;
3867
3868 /// Returns the predecessor block at index \p Idx with the predecessors as per
3869 /// the corresponding plain CFG. If the block is an entry block to a region,
3870 /// the first predecessor is the single predecessor of a region, and the
3871 /// second predecessor is the exiting block of the region.
3872 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
3873
3874protected:
3875 /// Execute the recipes in the IR basic block \p BB.
3876 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3877
3878 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3879 /// generated for this VPBB.
3880 void connectToPredecessors(VPTransformState &State);
3881
3882private:
3883 /// Create an IR BasicBlock to hold the output instructions generated by this
3884 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3885 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
3886};
3887
3888inline const VPBasicBlock *
3891}
3892
3893/// A special type of VPBasicBlock that wraps an existing IR basic block.
3894/// Recipes of the block get added before the first non-phi instruction in the
3895/// wrapped block.
3896/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3897/// preheader block.
3899 friend class VPlan;
3900
3901 BasicBlock *IRBB;
3902
3903 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
3905 : VPBasicBlock(VPIRBasicBlockSC,
3906 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3907 IRBB(IRBB) {}
3908
3909public:
/// Defaulted destructor; nothing is released here (IRBB is not deleted).
3910 ~VPIRBasicBlock() override = default;
3911
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// only blocks whose ID is VPIRBasicBlockSC.
3912 static inline bool classof(const VPBlockBase *V) {
3913 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3914 }
3915
3916 /// The method which generates the output IR instructions that correspond to
3917 /// this VPBasicBlock, thereby "executing" the VPlan.
3918 void execute(VPTransformState *State) override;
3919
3920 VPIRBasicBlock *clone() override;
3921
/// Returns the IR basic block wrapped by this VPIRBasicBlock.
3922 BasicBlock *getIRBasicBlock() const { return IRBB; }
3923};
3924
3925/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3926/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3927/// A VPRegionBlock may indicate that its contents are to be replicated several
3928/// times. This is designed to support predicated scalarization, in which a
3929/// scalar if-then code structure needs to be generated VF * UF times. Having
3930/// this replication indicator helps to keep a single model for multiple
3931/// candidate VF's. The actual replication takes place only once the desired VF
3932/// and UF have been determined.
3934 friend class VPlan;
3935
3936 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3937 VPBlockBase *Entry;
3938
3939 /// Hold the Single Exiting block of the SESE region modelled by the
3940 /// VPRegionBlock.
3941 VPBlockBase *Exiting;
3942
3943 /// An indicator whether this region is to generate multiple replicated
3944 /// instances of output IR corresponding to its VPBlockBases.
3945 bool IsReplicator;
3946
3947 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
/// Construct a region named \p Name with \p Entry as its single entry and
/// \p Exiting as its single exiting block. \p Entry must have no
/// predecessors and \p Exiting no successors (both asserted); both blocks
/// get this region set as their parent. \p IsReplicator marks the region as
/// a replicate region.
3948 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
3949 const std::string &Name = "", bool IsReplicator = false)
3950 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3951 IsReplicator(IsReplicator) {
3952 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3953 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3954 Entry->setParent(this);
3955 Exiting->setParent(this);
3956 }
/// Construct a region named \p Name whose Entry and Exiting blocks are both
/// nullptr; they can be provided later through setEntry() and setExiting().
3957 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3958 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3959 IsReplicator(IsReplicator) {}
3960
3961public:
/// Defaulted destructor; the Entry and Exiting blocks are not deleted here.
3962 ~VPRegionBlock() override = default;
3963
3964 /// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// only blocks whose ID is VPRegionBlockSC.
3965 static inline bool classof(const VPBlockBase *V) {
3966 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3967 }
3968
/// Returns the single entry block of the region. This is nullptr if the
/// region was constructed without entry and exiting blocks and setEntry()
/// has not been called yet.
3969 const VPBlockBase *getEntry() const { return Entry; }
3970 VPBlockBase *getEntry() { return Entry; }
3971
3972 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3973 /// EntryBlock must have no predecessors (asserted). Also sets this region
/// as the parent of \p EntryBlock.
3974 void setEntry(VPBlockBase *EntryBlock) {
3975 assert(EntryBlock->getPredecessors().empty() &&
3976 "Entry block cannot have predecessors.");
3977 Entry = EntryBlock;
3978 EntryBlock->setParent(this);
3979 }
3980
/// Returns the single exiting block of the region. This is nullptr if the
/// region was constructed without entry and exiting blocks and setExiting()
/// has not been called yet.
3981 const VPBlockBase *getExiting() const { return Exiting; }
3982 VPBlockBase *getExiting() { return Exiting; }
3983
3984 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3985 /// ExitingBlock must have no successors (asserted). Also sets this region
/// as the parent of \p ExitingBlock.
3986 void setExiting(VPBlockBase *ExitingBlock) {
3987 assert(ExitingBlock->getSuccessors().empty() &&
3988 "Exit block cannot have successors.");
3989 Exiting = ExitingBlock;
3990 ExitingBlock->setParent(this);
3991 }
3992
3993 /// Returns the pre-header VPBasicBlock of the loop region.
3995 assert(!isReplicator() && "should only get pre-header of loop regions");
3996 return getSinglePredecessor()->getExitingBasicBlock();
3997 }
3998
3999 /// Returns true if this region is to generate multiple replicated
4000 /// instances of output IR corresponding to its VPBlockBases.
4001 bool isReplicator() const { return IsReplicator; }
4002
4003 /// The method which generates the output IR instructions that correspond to
4004 /// this VPRegionBlock, thereby "executing" the VPlan.
4005 void execute(VPTransformState *State) override;
4006
4007 /// Return the cost of this region.
4008 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4009
4010#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4011 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4012 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4013 /// consecutive numbers.
4014 ///
4015 /// Note that the numbering is applied to the whole VPlan, so printing
4016 /// individual regions is consistent with the whole VPlan printing.
4017 void print(raw_ostream &O, const Twine &Indent,
4018 VPSlotTracker &SlotTracker) const override;
4019 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4020#endif
4021
4022 /// Clone all blocks in the single-entry single-exit region of the block and
4023 /// their recipes without updating the operands of the cloned recipes.
4024 VPRegionBlock *clone() override;
4025
4026 /// Remove the current region from its VPlan, connecting its predecessor to
4027 /// its entry, and its exiting block to its successor.
4028 void dissolveToCFGLoop();
4029};
4030
4031 /// VPlan models a candidate for vectorization, encoding various decisions taken
4032/// to produce efficient output IR, including which branches, basic-blocks and
4033/// output IR instructions to generate, and their cost. VPlan holds a
4034/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4035/// VPBasicBlock.
4036class VPlan {
4037 friend class VPlanPrinter;
4038 friend class VPSlotTracker;
4039
4040 /// VPBasicBlock corresponding to the original preheader. Used to place
4041 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4042 /// rest of VPlan execution.
4043 /// When this VPlan is used for the epilogue vector loop, the entry will be
4044 /// replaced by a new entry block created during skeleton creation.
4045 VPBasicBlock *Entry;
4046
4047 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4048 VPIRBasicBlock *ScalarHeader;
4049
4050 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4051 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4052 /// e.g. if the scalar epilogue always executes.
4054
4055 /// Holds the VFs applicable to this VPlan.
4057
4058 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4059 /// any UF.
4061
4062 /// Holds the name of the VPlan, for printing.
4063 std::string Name;
4064
4065 /// Represents the trip count of the original loop, for folding
4066 /// the tail.
4067 VPValue *TripCount = nullptr;
4068
4069 /// Represents the backedge taken count of the original loop, for folding
4070 /// the tail. It equals TripCount - 1.
4071 VPValue *BackedgeTakenCount = nullptr;
4072
4073 /// Represents the vector trip count.
4074 VPValue VectorTripCount;
4075
4076 /// Represents the vectorization factor of the loop.
4077 VPValue VF;
4078
4079 /// Represents the loop-invariant VF * UF of the vector loop region.
4080 VPValue VFxUF;
4081
4082 /// Holds a mapping between Values and their corresponding VPValue inside
4083 /// VPlan.
4084 Value2VPValueTy Value2VPValue;
4085
4086 /// Contains all the external definitions created for this VPlan. External
4087 /// definitions are VPValues that hold a pointer to their underlying IR.
4089
4090 /// Mapping from SCEVs to the VPValues representing their expansions.
4091 /// NOTE: This mapping is temporary and will be removed once all users have
4092 /// been modeled in VPlan directly.
4093 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
4094
4095 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4096 /// VPlan is destroyed.
4097 SmallVector<VPBlockBase *> CreatedBlocks;
4098
4099 /// Construct a VPlan with \p Entry as the entry block of the plan and with
4100 /// \p ScalarHeader wrapping the original header of the scalar loop.
/// Registers this plan on \p Entry and asserts that \p ScalarHeader has no
/// successors.
4101 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4102 : Entry(Entry), ScalarHeader(ScalarHeader) {
4103 Entry->setPlan(this);
4104 assert(ScalarHeader->getNumSuccessors() == 0 &&
4105 "scalar header must be a leaf node");
4106 }
4107
4108public:
4109 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4110 /// original preheader and scalar header of \p L, to be used as entry and
4111 /// scalar header blocks of the new VPlan.
4112 VPlan(Loop *L);
4113
4114 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4115 /// wrapping \p ScalarHeaderBB and a trip count of \p TC. The entry block is
/// created with the name "preheader"; both blocks are created through the
/// plan's own create* helpers.
4116 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
4117 setEntry(createVPBasicBlock("preheader"));
4118 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4119 TripCount = TC;
4120 }
4121
4123
4125 Entry = VPBB;
4126 VPBB->setPlan(this);
4127 }
4128
4129 /// Generate the IR code for this VPlan.
4130 void execute(VPTransformState *State);
4131
4132 /// Return the cost of this plan.
4134
/// Returns the entry VPBasicBlock of the plan.
4135 VPBasicBlock *getEntry() { return Entry; }
4136 const VPBasicBlock *getEntry() const { return Entry; }
4137
4138 /// Returns the preheader of the vector loop region, if one exists, or null
4139 /// otherwise.
4141 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4142 return VectorRegion
4143 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4144 : nullptr;
4145 }
4146
4147 /// Returns the VPRegionBlock of the vector loop.
4150
4151 /// Returns the 'middle' block of the plan, that is the block that selects
4152 /// whether to execute the scalar tail loop or the exit block from the loop
4153 /// latch. If there is an early exit from the vector loop, the middle block
4154 /// conceptually has the early exit block as third successor, split across 2
4155 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4156 /// tail loop or the exit block. If the scalar tail loop or exit block are
4157 /// known to always execute, the middle block may branch directly to that
4158 /// block. This function cannot be called once the vector loop region has been
4159 /// removed.
4161 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4162 assert(
4163 LoopRegion &&
4164 "cannot call the function after vector loop region has been removed");
4165 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4166 if (RegionSucc->getSingleSuccessor() ||
4167 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4168 return RegionSucc;
4169 // There is an early exit. The successor of RegionSucc is the middle block.
4170 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4171 }
4172
4174 return const_cast<VPlan *>(this)->getMiddleBlock();
4175 }
4176
4177 /// Return the VPBasicBlock for the preheader of the scalar loop.
4179 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4180 }
4181
4182 /// Return the VPIRBasicBlock wrapping the header of the original scalar loop.
4183 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4184
4185 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4186 /// the original scalar loop. As noted on the ExitBlocks member, some of the
/// returned blocks may be unreachable at the moment.
4187 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4188
4189 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4190 /// exit block.
4192
4193 /// Returns true if \p VPBB is an exit block.
4194 bool isExitBlock(VPBlockBase *VPBB);
4195
4196 /// The trip count of the original loop.
4198 assert(TripCount && "trip count needs to be set before accessing it");
4199 return TripCount;
4200 }
4201
4202 /// Set the trip count to \p NewTripCount. The trip count must not have been
4203 /// set yet and \p NewTripCount must be non-null (both asserted); to replace
/// an existing trip count use resetTripCount().
4204 void setTripCount(VPValue *NewTripCount) {
4205 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4206 TripCount = NewTripCount;
4207 }
4208
4209 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4210 /// the original trip count have been replaced. Asserts that a trip count is
/// currently set, that it has no remaining users, and that \p NewTripCount
/// is non-null.
4211 void resetTripCount(VPValue *NewTripCount) {
4212 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4213 "TripCount must be set when resetting");
4214 TripCount = NewTripCount;
4215 }
4216
4217 /// The backedge taken count of the original loop.
4219 if (!BackedgeTakenCount)
4220 BackedgeTakenCount = new VPValue();
4221 return BackedgeTakenCount;
4222 }
4223
4224 /// Returns the VPValue representing the vector trip count (a plan member).
4225 VPValue &getVectorTripCount() { return VectorTripCount; }
4226
4227 /// Returns the VPValue representing the VF of the vector loop region.
4228 VPValue &getVF() { return VF; }
4229
4230 /// Returns the VPValue representing VF * UF of the vector loop region.
4231 VPValue &getVFxUF() { return VFxUF; }
4232
4235 }
4236
/// Add \p VF to the set of VFs applicable to this VPlan.
4237 void addVF(ElementCount VF) { VFs.insert(VF); }
4238
4240 assert(hasVF(VF) && "Cannot set VF not already in plan");
4241 VFs.clear();
4242 VFs.insert(VF);
4243 }
4244
/// Returns true if \p VF is one of the VFs applicable to this VPlan.
4245 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
/// Returns true if at least one of the plan's VFs is scalable.
4246 bool hasScalableVF() const {
4247 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4248 }
4249
4250 /// Returns an iterator range over all VFs of the plan.
4253 return VFs;
4254 }
4255
/// Returns true if the plan has exactly one VF and that VF is scalar.
4256 bool hasScalarVFOnly() const {
4257 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
// A plan may contain the fixed VF of 1 only if it is the plan's single VF.
4258 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4259 "Plan with scalar VF should only have a single VF");
4260 return HasScalarVFOnly;
4261 }
4262
/// Returns true if the plan is valid for \p UF: either no UFs are recorded,
/// in which case the plan is valid for any UF, or \p UF is one of them.
4263 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4264
/// Returns the single UF of the plan. Asserts that exactly one UF is
/// recorded.
4265 unsigned getUF() const {
4266 assert(UFs.size() == 1 && "Expected a single UF");
4267 return UFs[0];
4268 }
4269
/// Restrict the plan to the single unroll factor \p UF. Asserts that the
/// plan is valid for \p UF (see hasUF()); afterwards \p UF is the only
/// recorded UF.
4270 void setUF(unsigned UF) {
4271 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4272 UFs.clear();
4273 UFs.insert(UF);
4274 }
4275
4276 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4277 /// concrete UF (exactly one UF is recorded).
4278 bool isUnrolled() const { return UFs.size() == 1; }
4279
4280 /// Return a string with the name of the plan and the applicable VFs and UFs.
4281 std::string getName() const;
4282
/// Set the name of the plan, which is used for printing, to \p newName.
4283 void setName(const Twine &newName) { Name = newName.str(); }
4284
4285 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4286 /// yet) for \p V.
4288 assert(V && "Trying to get or add the VPValue of a null Value");
4289 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4290 if (Inserted) {
4291 VPValue *VPV = new VPValue(V);
4292 VPLiveIns.push_back(VPV);
4293 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4294 It->second = VPV;
4295 }
4296
4297 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4298 return It->second;
4299 }
4300
4301 /// Return a VPValue wrapping i1 true.
4303 LLVMContext &Ctx = getContext();
4305 }
4306
4307 /// Return a VPValue wrapping i1 false.
4309 LLVMContext &Ctx = getContext();
4311 }
4312
4313 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
/// Only looks \p V up in Value2VPValue; never creates a new live-in.
4314 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4315
4316 /// Return the list of live-in VPValues available in the VPlan.
4318 assert(all_of(Value2VPValue,
4319 [this](const auto &P) {
4320 return is_contained(VPLiveIns, P.second);
4321 }) &&
4322 "all VPValues in Value2VPValue must also be in VPLiveIns");
4323 return VPLiveIns;
4324 }
4325
4326#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4327 /// Print the live-ins of this VPlan to \p O.
4328 void printLiveIns(raw_ostream &O) const;
4329
4330 /// Print this VPlan to \p O.
4331 void print(raw_ostream &O) const;
4332
4333 /// Print this VPlan in DOT format to \p O.
4334 void printDOT(raw_ostream &O) const;
4335
4336 /// Dump the plan to stderr (for debugging).
4337 LLVM_DUMP_METHOD void dump() const;
4338#endif
4339
4340 /// Returns the canonical induction recipe of the vector loop.
4343 if (EntryVPBB->empty()) {
4344 // VPlan native path.
4345 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4346 }
4347 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4348 }
4349
/// Return the VPValue recorded as the expansion of \p S in SCEVToExpansion,
/// or nullptr if no expansion has been recorded for \p S.
4350 VPValue *getSCEVExpansion(const SCEV *S) const {
4351 return SCEVToExpansion.lookup(S);
4352 }
4353
/// Record \p V as the expansion of \p S. Asserts that no expansion has been
/// recorded for \p S yet.
4354 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4355 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4356 SCEVToExpansion[S] = V;
4357 }
4358
4359 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4360 /// recipes to refer to the clones, and return it.
4361 VPlan *duplicate();
4362
4363 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4364 /// present. The returned block is owned by the VPlan and deleted once the
4365 /// VPlan is destroyed.
4367 VPRecipeBase *Recipe = nullptr) {
4368 auto *VPB = new VPBasicBlock(Name, Recipe);
4369 CreatedBlocks.push_back(VPB);
4370 return VPB;
4371 }
4372
4373 /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
4374 /// IsReplicator is true, the region is a replicate region. The returned block
4375 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4377 const std::string &Name = "",
4378 bool IsReplicator = false) {
4379 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
4380 CreatedBlocks.push_back(VPB);
4381 return VPB;
4382 }
4383
4384 /// Create a new loop VPRegionBlock with \p Name and entry and exiting blocks set
4385 /// to nullptr. The returned block is owned by the VPlan and deleted once the
4386 /// VPlan is destroyed.
4387 VPRegionBlock *createVPRegionBlock(const std::string &Name = "") {
4388 auto *VPB = new VPRegionBlock(Name);
// Record the block in CreatedBlocks so the plan owns it.
4389 CreatedBlocks.push_back(VPB);
4390 return VPB;
4391 }
4392
4393 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4394 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4395 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4397
4398 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4399 /// instructions in \p IRBB, except its terminator which is managed by the
4400 /// successors of the block in VPlan. The returned block is owned by the VPlan
4401 /// and deleted once the VPlan is destroyed.
4403
4404 /// Returns true if the VPlan is based on a loop with an early exit. That is
4405 /// the case if the VPlan has either more than one exit block that has
4406 /// predecessors, or a single exit block with multiple predecessors (one for
4407 /// the exit via the latch and one via the other early exit).
4408 bool hasEarlyExit() const {
// Exit blocks without predecessors are not counted by the first check.
4409 return count_if(ExitBlocks,
4410 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4411 1 ||
4412 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4413 }
4414
4415 /// Returns true if the scalar tail may execute after the vector loop. Note
4416 /// that this relies on unneeded branches to the scalar tail loop being
4417 /// removed.
4418 bool hasScalarTail() const {
4419 return !(!getScalarPreheader()->hasPredecessors() ||
4421 }
4422};
4423
4424#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4426 Plan.print(OS);
4427 return OS;
4428}
4429#endif
4430
4431} // end namespace llvm
4432
4433#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ShadowStackGC > C("shadow-stack", "Very portable GC for uncooperative code generators")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition: Compiler.h:218
dxil translate DXIL Translate Metadata
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1328
Hexagon Common GEP
iv users
Definition: IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
static bool mayHaveSideEffects(MachineInstr &MI)
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:499
static const uint32_t IV[8]
Definition: blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:131
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:612
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:23
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:868
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:875
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:124
static DebugLoc getUnknown()
Definition: DebugLoc.h:162
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:187
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:229
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:156
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:312
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:949
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:524
uint32_t getFactor() const
Definition: VectorUtils.h:540
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:594
bool isReverse() const
Definition: VectorUtils.h:539
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
Metadata node.
Definition: Metadata.h:1077
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:221
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:218
Root of the metadata hierarchy.
Definition: Metadata.h:63
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:104
void clear()
Completely clear the SetVector.
Definition: SetVector.h:284
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:279
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:168
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:269
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:757
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:356
bool empty() const
Definition: SmallVector.h:82
size_t size() const
Definition: SmallVector.h:79
iterator erase(const_iterator CI)
Definition: SmallVector.h:738
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3458
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3466
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3460
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3745
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3773
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3820
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3775
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3772
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3798
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3762
iterator end()
Definition: VPlan.h:3782
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3780
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3774
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3833
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition: VPlan.cpp:813
~VPBasicBlock() override
Definition: VPlan.h:3766
const_reverse_iterator rbegin() const
Definition: VPlan.h:3786
reverse_iterator rend()
Definition: VPlan.h:3787
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3760
VPRecipeBase & back()
Definition: VPlan.h:3795
const VPRecipeBase & front() const
Definition: VPlan.h:3792
const_iterator begin() const
Definition: VPlan.h:3781
VPRecipeBase & front()
Definition: VPlan.h:3793
const VPRecipeBase & back() const
Definition: VPlan.h:3794
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3811
bool empty() const
Definition: VPlan.h:3791
const_iterator end() const
Definition: VPlan.h:3783
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3806
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3801
reverse_iterator rbegin()
Definition: VPlan.h:3785
size_t size() const
Definition: VPlan.h:3790
const_reverse_iterator rend() const
Definition: VPlan.h:3788
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2390
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2444
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2419
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2424
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2414
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition: VPlan.h:2429
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2401
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2396
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:300
VPRegionBlock * getParent()
Definition: VPlan.h:173
VPBlocksTy & getPredecessors()
Definition: VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:377
void setName(const Twine &newName)
Definition: VPlan.h:166
size_t getNumSuccessors() const
Definition: VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition: VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:322
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:349
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition: VPlan.h:335
size_t getNumPredecessors() const
Definition: VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:291
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessor list.
Definition: VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:174
const VPRegionBlock * getParent() const
Definition: VPlan.h:174
const std::string & getName() const
Definition: VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:307
unsigned getVPBlockID() const
Definition: VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:356
void swapPredecessors()
Swap predecessors of the block.
Definition: VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:150
VPBlocksTy & getSuccessors()
Definition: VPlan.h:199
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:271
void setParent(VPRegionBlock *P)
Definition: VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:198
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlanUtils.h:110
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2915
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2936
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2920
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2944
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition: VPlan.h:2917
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3401
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3440
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3408
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3403
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3433
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3428
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3416
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3447
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:300
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3566
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1. operand) + IV (2. operand) * StepValue (3. operand).
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3605
VPValue * getStepValue() const
Definition: VPlan.h:3622
Type * getScalarType() const
Definition: VPlan.h:3617
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3593
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3585
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3625
VPValue * getStartValue() const
Definition: VPlan.h:3621
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3577
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3489
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3496
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3502
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3508
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3491
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3515
Recipe to expand a SCEV expression.
Definition: VPlan.h:3364
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition: VPlan.h:3377
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3382
VPExpandSCEVRecipe(const SCEV *Expr)
Definition: VPlan.h:3368
const SCEV * getSCEV() const
Definition: VPlan.h:3394
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3373
~VPExpandSCEVRecipe() override=default
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
Definition: VPlan.h:2960
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition: VPlan.h:3045
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition: VPlan.h:3032
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3014
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition: VPlan.h:3005
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition: VPlan.h:2996
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition: VPlan.h:3000
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition: VPlan.h:2998
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1964
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition: VPlan.h:1971
static bool classof(const VPValue *V)
Definition: VPlan.h:1981
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2012
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition: VPlan.h:2017
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2001
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2009
VPValue * getStartValue() const
Definition: VPlan.h:2004
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1977
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2021
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:1966
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1679
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1690
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1707
unsigned getOpcode() const
Definition: VPlan.h:1703
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:1684
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3898
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:493
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3922
~VPIRBasicBlock() override
Definition: VPlan.h:3910
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3912
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:518
Class to record and manage LLVM IR flags.
Definition: VPlan.h:600
FastMathFlagsTy FMFs
Definition: VPlan.h:664
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition: VPlan.h:710
VPIRFlags(WrapFlagsTy WrapFlags)
Definition: VPlan.h:705
WrapFlagsTy WrapFlags
Definition: VPlan.h:658
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:657
void printFlags(raw_ostream &O) const
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:662
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:816
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition: VPlan.h:659
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:798
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition: VPlan.h:821
void transferFlags(VPIRFlags &Other)
Definition: VPlan.h:720
ExactFlagsTy ExactFlags
Definition: VPlan.h:661
bool hasNoSignedWrap() const
Definition: VPlan.h:840
bool isDisjoint() const
Definition: VPlan.h:851
VPIRFlags(FastMathFlags FMFs)
Definition: VPlan.h:708
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition: VPlan.h:713
VPIRFlags(CmpInst::Predicate Pred)
Definition: VPlan.h:702
bool isNonNeg() const
Definition: VPlan.h:823
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:810
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition: VPlan.h:813
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:660
unsigned AllFlags
Definition: VPlan.h:665
void setPredicate(CmpInst::Predicate Pred)
Definition: VPlan.h:804
bool hasNoUnsignedWrap() const
Definition: VPlan.h:829
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:663
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:726
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition: VPlan.h:761
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition: VPlan.h:716
VPIRFlags(Instruction &I)
Definition: VPlan.h:671
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1342
Instruction & getInstruction() const
Definition: VPlan.h:1373
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1387
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1393
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1360
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1381
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition: VPlan.h:1348
Helper to manage IR metadata for recipes.
Definition: VPlan.h:935
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition: VPlan.h:943
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata & operator=(const VPIRMetadata &Other)
Definition: VPlan.h:952
VPIRMetadata(const VPIRMetadata &Other)
Copy constructor for cloning.
Definition: VPlan.h:950
void addMetadata(unsigned Kind, MDNode *Node)
Add metadata with kind Kind and Node.
Definition: VPlan.h:961
void applyMetadata(Instruction &I) const
Add all metadata to I.
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition: VPlan.h:1185
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1229
static bool classof(const VPUser *R)
Definition: VPlan.h:1213
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1195
Type * getResultType() const
Definition: VPlan.h:1235
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1190
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1217
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:976
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition: VPlan.h:1098
bool hasResult() const
Definition: VPlan.h:1137
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition: VPlan.h:1053
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition: VPlan.h:1009
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition: VPlan.h:1043
@ ExtractPenultimateElement
Definition: VPlan.h:1019
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition: VPlan.h:1056
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition: VPlan.h:1047
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition: VPlan.h:1006
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition: VPlan.h:1003
@ VScale
Returns the value for vscale.
Definition: VPlan.h:1058
@ CanonicalIVIncrementForPart
Definition: VPlan.h:996
@ CalculateTripCountMinusVF
Definition: VPlan.h:994
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1177
unsigned getOpcode() const
Definition: VPlan.h:1117
A common base class for interleaved memory operations.
Definition: VPlan.h:2460
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition: VPlan.h:2523
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition: VPlan.h:2529
static bool classof(const VPUser *U)
Definition: VPlan.h:2505
virtual bool onlyFirstLaneUsed(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition: VPlan.h:2472
Instruction * getInsertPos() const
Definition: VPlan.h:2527
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2500
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition: VPlan.h:2525
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2517
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2546
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2511
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition: VPlan.h:2598
~VPInterleaveEVLRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition: VPlan.h:2632
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2639
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2613
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:2600
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2557
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2590
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2584
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2567
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition: VPlan.h:2559
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlanHelpers.h:125
A recipe for forming partial reductions.
Definition: VPlan.h:2734
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
Definition: VPlan.h:2742
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned ScaleFactor, Instruction *ReductionInst=nullptr)
Definition: VPlan.h:2746
~VPPartialReductionRecipe() override=default
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition: VPlan.h:2780
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2777
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2761
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition: VPlan.h:1246
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition: VPlan.h:1268
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition: VPlan.h:1263
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition: VPlan.h:3889
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition: VPlan.h:1288
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition: VPlan.h:1255
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition: VPlan.h:1274
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition: VPlan.h:1277
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:3072
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:3104
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3080
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:3091
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition: VPlan.h:3076
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:394
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:477
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition: VPlan.h:488
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:415
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:482
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:457
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:416
static bool classof(const VPUser *U)
Definition: VPlan.h:462
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:405
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2793
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2825
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2822
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:2795
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2806
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2318
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2375
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2344
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition: VPlan.h:2358
~VPReductionPHIRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2381
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition: VPlan.h:2334
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition: VPlan.h:2369
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2378
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition: VPlan.h:2372
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2647
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2719
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2691
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:2676
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2723
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2725
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition: VPlan.h:2715
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2717
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2721
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:2669
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2685
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition: VPlan.h:2655
static bool classof(const VPUser *U)
Definition: VPlan.h:2696
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3933
const VPBlockBase * getEntry() const
Definition: VPlan.h:3969
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:4001
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3986
VPBlockBase * getExiting()
Definition: VPlan.h:3982
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3974
const VPBlockBase * getExiting() const
Definition: VPlan.h:3981
VPBlockBase * getEntry()
Definition: VPlan.h:3970
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3994
~VPRegionBlock() override
Definition: VPlan.h:3962
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3965
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2837
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, VPIRMetadata Metadata={})
Definition: VPlan.h:2845
bool isSingleScalar() const
Definition: VPlan.h:2882
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2887
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2894
bool isPredicated() const
Definition: VPlan.h:2884
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2857
unsigned getOpcode() const
Definition: VPlan.h:2911
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2906
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3635
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3690
VPValue * getStepValue() const
Definition: VPlan.h:3687
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3675
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:3646
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition: VPlan.h:3667
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3658
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition: VPlan.h:3639
~VPScalarIVStepsRecipe() override=default
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:521
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:527
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:586
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:531
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:589
static bool classof(const VPUser *U)
Definition: VPlan.h:578
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:523
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanHelpers.h:382
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:923
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:197
operand_range operands()
Definition: VPlanValue.h:265
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:241
unsigned getNumOperands() const
Definition: VPlanValue.h:235
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:236
iterator_range< const_operand_iterator > const_operand_range
Definition: VPlanValue.h:259
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition: VPlan.cpp:1403
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:125
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:174
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:85
unsigned getNumUsers() const
Definition: VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:169
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
Definition: VPlan.h:1830
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1867
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1853
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1874
const VPValue * getVFValue() const
Definition: VPlan.h:1849
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition: VPlan.h:1860
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1838
A recipe to compute the pointers for widened memory accesses of IndexedTy.
Definition: VPlan.h:1889
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition: VPlan.h:1924
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1893
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1910
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1903
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1927
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1917
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1626
const_operand_range args() const
Definition: VPlan.h:1665
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1646
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:1633
operand_range args()
Definition: VPlan.h:1664
Function * getCalledScalarFunction() const
Definition: VPlan.h:1660
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3530
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition: VPlan.h:3550
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3537
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3532
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1480
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1488
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1530
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:1496
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1533
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1507
A recipe for handling GEP instructions.
Definition: VPlan.h:1766
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1816
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands)
Definition: VPlan.h:1782
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1803
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1792
~VPWidenGEPRecipe() override=default
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2029
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2091
static bool classof(const VPValue *V)
Definition: VPlan.h:2045
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition: VPlan.h:2061
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2076
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition: VPlan.h:2069
PHINode * getPHINode() const
Definition: VPlan.h:2071
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2033
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2057
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2074
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2083
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2040
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2050
const VPValue * getVFValue() const
Definition: VPlan.h:2064
const VPValue * getStepValue() const
Definition: VPlan.h:2058
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2104
const TruncInst * getTruncInst() const
Definition: VPlan.h:2169
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:2144
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2120
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2136
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2168
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2111
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2185
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition: VPlan.h:2164
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2177
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1537
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:1563
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1598
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1607
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:1554
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1613
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1580
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1610
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1601
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
A common base class for widening memory operations.
Definition: VPlan.h:3114
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:3122
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:3162
static bool classof(const VPUser *U)
Definition: VPlan.h:3156
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:3182
Instruction & Ingredient
Definition: VPlan.h:3116
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3145
Instruction & getIngredient() const
Definition: VPlan.h:3190
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:3119
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:3149
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3176
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:3172
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition: VPlan.h:3135
void setMask(VPValue *Mask)
Definition: VPlan.h:3127
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:3169
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:3166
A recipe for widened phis.
Definition: VPlan.h:2240
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition: VPlan.h:2245
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition: VPlan.h:2250
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2258
~VPWidenPHIRecipe() override=default
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2209
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition: VPlan.h:2219
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition: VPlan.h:2197
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1437
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1453
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands)
Definition: VPlan.h:1447
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition: VPlan.h:1441
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition: VPlan.h:1470
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlanHelpers.h:416
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:4036
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1135
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1111
bool hasVF(ElementCount VF) const
Definition: VPlan.h:4245
LLVMContext & getContext() const
Definition: VPlan.h:4233
VPBasicBlock * getEntry()
Definition: VPlan.h:4135
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4376
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:4225
void setName(const Twine &newName)
Definition: VPlan.h:4283
bool hasScalableVF() const
Definition: VPlan.h:4246
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:4231
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:4228
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:4197
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition: VPlan.h:4302
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:4218
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:4252
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition: VPlan.h:4116
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition: VPlan.cpp:943
LLVM_ABI_FOR_TEST ~VPlan()
Definition: VPlan.cpp:920
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition: VPlan.cpp:951
const VPBasicBlock * getEntry() const
Definition: VPlan.h:4136
unsigned getUF() const
Definition: VPlan.h:4265
VPRegionBlock * createVPRegionBlock(const std::string &Name="")
Create a new loop VPRegionBlock with Name and entry and exiting blocks set to nullptr.
Definition: VPlan.h:4387
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition: VPlan.cpp:1249
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4354
bool hasUF(unsigned UF) const
Definition: VPlan.h:4263
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition: VPlan.h:4187
void setVF(ElementCount VF)
Definition: VPlan.h:4239
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition: VPlan.h:4278
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1040
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition: VPlan.h:4408
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1022
const VPBasicBlock * getMiddleBlock() const
Definition: VPlan.h:4173
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition: VPlan.h:4204
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:4211
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:4160
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:4124
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4366
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1255
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition: VPlan.h:4308
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4287
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1141
bool hasScalarVFOnly() const
Definition: VPlan.h:4256
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:4178
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:958
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition: VPlan.h:4317
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4341
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1094
void addVF(ElementCount VF)
Definition: VPlan.h:4237
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:4183
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:4314
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4350
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1056
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:4140
void setUF(unsigned UF)
Definition: VPlan.h:4270
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition: VPlan.h:4418
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1182
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
An ilist node that can access its parent list.
Definition: ilist_node.h:327
Increasing range of size_t indices.
Definition: STLExtras.h:2444
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:81
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
Definition: Intrinsics.cpp:743
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:860
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition: STLExtras.h:870
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
SDValue getStoredValue(SDValue Op)
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:386
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:77
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
@ Other
Any other memory.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ Mul
Product of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1973
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:312
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1980
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition: VPlan.h:3708
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition: VPlan.h:3729
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition: VPlan.h:3713
This struct provides a method for customizing the way a cast is performed.
Definition: Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition: VPlan.h:3700
This struct provides a way to check if a given cast is possible.
Definition: Casting.h:253
static bool isPossible(const From &f)
Definition: Casting.h:254
Struct to hold various analysis needed for cost computations.
Definition: VPlanHelpers.h:344
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2283
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2289
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2284
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2307
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition: VPlan.h:630
NonNegFlagsTy(bool IsNonNeg)
Definition: VPlan.h:635
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:625
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:618
An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use of cast/dyn_cast/isa and an execute() implementation.
Definition: VPlan.h:1410
PHINode & getIRPhi()
Definition: VPlan.h:1418
VPIRPhi(PHINode &PN)
Definition: VPlan.h:1411
static bool classof(const VPRecipeBase *U)
Definition: VPlan.h:1413
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition: VPlan.h:1429
static bool classof(const VPUser *U)
Definition: VPlan.h:1306
VPPhi * clone() override
Clone the current recipe.
Definition: VPlan.h:1321
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition: VPlan.h:1336
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1303
static bool classof(const VPSingleDefRecipe *SDR)
Definition: VPlan.h:1316
static bool classof(const VPValue *V)
Definition: VPlan.h:1311
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition: VPlan.h:869
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:883
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Instruction &I)
Definition: VPlan.h:874
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:878
std::optional< InstructionCost > getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
static bool classof(const VPValue *V)
Definition: VPlan.h:903
static bool classof(const VPSingleDefRecipe *U)
Definition: VPlan.h:908
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
static bool classof(const VPUser *U)
Definition: VPlan.h:898
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition: VPlan.h:870
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
Definition: VPlanHelpers.h:205
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
Definition: VPlan.h:3236
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3249
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3237
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3265
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:3196
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3224
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition: VPlan.h:3197
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3206
A recipe for widening select instructions.
Definition: VPlan.h:1720
bool isInvariantCond() const
Definition: VPlan.h:1753
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1727
VPWidenSelectRecipe(SelectInst &I, ArrayRef< VPValue * > Operands)
Definition: VPlan.h:1721
VPValue * getCond() const
Definition: VPlan.h:1749
unsigned getOpcode() const
Definition: VPlan.h:1747
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1758
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
Definition: VPlan.h:3318
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3330
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3349
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3319
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3333
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.
Definition: VPlan.h:3276
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3306
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3294
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3285
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition: VPlan.h:3277