LLVM 21.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanHelpers.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanUtils.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
25#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/Type.h"
31#include "llvm/IR/Value.h"
35#include "llvm/Support/Debug.h"
40#include <cassert>
41
42using namespace llvm;
43
45
46namespace llvm {
48}
50
51#define LV_NAME "loop-vectorize"
52#define DEBUG_TYPE LV_NAME
53
55 switch (getVPDefID()) {
56 case VPInstructionSC:
57 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
58 case VPInterleaveSC:
59 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
60 case VPWidenStoreEVLSC:
61 case VPWidenStoreSC:
62 return true;
63 case VPReplicateSC:
64 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
65 ->mayWriteToMemory();
66 case VPWidenCallSC:
67 return !cast<VPWidenCallRecipe>(this)
68 ->getCalledScalarFunction()
69 ->onlyReadsMemory();
70 case VPWidenIntrinsicSC:
71 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
72 case VPBranchOnMaskSC:
73 case VPScalarIVStepsSC:
74 case VPPredInstPHISC:
75 return false;
76 case VPBlendSC:
77 case VPReductionEVLSC:
78 case VPReductionSC:
79 case VPVectorPointerSC:
80 case VPWidenCanonicalIVSC:
81 case VPWidenCastSC:
82 case VPWidenGEPSC:
83 case VPWidenIntOrFpInductionSC:
84 case VPWidenLoadEVLSC:
85 case VPWidenLoadSC:
86 case VPWidenPHISC:
87 case VPWidenSC:
88 case VPWidenEVLSC:
89 case VPWidenSelectSC: {
90 const Instruction *I =
91 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
92 (void)I;
93 assert((!I || !I->mayWriteToMemory()) &&
94 "underlying instruction may write to memory");
95 return false;
96 }
97 default:
98 return true;
99 }
100}
101
103 switch (getVPDefID()) {
104 case VPInstructionSC:
105 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
106 case VPWidenLoadEVLSC:
107 case VPWidenLoadSC:
108 return true;
109 case VPReplicateSC:
110 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
111 ->mayReadFromMemory();
112 case VPWidenCallSC:
113 return !cast<VPWidenCallRecipe>(this)
114 ->getCalledScalarFunction()
115 ->onlyWritesMemory();
116 case VPWidenIntrinsicSC:
117 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
118 case VPBranchOnMaskSC:
119 case VPPredInstPHISC:
120 case VPScalarIVStepsSC:
121 case VPWidenStoreEVLSC:
122 case VPWidenStoreSC:
123 return false;
124 case VPBlendSC:
125 case VPReductionEVLSC:
126 case VPReductionSC:
127 case VPVectorPointerSC:
128 case VPWidenCanonicalIVSC:
129 case VPWidenCastSC:
130 case VPWidenGEPSC:
131 case VPWidenIntOrFpInductionSC:
132 case VPWidenPHISC:
133 case VPWidenSC:
134 case VPWidenEVLSC:
135 case VPWidenSelectSC: {
136 const Instruction *I =
137 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
138 (void)I;
139 assert((!I || !I->mayReadFromMemory()) &&
140 "underlying instruction may read from memory");
141 return false;
142 }
143 default:
144 return true;
145 }
146}
147
149 switch (getVPDefID()) {
150 case VPDerivedIVSC:
151 case VPPredInstPHISC:
152 case VPScalarCastSC:
153 case VPReverseVectorPointerSC:
154 return false;
155 case VPInstructionSC:
156 return mayWriteToMemory();
157 case VPWidenCallSC: {
158 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
159 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
160 }
161 case VPWidenIntrinsicSC:
162 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
163 case VPBlendSC:
164 case VPReductionEVLSC:
165 case VPReductionSC:
166 case VPScalarIVStepsSC:
167 case VPVectorPointerSC:
168 case VPWidenCanonicalIVSC:
169 case VPWidenCastSC:
170 case VPWidenGEPSC:
171 case VPWidenIntOrFpInductionSC:
172 case VPWidenPHISC:
173 case VPWidenPointerInductionSC:
174 case VPWidenSC:
175 case VPWidenEVLSC:
176 case VPWidenSelectSC: {
177 const Instruction *I =
178 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
179 (void)I;
180 assert((!I || !I->mayHaveSideEffects()) &&
181 "underlying instruction has side-effects");
182 return false;
183 }
184 case VPInterleaveSC:
185 return mayWriteToMemory();
186 case VPWidenLoadEVLSC:
187 case VPWidenLoadSC:
188 case VPWidenStoreEVLSC:
189 case VPWidenStoreSC:
190 assert(
191 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
193 "mayHaveSideffects result for ingredient differs from this "
194 "implementation");
195 return mayWriteToMemory();
196 case VPReplicateSC: {
197 auto *R = cast<VPReplicateRecipe>(this);
198 return R->getUnderlyingInstr()->mayHaveSideEffects();
199 }
200 default:
201 return true;
202 }
203}
204
206 assert(!Parent && "Recipe already in some VPBasicBlock");
207 assert(InsertPos->getParent() &&
208 "Insertion position not in any VPBasicBlock");
209 InsertPos->getParent()->insert(this, InsertPos->getIterator());
210}
211
214 assert(!Parent && "Recipe already in some VPBasicBlock");
215 assert(I == BB.end() || I->getParent() == &BB);
216 BB.insert(this, I);
217}
218
220 assert(!Parent && "Recipe already in some VPBasicBlock");
221 assert(InsertPos->getParent() &&
222 "Insertion position not in any VPBasicBlock");
223 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
224}
225
227 assert(getParent() && "Recipe not in any VPBasicBlock");
229 Parent = nullptr;
230}
231
233 assert(getParent() && "Recipe not in any VPBasicBlock");
235}
236
239 insertAfter(InsertPos);
240}
241
245 insertBefore(BB, I);
246}
247
249 // Get the underlying instruction for the recipe, if there is one. It is used
250 // to
251 // * decide if cost computation should be skipped for this recipe,
252 // * apply forced target instruction cost.
253 Instruction *UI = nullptr;
254 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
255 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
256 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
257 UI = IG->getInsertPos();
258 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
259 UI = &WidenMem->getIngredient();
260
261 InstructionCost RecipeCost;
262 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
263 RecipeCost = 0;
264 } else {
265 RecipeCost = computeCost(VF, Ctx);
266 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
267 RecipeCost.isValid())
269 }
270
271 LLVM_DEBUG({
272 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
273 dump();
274 });
275 return RecipeCost;
276}
277
279 VPCostContext &Ctx) const {
280 llvm_unreachable("subclasses should implement computeCost");
281}
282
285 VPCostContext &Ctx) const {
286 std::optional<unsigned> Opcode = std::nullopt;
288
289 // If the partial reduction is predicated, a select will be operand 0 rather
290 // than the binary op
291 using namespace llvm::VPlanPatternMatch;
292 if (match(getOperand(0), m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
293 BinOpR = BinOpR->getOperand(1)->getDefiningRecipe();
294
295 if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
296 Opcode = std::make_optional(WidenR->getOpcode());
297
298 VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();
299 VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();
300
301 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
302 auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
303 : BinOpR->getOperand(0));
304 auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
305 : BinOpR->getOperand(1));
306
307 auto GetExtendKind = [](VPRecipeBase *R) {
308 // The extend could come from outside the plan.
309 if (!R)
311 auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
312 if (!WidenCastR)
314 if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
316 if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
319 };
320
321 return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
322 PhiType, VF, GetExtendKind(ExtAR),
323 GetExtendKind(ExtBR), Opcode);
324}
325
328 auto &Builder = State.Builder;
329
330 assert(getOpcode() == Instruction::Add &&
331 "Unhandled partial reduction opcode");
332
333 Value *BinOpVal = State.get(getOperand(0));
334 Value *PhiVal = State.get(getOperand(1));
335 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
336
337 Type *RetTy = PhiVal->getType();
338
339 CallInst *V = Builder.CreateIntrinsic(
340 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
341 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
342
343 State.set(this, V);
344}
345
346#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
348 VPSlotTracker &SlotTracker) const {
349 O << Indent << "PARTIAL-REDUCE ";
351 O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
353}
354#endif
355
357 assert(OpType == OperationType::FPMathOp &&
358 "recipe doesn't have fast math flags");
359 FastMathFlags Res;
360 Res.setAllowReassoc(FMFs.AllowReassoc);
361 Res.setNoNaNs(FMFs.NoNaNs);
362 Res.setNoInfs(FMFs.NoInfs);
363 Res.setNoSignedZeros(FMFs.NoSignedZeros);
364 Res.setAllowReciprocal(FMFs.AllowReciprocal);
365 Res.setAllowContract(FMFs.AllowContract);
366 Res.setApproxFunc(FMFs.ApproxFunc);
367 return Res;
368}
369
370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
372#endif
373
374template <unsigned PartOpIdx>
375VPValue *
377 if (U.getNumOperands() == PartOpIdx + 1)
378 return U.getOperand(PartOpIdx);
379 return nullptr;
380}
381
382template <unsigned PartOpIdx>
384 if (auto *UnrollPartOp = getUnrollPartOperand(U))
385 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
386 return 0;
387}
388
391 const Twine &Name)
392 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
393 Pred, DL),
394 Opcode(Opcode), Name(Name.str()) {
395 assert(Opcode == Instruction::ICmp &&
396 "only ICmp predicates supported at the moment");
397}
398
400 std::initializer_list<VPValue *> Operands,
401 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
402 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
403 Opcode(Opcode), Name(Name.str()) {
404 // Make sure the VPInstruction is a floating-point operation.
405 assert(isFPMathOp() && "this op can't take fast-math flags");
406}
407
408bool VPInstruction::doesGeneratePerAllLanes() const {
409 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
410}
411
412bool VPInstruction::canGenerateScalarForFirstLane() const {
414 return true;
416 return true;
417 switch (Opcode) {
418 case Instruction::ICmp:
419 case Instruction::Select:
427 return true;
428 default:
429 return false;
430 }
431}
432
433Value *VPInstruction::generatePerLane(VPTransformState &State,
434 const VPLane &Lane) {
435 IRBuilderBase &Builder = State.Builder;
436
438 "only PtrAdd opcodes are supported for now");
439 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
440 State.get(getOperand(1), Lane), Name);
441}
442
443Value *VPInstruction::generate(VPTransformState &State) {
444 IRBuilderBase &Builder = State.Builder;
445
447 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
448 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
449 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
450 auto *Res =
451 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
452 if (auto *I = dyn_cast<Instruction>(Res))
453 setFlags(I);
454 return Res;
455 }
456
457 switch (getOpcode()) {
458 case VPInstruction::Not: {
459 Value *A = State.get(getOperand(0));
460 return Builder.CreateNot(A, Name);
461 }
462 case Instruction::ICmp: {
463 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
464 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
465 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
466 return Builder.CreateCmp(getPredicate(), A, B, Name);
467 }
468 case Instruction::Select: {
469 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
470 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
471 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
472 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
473 return Builder.CreateSelect(Cond, Op1, Op2, Name);
474 }
476 // Get first lane of vector induction variable.
477 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
478 // Get the original loop tripcount.
479 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
480
481 // If this part of the active lane mask is scalar, generate the CMP directly
482 // to avoid unnecessary extracts.
483 if (State.VF.isScalar())
484 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
485 Name);
486
487 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
488 auto *PredTy = VectorType::get(Int1Ty, State.VF);
489 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
490 {PredTy, ScalarTC->getType()},
491 {VIVElem0, ScalarTC}, nullptr, Name);
492 }
494 // Generate code to combine the previous and current values in vector v3.
495 //
496 // vector.ph:
497 // v_init = vector(..., ..., ..., a[-1])
498 // br vector.body
499 //
500 // vector.body
501 // i = phi [0, vector.ph], [i+4, vector.body]
502 // v1 = phi [v_init, vector.ph], [v2, vector.body]
503 // v2 = a[i, i+1, i+2, i+3];
504 // v3 = vector(v1(3), v2(0, 1, 2))
505
506 auto *V1 = State.get(getOperand(0));
507 if (!V1->getType()->isVectorTy())
508 return V1;
509 Value *V2 = State.get(getOperand(1));
510 return Builder.CreateVectorSplice(V1, V2, -1, Name);
511 }
513 unsigned UF = getParent()->getPlan()->getUF();
514 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
515 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
516 Value *Sub = Builder.CreateSub(ScalarTC, Step);
517 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
518 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
519 return Builder.CreateSelect(Cmp, Sub, Zero);
520 }
522 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
523 // be outside of the main loop.
524 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
525 // Compute EVL
526 assert(AVL->getType()->isIntegerTy() &&
527 "Requested vector length should be an integer.");
528
529 assert(State.VF.isScalable() && "Expected scalable vector factor.");
530 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
531
532 Value *EVL = State.Builder.CreateIntrinsic(
533 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
534 {AVL, VFArg, State.Builder.getTrue()});
535 return EVL;
536 }
538 unsigned Part = getUnrollPart(*this);
539 auto *IV = State.get(getOperand(0), VPLane(0));
540 assert(Part != 0 && "Must have a positive part");
541 // The canonical IV is incremented by the vectorization factor (num of
542 // SIMD elements) times the unroll part.
543 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
544 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
546 }
548 Value *Cond = State.get(getOperand(0), VPLane(0));
549 // Replace the temporary unreachable terminator with a new conditional
550 // branch, hooking it up to backward destination for exiting blocks now and
551 // to forward destination(s) later when they are created.
552 BranchInst *CondBr =
553 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
554 CondBr->setSuccessor(0, nullptr);
556
557 if (!getParent()->isExiting())
558 return CondBr;
559
560 VPRegionBlock *ParentRegion = getParent()->getParent();
561 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
562 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
563 return CondBr;
564 }
566 // First create the compare.
567 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
568 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
569 Value *Cond = Builder.CreateICmpEQ(IV, TC);
570
571 // Now create the branch.
572 auto *Plan = getParent()->getPlan();
573 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
574 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
575
576 // Replace the temporary unreachable terminator with a new conditional
577 // branch, hooking it up to backward destination (the header) now and to the
578 // forward destination (the exit/middle block) later when it is created.
579 // Note that CreateCondBr expects a valid BB as first argument, so we need
580 // to set it to nullptr later.
581 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
582 State.CFG.VPBB2IRBB[Header]);
583 CondBr->setSuccessor(0, nullptr);
585 return CondBr;
586 }
588 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
589 // and will be removed by breaking up the recipe further.
590 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
591 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
592 // Get its reduction variable descriptor.
593 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
594
595 RecurKind RK = RdxDesc.getRecurrenceKind();
596
597 Type *PhiTy = OrigPhi->getType();
598 // The recipe's operands are the reduction phi, followed by one operand for
599 // each part of the reduction.
600 unsigned UF = getNumOperands() - 1;
601 VectorParts RdxParts(UF);
602 for (unsigned Part = 0; Part < UF; ++Part)
603 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
604
605 // If the vector reduction can be performed in a smaller type, we truncate
606 // then extend the loop exit value to enable InstCombine to evaluate the
607 // entire expression in the smaller type.
608 // TODO: Handle this in truncateToMinBW.
609 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
610 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
611 for (unsigned Part = 0; Part < UF; ++Part)
612 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
613 }
614 // Reduce all of the unrolled parts into a single vector.
615 Value *ReducedPartRdx = RdxParts[0];
616 unsigned Op = RdxDesc.getOpcode();
618 Op = Instruction::Or;
619
620 if (PhiR->isOrdered()) {
621 ReducedPartRdx = RdxParts[UF - 1];
622 } else {
623 // Floating-point operations should have some FMF to enable the reduction.
625 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
626 for (unsigned Part = 1; Part < UF; ++Part) {
627 Value *RdxPart = RdxParts[Part];
628 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
629 ReducedPartRdx = Builder.CreateBinOp(
630 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
632 ReducedPartRdx =
633 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
634 else
635 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
636 }
637 }
638
639 // Create the reduction after the loop. Note that inloop reductions create
640 // the target reduction in the loop using a Reduction recipe.
641 if ((State.VF.isVector() ||
644 !PhiR->isInLoop()) {
645 ReducedPartRdx =
646 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
647 // If the reduction can be performed in a smaller type, we need to extend
648 // the reduction to the wider type before we branch to the original loop.
649 if (PhiTy != RdxDesc.getRecurrenceType())
650 ReducedPartRdx = RdxDesc.isSigned()
651 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
652 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
653 }
654
655 return ReducedPartRdx;
656 }
658 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
659 unsigned Offset = CI->getZExtValue();
660 assert(Offset > 0 && "Offset from end must be positive");
661 Value *Res;
662 if (State.VF.isVector()) {
663 assert(Offset <= State.VF.getKnownMinValue() &&
664 "invalid offset to extract from");
665 // Extract lane VF - Offset from the operand.
666 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
667 } else {
668 assert(Offset <= 1 && "invalid offset to extract from");
669 Res = State.get(getOperand(0));
670 }
671 if (isa<ExtractElementInst>(Res))
672 Res->setName(Name);
673 return Res;
674 }
676 Value *A = State.get(getOperand(0));
677 Value *B = State.get(getOperand(1));
678 return Builder.CreateLogicalAnd(A, B, Name);
679 }
682 "can only generate first lane for PtrAdd");
683 Value *Ptr = State.get(getOperand(0), VPLane(0));
684 Value *Addend = State.get(getOperand(1), VPLane(0));
685 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
686 }
688 auto *NewPhi =
689 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
690 for (const auto &[IncVPV, PredVPBB] :
691 zip(operands(), getParent()->getPredecessors())) {
692 Value *IncV = State.get(IncVPV, /* IsScalar */ true);
693 BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast<VPBasicBlock>(PredVPBB));
694 NewPhi->addIncoming(IncV, PredBB);
695 }
696 return NewPhi;
697 }
699 Value *A = State.get(getOperand(0));
700 return Builder.CreateOrReduce(A);
701 }
703 Value *Vec = State.get(getOperand(0));
704 Value *Mask = State.get(getOperand(1));
705 Value *Ctz = Builder.CreateCountTrailingZeroElems(
706 Builder.getInt64Ty(), Mask, true, "first.active.lane");
707 return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
708 }
709 default:
710 llvm_unreachable("Unsupported opcode for instruction");
711 }
712}
713
715 VPCostContext &Ctx) const {
717 if (!getUnderlyingValue()) {
718 // TODO: Compute cost for VPInstructions without underlying values once
719 // the legacy cost model has been retired.
720 return 0;
721 }
722
723 assert(!doesGeneratePerAllLanes() &&
724 "Should only generate a vector value or single scalar, not scalars "
725 "for all lanes.");
726 Type *ResTy = Ctx.Types.inferScalarType(this);
728 ResTy = toVectorTy(ResTy, VF);
729
730 return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
731 }
732
733 switch (getOpcode()) {
735 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
737 Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
738 }
739 default:
740 // TODO: Compute cost other VPInstructions once the legacy cost model has
741 // been retired.
743 "unexpected VPInstruction witht underlying value");
744 return 0;
745 }
746}
747
753}
754
757}
758
759#if !defined(NDEBUG)
760bool VPInstruction::isFPMathOp() const {
761 // Inspired by FPMathOperator::classof. Notable differences are that we don't
762 // support Call, PHI and Select opcodes here yet.
763 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
764 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
765 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
766 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
767}
768#endif
769
771 assert(!State.Lane && "VPInstruction executing an Lane");
773 assert((hasFastMathFlags() == isFPMathOp() ||
774 getOpcode() == Instruction::Select) &&
775 "Recipe not a FPMathOp but has fast-math flags?");
776 if (hasFastMathFlags())
779 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
782 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
783 if (GeneratesPerAllLanes) {
784 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
785 Lane != NumLanes; ++Lane) {
786 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
787 assert(GeneratedValue && "generatePerLane must produce a value");
788 State.set(this, GeneratedValue, VPLane(Lane));
789 }
790 return;
791 }
792
793 Value *GeneratedValue = generate(State);
794 if (!hasResult())
795 return;
796 assert(GeneratedValue && "generate must produce a value");
797 assert(
798 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
799 State.VF.isScalar()) &&
800 "scalar value but not only first lane defined");
801 State.set(this, GeneratedValue,
802 /*IsScalar*/ GeneratesPerFirstLaneOnly);
803}
804
807 return false;
808 switch (getOpcode()) {
809 case Instruction::ICmp:
810 case Instruction::Select:
820 return false;
821 default:
822 return true;
823 }
824}
825
827 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
829 return vputils::onlyFirstLaneUsed(this);
830
831 switch (getOpcode()) {
832 default:
833 return false;
834 case Instruction::ICmp:
835 case Instruction::Select:
836 case Instruction::Or:
838 // TODO: Cover additional opcodes.
839 return vputils::onlyFirstLaneUsed(this);
847 return true;
848 };
849 llvm_unreachable("switch should return");
850}
851
853 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
855 return vputils::onlyFirstPartUsed(this);
856
857 switch (getOpcode()) {
858 default:
859 return false;
860 case Instruction::ICmp:
861 case Instruction::Select:
862 return vputils::onlyFirstPartUsed(this);
866 return true;
867 };
868 llvm_unreachable("switch should return");
869}
870
871#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
873 VPSlotTracker SlotTracker(getParent()->getPlan());
874 print(dbgs(), "", SlotTracker);
875}
876
878 VPSlotTracker &SlotTracker) const {
879 O << Indent << "EMIT ";
880
881 if (hasResult()) {
883 O << " = ";
884 }
885
886 switch (getOpcode()) {
888 O << "not";
889 break;
891 O << "combined load";
892 break;
894 O << "combined store";
895 break;
897 O << "active lane mask";
898 break;
900 O << "resume-phi";
901 break;
903 O << "EXPLICIT-VECTOR-LENGTH";
904 break;
906 O << "first-order splice";
907 break;
909 O << "branch-on-cond";
910 break;
912 O << "TC > VF ? TC - VF : 0";
913 break;
915 O << "VF * Part +";
916 break;
918 O << "branch-on-count";
919 break;
921 O << "extract-from-end";
922 break;
924 O << "compute-reduction-result";
925 break;
927 O << "logical-and";
928 break;
930 O << "ptradd";
931 break;
933 O << "any-of";
934 break;
936 O << "extract-first-active";
937 break;
938 default:
940 }
941
942 printFlags(O);
944
945 if (auto DL = getDebugLoc()) {
946 O << ", !dbg ";
947 DL.print(O);
948 }
949}
950#endif
951
953 assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
954 "Only PHINodes can have extra operands");
955 for (const auto &[Idx, Op] : enumerate(operands())) {
956 VPValue *ExitValue = Op;
957 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
961 auto *PredVPBB = Pred->getExitingBasicBlock();
962 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
963 // Set insertion point in PredBB in case an extract needs to be generated.
964 // TODO: Model extracts explicitly.
965 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
966 Value *V = State.get(ExitValue, VPLane(Lane));
967 auto *Phi = cast<PHINode>(&I);
968 // If there is no existing block for PredBB in the phi, add a new incoming
969 // value. Otherwise update the existing incoming value for PredBB.
970 if (Phi->getBasicBlockIndex(PredBB) == -1)
971 Phi->addIncoming(V, PredBB);
972 else
973 Phi->setIncomingValueForBlock(PredBB, V);
974 }
975
976 // Advance the insert point after the wrapped IR instruction. This allows
977 // interleaving VPIRInstructions and other recipes.
978 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
979}
980
982 VPCostContext &Ctx) const {
983 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
984 // hence it does not contribute to the cost-modeling for the VPlan.
985 return 0;
986}
987
989 assert(isa<PHINode>(getInstruction()) &&
990 "can only add exiting operands to phi nodes");
991 assert(getNumOperands() == 1 && "must have a single operand");
992 VPValue *Exiting = getOperand(0);
993 if (!Exiting->isLiveIn()) {
995 auto &Plan = *getParent()->getPlan();
996 Exiting = Builder.createNaryOp(
998 {Exiting,
999 Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});
1000 }
1001 setOperand(0, Exiting);
1002}
1003
1004#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1006 VPSlotTracker &SlotTracker) const {
1007 O << Indent << "IR " << I;
1008
1009 if (getNumOperands() != 0) {
1010 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
1012 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
1013 Op.value()->printAsOperand(O, SlotTracker);
1014 O << " from ";
1015 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
1016 });
1017 O << ")";
1018 }
1019}
1020#endif
1021
1023 assert(State.VF.isVector() && "not widening");
1025
1026 FunctionType *VFTy = Variant->getFunctionType();
1027 // Add return type if intrinsic is overloaded on it.
1029 for (const auto &I : enumerate(arg_operands())) {
1030 Value *Arg;
1031 // Some vectorized function variants may also take a scalar argument,
1032 // e.g. linear parameters for pointers. This needs to be the scalar value
1033 // from the start of the respective part when interleaving.
1034 if (!VFTy->getParamType(I.index())->isVectorTy())
1035 Arg = State.get(I.value(), VPLane(0));
1036 else
1037 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1038 Args.push_back(Arg);
1039 }
1040
1041 assert(Variant != nullptr && "Can't create vector function.");
1042
1043 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1045 if (CI)
1046 CI->getOperandBundlesAsDefs(OpBundles);
1047
1048 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
1049 setFlags(V);
1050
1051 if (!V->getType()->isVoidTy())
1052 State.set(this, V);
1053 State.addMetadata(V, CI);
1054}
1055
1057 VPCostContext &Ctx) const {
1058 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
1059 Variant->getFunctionType()->params(),
1060 Ctx.CostKind);
1061}
1062
1063#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1065 VPSlotTracker &SlotTracker) const {
1066 O << Indent << "WIDEN-CALL ";
1067
1068 Function *CalledFn = getCalledScalarFunction();
1069 if (CalledFn->getReturnType()->isVoidTy())
1070 O << "void ";
1071 else {
1073 O << " = ";
1074 }
1075
1076 O << "call";
1077 printFlags(O);
1078 O << " @" << CalledFn->getName() << "(";
1080 Op->printAsOperand(O, SlotTracker);
1081 });
1082 O << ")";
1083
1084 O << " (using library function";
1085 if (Variant->hasName())
1086 O << ": " << Variant->getName();
1087 O << ")";
1088}
1089#endif
1090
// Emit the widened intrinsic call: keep intrinsic-mandated scalar operands
// scalar, collect overload types to resolve the vector declaration, carry
// operand bundles and metadata over from the original CallInst, and record
// the result unless the intrinsic returns void.
// NOTE(review): rendered view — the signature line and the declarations of
// the Args/OpBundles locals are missing from this listing.
 1092 assert(State.VF.isVector() && "not widening");
 1094
 1095 SmallVector<Type *, 2> TysForDecl;
 1096 // Add return type if intrinsic is overloaded on it.
 1097 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
 1098 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
 1100 for (const auto &I : enumerate(operands())) {
 1101 // Some intrinsics have a scalar argument - don't replace it with a
 1102 // vector.
 1103 Value *Arg;
 1104 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
 1105 State.TTI))
 1106 Arg = State.get(I.value(), VPLane(0));
 1107 else
 1108 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
 1109 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
 1110 State.TTI))
 1111 TysForDecl.push_back(Arg->getType());
 1112 Args.push_back(Arg);
 1113 }
 1114
 1115 // Use vector version of the intrinsic.
 1116 Module *M = State.Builder.GetInsertBlock()->getModule();
 1117 Function *VectorF =
 1118 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
 1119 assert(VectorF &&
 1120 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
 1121
 1122 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
 1124 if (CI)
 1125 CI->getOperandBundlesAsDefs(OpBundles);
 1126
 1127 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
 1128
 1129 setFlags(V);
 1130
 1131 if (!V->getType()->isVoidTy())
 1132 State.set(this, V);
 1133 State.addMetadata(V, CI);
 1134}
1135
// Cost of the widened intrinsic via TTI::getIntrinsicInstrCost. Concrete IR
// arguments are reconstructed where available so backends that inspect
// arguments can price the call accurately; VP intrinsics keep nullptr slots
// to preserve the parameter count.
// NOTE(review): rendered view — the signature and the declaration of the
// Arguments local are missing from this listing.
 1137 VPCostContext &Ctx) const {
 1138 // Some backends analyze intrinsic arguments to determine cost. Use the
 1139 // underlying value for the operand if it has one. Otherwise try to use the
 1140 // operand of the underlying call instruction, if there is one. Otherwise
 1141 // clear Arguments.
 1142 // TODO: Rework TTI interface to be independent of concrete IR values.
 1144 for (const auto &[Idx, Op] : enumerate(operands())) {
 1145 auto *V = Op->getUnderlyingValue();
 1146 if (!V) {
 1147 // Push all the VP Intrinsic's ops into the Arguments even if it is nullptr.
 1148 // Some VP Intrinsic's cost will assert the number of parameters.
 1149 // Mainly appears in the following two scenarios:
 1150 // 1. EVL Op is nullptr
 1151 // 2. The Argument of the VP Intrinsic is also a VP Intrinsic
 1152 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
 1153 Arguments.push_back(V);
 1154 continue;
 1155 }
 1156 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
 1157 Arguments.push_back(UI->getArgOperand(Idx));
 1158 continue;
 1159 }
 1160 Arguments.clear();
 1161 break;
 1162 }
 1163 Arguments.push_back(V);
 1164 }
 1165
 1166 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 1167 SmallVector<Type *> ParamTys;
 1168 for (unsigned I = 0; I != getNumOperands(); ++I)
 1169 ParamTys.push_back(
 1171
 1172 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
 1174 IntrinsicCostAttributes CostAttrs(
 1175 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
 1176 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
 1177 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
 1178}
1179
// Returns the base (overload-suffix-free) name of the intrinsic; used by the
// debug printer.
 1181 return Intrinsic::getBaseName(VectorIntrinsicID);
 1182}
1183
// True when \p Op only needs lane 0: for vector-predication intrinsics the
// trailing EVL operand is inherently scalar.
 1185 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
 1186 // Vector predication intrinsics only demand the first lane of the last
 1187 // operand (the EVL operand).
 1188 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
 1189 Op == getOperand(getNumOperands() - 1);
 1190}
1191
1192#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "WIDEN-INTRINSIC <res> = call <flags> <name>(<ops>)".
 1194 VPSlotTracker &SlotTracker) const {
 1195 O << Indent << "WIDEN-INTRINSIC ";
 1196 if (ResultTy->isVoidTy()) {
 1197 O << "void ";
 1198 } else {
 1200 O << " = ";
 1201 }
 1202
 1203 O << "call";
 1204 printFlags(O);
 1205 O << getIntrinsicName() << "(";
 1206
 1208 Op->printAsOperand(O, SlotTracker);
 1209 });
 1210 O << ")";
 1211}
1212#endif
1213
// Lower the histogram recipe to the experimental_vector_histogram_add
// intrinsic, synthesizing an all-true mask when the recipe carries none and
// negating the increment so subtraction reuses the add intrinsic.
 1216 IRBuilderBase &Builder = State.Builder;
 1217
 1218 Value *Address = State.get(getOperand(0));
 1219 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
 1220 VectorType *VTy = cast<VectorType>(Address->getType());
 1221
 1222 // The histogram intrinsic requires a mask even if the recipe doesn't;
 1223 // if the mask operand was omitted then all lanes should be executed and
 1224 // we just need to synthesize an all-true mask.
 1225 Value *Mask = nullptr;
 1226 if (VPValue *VPMask = getMask())
 1227 Mask = State.get(VPMask);
 1228 else
 1229 Mask =
 1230 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
 1231
 1232 // If this is a subtract, we want to invert the increment amount. We may
 1233 // add a separate intrinsic in future, but for now we'll try this.
 1234 if (Opcode == Instruction::Sub)
 1235 IncAmt = Builder.CreateNeg(IncAmt);
 1236 else
 1237 assert(Opcode == Instruction::Add && "only add or sub supported for now");
 1238
 1239 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
 1240 {VTy, IncAmt->getType()},
 1241 {Address, IncAmt, Mask});
 1242}
1243
// Cost model for the histogram: intrinsic cost + the add/sub itself + a
// multiply for non-unit increments (folded to free for a literal 1).
 1245 VPCostContext &Ctx) const {
 1246 // FIXME: Take the gather and scatter into account as well. For now we're
 1247 // generating the same cost as the fallback path, but we'll likely
 1248 // need to create a new TTI method for determining the cost, including
 1249 // whether we can use base + vec-of-smaller-indices or just
 1250 // vec-of-pointers.
 1251 assert(VF.isVector() && "Invalid VF for histogram cost");
 1252 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
 1253 VPValue *IncAmt = getOperand(1);
 1254 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
 1255 VectorType *VTy = VectorType::get(IncTy, VF);
 1256
 1257 // Assume that a non-constant update value (or a constant != 1) requires
 1258 // a multiply, and add that into the cost.
 1259 InstructionCost MulCost =
 1260 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
 1261 if (IncAmt->isLiveIn()) {
 1262 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
 1263
 1264 if (CI && CI->getZExtValue() == 1)
 1265 MulCost = TTI::TCC_Free;
 1266 }
 1267
 1268 // Find the cost of the histogram operation itself.
 1269 Type *PtrTy = VectorType::get(AddressTy, VF);
 1270 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
 1271 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
 1273 {PtrTy, IncTy, MaskTy});
 1274
 1275 // Add the costs together with the add/sub operation.
 1276 return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
 1277 Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
 1278}
1279
1280#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "WIDEN-HISTOGRAM buckets: <addr>, inc|dec: <amt>[, mask: m]".
 1282 VPSlotTracker &SlotTracker) const {
 1283 O << Indent << "WIDEN-HISTOGRAM buckets: ";
 1285
 1286 if (Opcode == Instruction::Sub)
 1287 O << ", dec: ";
 1288 else {
 1289 assert(Opcode == Instruction::Add);
 1290 O << ", inc: ";
 1291 }
 1293
 1294 if (VPValue *Mask = getMask()) {
 1295 O << ", mask: ";
 1296 Mask->printAsOperand(O, SlotTracker);
 1297 }
 1298}
1299
// Debug printer: "WIDEN-SELECT <res> = select <flags> <cond>, <t>, <f>",
// annotated when the condition is loop invariant.
 1301 VPSlotTracker &SlotTracker) const {
 1302 O << Indent << "WIDEN-SELECT ";
 1304 O << " = select ";
 1305 printFlags(O);
 1307 O << ", ";
 1309 O << ", ";
 1311 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
 1312}
1313#endif
1314
// Widen a select: an invariant condition is read as a single scalar lane,
// otherwise the full vector condition is used. FMF are propagated to FP
// selects and metadata is copied from the underlying instruction.
 1317
 1318 // The condition can be loop invariant but still defined inside the
 1319 // loop. This means that we can't just use the original 'cond' value.
 1320 // We have to take the 'vectorized' value and pick the first lane.
 1321 // Instcombine will make this a no-op.
 1322 auto *InvarCond =
 1323 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
 1324
 1325 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
 1326 Value *Op0 = State.get(getOperand(1));
 1327 Value *Op1 = State.get(getOperand(2));
 1328 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
 1329 State.set(this, Sel);
 1330 if (isa<FPMathOperator>(Sel))
 1331 setFlags(cast<Instruction>(Sel));
 1332 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1333}
1334
// Cost of a widened select. i1 selects matching logical and/or patterns are
// priced as And/Or arithmetic; everything else uses getCmpSelInstrCost with
// the predicate of the underlying compare, if any.
// NOTE(review): rendered view — the declarations of the Operands and Pred
// locals are missing from this listing.
 1336 VPCostContext &Ctx) const {
 1337 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
 1338 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
 1339 Type *ScalarTy = Ctx.Types.inferScalarType(this);
 1340 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 1341
 1342 VPValue *Op0, *Op1;
 1343 using namespace llvm::VPlanPatternMatch;
 1344 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
 1345 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
 1346 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
 1347 // select x, y, false --> x & y
 1348 // select x, true, y --> x | y
 1349 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
 1350 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
 1351
 1353 if (all_of(operands(),
 1354 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
 1355 Operands.append(SI->op_begin(), SI->op_end());
 1356 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
 1357 return Ctx.TTI.getArithmeticInstrCost(
 1358 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
 1359 Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
 1360 }
 1361
 1362 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
 1363 if (!ScalarCond)
 1364 CondTy = VectorType::get(CondTy, VF);
 1365
 1367 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
 1368 Pred = Cmp->getPredicate();
 1369 return Ctx.TTI.getCmpSelInstrCost(
 1370 Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
 1371 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
 1372}
1373
1374VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1375 const FastMathFlags &FMF) {
1376 AllowReassoc = FMF.allowReassoc();
1377 NoNaNs = FMF.noNaNs();
1378 NoInfs = FMF.noInfs();
1379 NoSignedZeros = FMF.noSignedZeros();
1380 AllowReciprocal = FMF.allowReciprocal();
1381 AllowContract = FMF.allowContract();
1382 ApproxFunc = FMF.approxFunc();
1383}
1384
1385#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the recipe's IR flags (nuw/nsw, exact, disjoint, FMF, GEP no-wrap,
// nneg) according to which flag category this recipe carries, followed by a
// separating space when operands will be printed afterwards.
 1387 switch (OpType) {
 1388 case OperationType::Cmp:
 1390 break;
 1391 case OperationType::DisjointOp:
 1393 O << " disjoint";
 1394 break;
 1395 case OperationType::PossiblyExactOp:
 1396 if (ExactFlags.IsExact)
 1397 O << " exact";
 1398 break;
 1399 case OperationType::OverflowingBinOp:
 1400 if (WrapFlags.HasNUW)
 1401 O << " nuw";
 1402 if (WrapFlags.HasNSW)
 1403 O << " nsw";
 1404 break;
 1405 case OperationType::FPMathOp:
 1407 break;
 1408 case OperationType::GEPOp:
 1409 if (GEPFlags.isInBounds())
 1410 O << " inbounds";
 1412 O << " nusw";
 1414 O << " nuw";
 1415 break;
 1416 case OperationType::NonNegOp:
 1417 if (NonNegFlags.NonNeg)
 1418 O << " nneg";
 1419 break;
 1420 case OperationType::Other:
 1421 break;
 1422 }
 1423 if (getNumOperands() > 0)
 1424 O << " ";
 1425}
1426#endif
1427
// Widen one scalar instruction into a single vector instruction: unary and
// binary ops, freeze, and compares. Flags and metadata are transferred from
// the underlying instruction where applicable; opcodes handled by dedicated
// recipes (call/br/phi/gep/select) are unreachable here.
 1430 auto &Builder = State.Builder;
 1431 switch (Opcode) {
 1432 case Instruction::Call:
 1433 case Instruction::Br:
 1434 case Instruction::PHI:
 1435 case Instruction::GetElementPtr:
 1436 case Instruction::Select:
 1437 llvm_unreachable("This instruction is handled by a different recipe.");
 1438 case Instruction::UDiv:
 1439 case Instruction::SDiv:
 1440 case Instruction::SRem:
 1441 case Instruction::URem:
 1442 case Instruction::Add:
 1443 case Instruction::FAdd:
 1444 case Instruction::Sub:
 1445 case Instruction::FSub:
 1446 case Instruction::FNeg:
 1447 case Instruction::Mul:
 1448 case Instruction::FMul:
 1449 case Instruction::FDiv:
 1450 case Instruction::FRem:
 1451 case Instruction::Shl:
 1452 case Instruction::LShr:
 1453 case Instruction::AShr:
 1454 case Instruction::And:
 1455 case Instruction::Or:
 1456 case Instruction::Xor: {
 1457 // Just widen unops and binops.
 1459 for (VPValue *VPOp : operands())
 1460 Ops.push_back(State.get(VPOp));
 1461
 1462 Value *V = Builder.CreateNAryOp(Opcode, Ops);
 1463
 1464 if (auto *VecOp = dyn_cast<Instruction>(V))
 1465 setFlags(VecOp);
 1466
 1467 // Use this vector value for all users of the original instruction.
 1468 State.set(this, V);
 1469 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1470 break;
 1471 }
 1472 case Instruction::Freeze: {
 1473 Value *Op = State.get(getOperand(0));
 1474
 1475 Value *Freeze = Builder.CreateFreeze(Op);
 1476 State.set(this, Freeze);
 1477 break;
 1478 }
 1479 case Instruction::ICmp:
 1480 case Instruction::FCmp: {
 1481 // Widen compares. Generate vector compares.
 1482 bool FCmp = Opcode == Instruction::FCmp;
 1483 Value *A = State.get(getOperand(0));
 1484 Value *B = State.get(getOperand(1));
 1485 Value *C = nullptr;
 1486 if (FCmp) {
 1487 // Propagate fast math flags.
 1488 C = Builder.CreateFCmpFMF(
 1489 getPredicate(), A, B,
 1490 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1491 } else {
 1492 C = Builder.CreateICmp(getPredicate(), A, B);
 1493 }
 1494 State.set(this, C);
 1495 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1496 break;
 1497 }
 1498 default:
 1499 // This instruction is not vectorized by simple widening.
 1500 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
 1501 << Instruction::getOpcodeName(Opcode));
 1502 llvm_unreachable("Unhandled instruction!");
 1503 } // end of switch.
 1504
 1505#if !defined(NDEBUG)
 1506 // Verify that VPlan type inference results agree with the type of the
 1507 // generated values.
 1509 State.get(this)->getType() &&
 1510 "inferred type and type from generated instructions do not match");
 1511#endif
 1512}
1513
// TTI-based cost for a widened instruction: divisions/remainders defer to
// the legacy cost model, binops/shifts account for a constant RHS, freeze is
// approximated as a multiply, and compares use getCmpSelInstrCost.
// NOTE(review): rendered view — the declarations of RHSInfo/Operands and
// part of the RHSInfo refinement condition are missing from this listing.
 1515 VPCostContext &Ctx) const {
 1516 switch (Opcode) {
 1517 case Instruction::FNeg: {
 1518 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 1519 return Ctx.TTI.getArithmeticInstrCost(
 1520 Opcode, VectorTy, Ctx.CostKind,
 1521 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
 1522 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
 1523 }
 1524
 1525 case Instruction::UDiv:
 1526 case Instruction::SDiv:
 1527 case Instruction::SRem:
 1528 case Instruction::URem:
 1529 // More complex computation, let the legacy cost-model handle this for now.
 1530 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
 1531 case Instruction::Add:
 1532 case Instruction::FAdd:
 1533 case Instruction::Sub:
 1534 case Instruction::FSub:
 1535 case Instruction::Mul:
 1536 case Instruction::FMul:
 1537 case Instruction::FDiv:
 1538 case Instruction::FRem:
 1539 case Instruction::Shl:
 1540 case Instruction::LShr:
 1541 case Instruction::AShr:
 1542 case Instruction::And:
 1543 case Instruction::Or:
 1544 case Instruction::Xor: {
 1545 VPValue *RHS = getOperand(1);
 1546 // Certain instructions can be cheaper to vectorize if they have a constant
 1547 // second vector operand. One example of this are shifts on x86.
 1550 if (RHS->isLiveIn())
 1551 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
 1552
 1553 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
 1556 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 1557 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
 1558
 1560 if (CtxI)
 1561 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
 1562 return Ctx.TTI.getArithmeticInstrCost(
 1563 Opcode, VectorTy, Ctx.CostKind,
 1564 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
 1565 RHSInfo, Operands, CtxI, &Ctx.TLI);
 1566 }
 1567 case Instruction::Freeze: {
 1568 // This opcode is unknown. Assume that it is the same as 'mul'.
 1569 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
 1570 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
 1571 Ctx.CostKind);
 1572 }
 1573 case Instruction::ICmp:
 1574 case Instruction::FCmp: {
 1575 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
 1576 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
 1577 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
 1578 Ctx.CostKind,
 1579 {TTI::OK_AnyValue, TTI::OP_None},
 1580 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
 1581 }
 1582 default:
 1583 llvm_unreachable("Unsupported opcode for instruction");
 1584 }
 1585}
1586
// Emit a vector-predicated (VP) intrinsic for the widened unary/binary
// opcode, driven by an all-true mask plus the explicit vector length (EVL)
// operand, which is the recipe's last operand and is excluded from Ops.
 1588 unsigned Opcode = getOpcode();
 1589 // TODO: Support other opcodes
 1590 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
 1591 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
 1592
 1594
 1595 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
 1596 "VPWidenEVLRecipe should not be used for scalars");
 1597
 1598 VPValue *EVL = getEVL();
 1599 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
 1600 IRBuilderBase &BuilderIR = State.Builder;
 1601 VectorBuilder Builder(BuilderIR);
 1602 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
 1603
 1605 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
 1606 VPValue *VPOp = getOperand(I);
 1607 Ops.push_back(State.get(VPOp));
 1608 }
 1609
 1610 Builder.setMask(Mask).setEVL(EVLArg);
 1611 Value *VPInst =
 1612 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
 1613 // Currently vp-intrinsics only accept FMF flags.
 1614 // TODO: Enable other flags when support is added.
 1615 if (isa<FPMathOperator>(VPInst))
 1616 setFlags(cast<Instruction>(VPInst));
 1617
 1618 State.set(this, VPInst);
 1619 State.addMetadata(VPInst,
 1620 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
 1621}
1622
1623#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "WIDEN <res> = <opcode> <flags> <ops>".
 1625 VPSlotTracker &SlotTracker) const {
 1626 O << Indent << "WIDEN ";
 1628 O << " = " << Instruction::getOpcodeName(Opcode);
 1629 printFlags(O);
 1631}
1632
// Debug printer for the EVL variant: the opcode is shown with a "vp." prefix.
 1634 VPSlotTracker &SlotTracker) const {
 1635 O << Indent << "WIDEN ";
 1637 O << " = vp." << Instruction::getOpcodeName(getOpcode());
 1638 printFlags(O);
 1640}
1641#endif
1642
// Widen a cast: apply the scalar cast opcode to the whole vector operand,
// producing a vector of the recipe's result type; flags and metadata carry
// over from the underlying instruction.
 1645 auto &Builder = State.Builder;
 1646 // Vectorize casts.
 1647 assert(State.VF.isVector() && "Not vectorizing?");
 1648 Type *DestTy = VectorType::get(getResultType(), State.VF);
 1649 VPValue *Op = getOperand(0);
 1650 Value *A = State.get(Op);
 1651 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
 1652 State.set(this, Cast);
 1653 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
 1654 if (auto *CastOp = dyn_cast<Instruction>(Cast))
 1655 setFlags(CastOp);
 1656}
1657
// Cost of a widened cast, refining TTI's CastContextHint from the
// surrounding recipes: truncates look at their (memory) user, extends look
// at their defining operand, so masked/reversed/interleaved accesses are
// priced correctly.
// NOTE(review): rendered view — several return-value lines inside the
// ComputeCCH lambda and the CCH initialization are missing from this
// listing.
 1659 VPCostContext &Ctx) const {
 1660 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
 1661 // the legacy cost model, including truncates/extends when evaluating a
 1662 // reduction in a smaller type.
 1663 if (!getUnderlyingValue())
 1664 return 0;
 1665 // Computes the CastContextHint from a recipes that may access memory.
 1666 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
 1667 if (VF.isScalar())
 1669 if (isa<VPInterleaveRecipe>(R))
 1671 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
 1672 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
 1674 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
 1675 if (WidenMemoryRecipe == nullptr)
 1677 if (!WidenMemoryRecipe->isConsecutive())
 1679 if (WidenMemoryRecipe->isReverse())
 1681 if (WidenMemoryRecipe->isMasked())
 1684 };
 1685
 1686 VPValue *Operand = getOperand(0);
 1688 // For Trunc/FPTrunc, get the context from the only user.
 1689 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
 1691 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
 1692 CCH = ComputeCCH(StoreRecipe);
 1693 }
 1694 // For Z/Sext, get the context from the operand.
 1695 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
 1696 Opcode == Instruction::FPExt) {
 1697 if (Operand->isLiveIn())
 1699 else if (Operand->getDefiningRecipe())
 1700 CCH = ComputeCCH(Operand->getDefiningRecipe());
 1701 }
 1702
 1703 auto *SrcTy =
 1704 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
 1705 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
 1706 // Arm TTI will use the underlying instruction to determine the cost.
 1707 return Ctx.TTI.getCastInstrCost(
 1708 Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
 1709 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
 1710}
1711
1712#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "WIDEN-CAST <res> = <opcode> <flags> <op> to <type>".
 1714 VPSlotTracker &SlotTracker) const {
 1715 O << Indent << "WIDEN-CAST ";
 1717 O << " = " << Instruction::getOpcodeName(Opcode);
 1718 printFlags(O);
 1720 O << " to " << *getResultType();
 1721}
1722#endif
1723
// Header phis are costed as a single PHI control-flow instruction.
 1725 VPCostContext &Ctx) const {
 1726 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 1727}
1728
1729/// This function adds
1730/// (0 * Step, 1 * Step, 2 * Step, ...)
1731/// to each vector element of Val.
1732/// \p Opcode is relevant for FP induction variable.
// Integer IVs use plain mul/add on a step-vector; FP IVs convert the
// step-vector via uitofp and combine with the caller-specified FAdd/FSub.
1733static Value *getStepVector(Value *Val, Value *Step,
 1735 IRBuilderBase &Builder) {
 1736 assert(VF.isVector() && "only vector VFs are supported");
 1737
 1738 // Create and check the types.
 1739 auto *ValVTy = cast<VectorType>(Val->getType());
 1740 ElementCount VLen = ValVTy->getElementCount();
 1741
 1742 Type *STy = Val->getType()->getScalarType();
 1743 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
 1744 "Induction Step must be an integer or FP");
 1745 assert(Step->getType() == STy && "Step has wrong type");
 1746
 1748
 1749 // Create a vector of consecutive numbers from zero to VF.
 1750 VectorType *InitVecValVTy = ValVTy;
 1751 if (STy->isFloatingPointTy()) {
 1752 Type *InitVecValSTy =
 1754 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
 1755 }
 1756 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
 1757
 1758 if (STy->isIntegerTy()) {
 1759 Step = Builder.CreateVectorSplat(VLen, Step);
 1760 assert(Step->getType() == Val->getType() && "Invalid step vec");
 1761 // FIXME: The newly created binary instructions should contain nsw/nuw
 1762 // flags, which can be found from the original scalar operations.
 1763 Step = Builder.CreateMul(InitVec, Step);
 1764 return Builder.CreateAdd(Val, Step, "induction");
 1765 }
 1766
 1767 // Floating point induction.
 1768 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
 1769 "Binary Opcode should be specified for FP induction");
 1770 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
 1771
 1772 Step = Builder.CreateVectorSplat(VLen, Step);
 1773 Value *MulOp = Builder.CreateFMul(InitVec, Step);
 1774 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
 1775}
1776
1777/// A helper function that returns an integer or floating-point constant with
1778/// value C.
// The kind of constant is selected by \p Ty; integers are created signed.
 1780 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
 1781 : ConstantFP::get(Ty, C);
 1782}
1783
// Materialize a widened int/FP induction variable: build the stepped start
// vector in the vector preheader (truncating start/step if the IV was
// truncated), create the vector phi, and compute the per-iteration increment
// (step * VF, or the pre-unrolled splat operand) for the backedge value.
 1785 assert(!State.Lane && "Int or FP induction being replicated.");
 1786
 1787 Value *Start = getStartValue()->getLiveInIRValue();
 1789 TruncInst *Trunc = getTruncInst();
 1790 IRBuilderBase &Builder = State.Builder;
 1791 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
 1792 "Types must match");
 1793 assert(State.VF.isVector() && "must have vector VF");
 1794
 1795 // The value from the original loop to which we are mapping the new induction
 1796 // variable.
 1797 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
 1798
 1799 // Fast-math-flags propagate from the original induction instruction.
 1800 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
 1801 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
 1802 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
 1803
 1804 // Now do the actual transformations, and start with fetching the step value.
 1805 Value *Step = State.get(getStepValue(), VPLane(0));
 1806
 1807 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
 1808 "Expected either an induction phi-node or a truncate of it!");
 1809
 1810 // Construct the initial value of the vector IV in the vector loop preheader
 1811 auto CurrIP = Builder.saveIP();
 1812 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
 1813 Builder.SetInsertPoint(VectorPH->getTerminator());
 1814 if (isa<TruncInst>(EntryVal)) {
 1815 assert(Start->getType()->isIntegerTy() &&
 1816 "Truncation requires an integer type");
 1817 auto *TruncType = cast<IntegerType>(EntryVal->getType());
 1818 Step = Builder.CreateTrunc(Step, TruncType);
 1819 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
 1820 }
 1821
 1822 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
 1823 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
 1824 State.VF, State.Builder);
 1825
 1826 // We create vector phi nodes for both integer and floating-point induction
 1827 // variables. Here, we determine the kind of arithmetic we will perform.
 1830 if (Step->getType()->isIntegerTy()) {
 1831 AddOp = Instruction::Add;
 1832 MulOp = Instruction::Mul;
 1833 } else {
 1834 AddOp = ID.getInductionOpcode();
 1835 MulOp = Instruction::FMul;
 1836 }
 1837
 1838 Value *SplatVF;
 1839 if (VPValue *SplatVFOperand = getSplatVFValue()) {
 1840 // The recipe has been unrolled. In that case, fetch the splat value for the
 1841 // induction increment.
 1842 SplatVF = State.get(SplatVFOperand);
 1843 } else {
 1844 // Multiply the vectorization factor by the step using integer or
 1845 // floating-point arithmetic as appropriate.
 1846 Type *StepType = Step->getType();
 1847 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
 1848 if (Step->getType()->isFloatingPointTy())
 1849 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
 1850 else
 1851 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
 1852 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
 1853
 1854 // Create a vector splat to use in the induction update.
 1855 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
 1856 }
 1857
 1858 Builder.restoreIP(CurrIP);
 1859
 1860 // We may need to add the step a number of times, depending on the unroll
 1861 // factor. The last of those goes into the PHI.
 1862 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
 1863 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
 1864 VecInd->setDebugLoc(getDebugLoc());
 1865 State.set(this, VecInd);
 1866
 1867 Instruction *LastInduction = cast<Instruction>(
 1868 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
 1869 if (isa<TruncInst>(EntryVal))
 1870 State.addMetadata(LastInduction, EntryVal);
 1871 LastInduction->setDebugLoc(getDebugLoc());
 1872
 1873 VecInd->addIncoming(SteppedStart, VectorPH);
 1874 // Add induction update using an incorrect block temporarily. The phi node
 1875 // will be fixed after VPlan execution. Note that at this point the latch
 1876 // block cannot be used, as it does not exist yet.
 1877 // TODO: Model increment value in VPlan, by turning the recipe into a
 1878 // multi-def and a subclass of VPHeaderPHIRecipe.
 1879 VecInd->addIncoming(LastInduction, VectorPH);
 1880}
1881
1882#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "<res> = WIDEN-INDUCTION <ops>", noting any truncation.
 1884 VPSlotTracker &SlotTracker) const {
 1885 O << Indent;
 1887 O << " = WIDEN-INDUCTION ";
 1889
 1890 if (auto *TI = getTruncInst())
 1891 O << " (truncated to " << *TI->getType() << ")";
 1892}
1893#endif
1894
// Returns true if this derived IV is equivalent to the canonical induction:
// live-in start 0, live-in step 1, and the same scalar type as the
// canonical IV phi at the start of the enclosing block.
 1896 // The step may be defined by a recipe in the preheader (e.g. if it requires
 1897 // SCEV expansion), but for the canonical induction the step is required to be
 1898 // 1, which is represented as live-in.
 1900 return false;
 1901 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
 1902 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
 1903 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
 1904 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
 1905 getScalarType() == CanIV->getScalarType();
 1906}
1907
1908#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "<res> = DERIVED-IV <start> + <iv> * <step>".
 1910 VPSlotTracker &SlotTracker) const {
 1911 O << Indent;
 1913 O << " = DERIVED-IV ";
 1915 O << " + ";
 1917 O << " * ";
 1919}
1920#endif
1921
// Expand scalar induction steps: for each required lane (or one whole
// scalable vector when all lanes are needed), compute
// BaseIV + (part*VF + lane) * Step using integer or FP arithmetic as
// dictated by the IV type.
// NOTE(review): rendered view — the signature, FMF-guard body, and the
// AddOp/MulOp declarations are missing lines in this listing.
 1923 // Fast-math-flags propagate from the original induction instruction.
 1925 if (hasFastMathFlags())
 1927
 1928 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
 1929 /// variable on which to base the steps, \p Step is the size of the step.
 1930
 1931 Value *BaseIV = State.get(getOperand(0), VPLane(0));
 1932 Value *Step = State.get(getStepValue(), VPLane(0));
 1933 IRBuilderBase &Builder = State.Builder;
 1934
 1935 // Ensure step has the same type as that of scalar IV.
 1936 Type *BaseIVTy = BaseIV->getType()->getScalarType();
 1937 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
 1938
 1939 // We build scalar steps for both integer and floating-point induction
 1940 // variables. Here, we determine the kind of arithmetic we will perform.
 1943 if (BaseIVTy->isIntegerTy()) {
 1944 AddOp = Instruction::Add;
 1945 MulOp = Instruction::Mul;
 1946 } else {
 1947 AddOp = InductionOpcode;
 1948 MulOp = Instruction::FMul;
 1949 }
 1950
 1951 // Determine the number of scalars we need to generate for each unroll
 1952 // iteration.
 1953 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
 1954 // Compute the scalar steps and save the results in State.
 1955 Type *IntStepTy =
 1956 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
 1957 Type *VecIVTy = nullptr;
 1958 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
 1959 if (!FirstLaneOnly && State.VF.isScalable()) {
 1960 VecIVTy = VectorType::get(BaseIVTy, State.VF);
 1961 UnitStepVec =
 1962 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
 1963 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
 1964 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
 1965 }
 1966
 1967 unsigned StartLane = 0;
 1968 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
 1969 if (State.Lane) {
 1970 StartLane = State.Lane->getKnownLane();
 1971 EndLane = StartLane + 1;
 1972 }
 1973 Value *StartIdx0 =
 1974 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
 1975
 1976 if (!FirstLaneOnly && State.VF.isScalable()) {
 1977 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
 1978 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
 1979 if (BaseIVTy->isFloatingPointTy())
 1980 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
 1981 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
 1982 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
 1983 State.set(this, Add);
 1984 // It's useful to record the lane values too for the known minimum number
 1985 // of elements so we do those below. This improves the code quality when
 1986 // trying to extract the first element, for example.
 1987 }
 1988
 1989 if (BaseIVTy->isFloatingPointTy())
 1990 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
 1991
 1992 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
 1993 Value *StartIdx = Builder.CreateBinOp(
 1994 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
 1995 // The step returned by `createStepForVF` is a runtime-evaluated value
 1996 // when VF is scalable. Otherwise, it should be folded into a Constant.
 1997 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
 1998 "Expected StartIdx to be folded to a constant when VF is not "
 1999 "scalable");
 2000 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
 2001 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
 2002 State.set(this, Add, VPLane(Lane));
 2003 }
 2004}
2005
2006#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printer: "<res> = SCALAR-STEPS <ops>".
 2008 VPSlotTracker &SlotTracker) const {
 2009 O << Indent;
 2011 O << " = SCALAR-STEPS ";
 2013}
2014#endif
2015
// Widen a GEP. If every operand is loop-invariant, a scalar GEP clone is
// built and splatted; otherwise only loop-varying operands are vectorized so
// the representation stays compact while still producing a vector of
// pointers.
 2017 assert(State.VF.isVector() && "not widening");
 2018 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
 2019 // Construct a vector GEP by widening the operands of the scalar GEP as
 2020 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
 2021 // results in a vector of pointers when at least one operand of the GEP
 2022 // is vector-typed. Thus, to keep the representation compact, we only use
 2023 // vector-typed operands for loop-varying values.
 2024
 2025 if (areAllOperandsInvariant()) {
 2026 // If we are vectorizing, but the GEP has only loop-invariant operands,
 2027 // the GEP we build (by only using vector-typed operands for
 2028 // loop-varying values) would be a scalar pointer. Thus, to ensure we
 2029 // produce a vector of pointers, we need to either arbitrarily pick an
 2030 // operand to broadcast, or broadcast a clone of the original GEP.
 2031 // Here, we broadcast a clone of the original.
 2032 //
 2033 // TODO: If at some point we decide to scalarize instructions having
 2034 // loop-invariant operands, this special case will no longer be
 2035 // required. We would add the scalarization decision to
 2036 // collectLoopScalars() and teach getVectorValue() to broadcast
 2037 // the lane-zero scalar value.
 2039 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
 2040 Ops.push_back(State.get(getOperand(I), VPLane(0)));
 2041
 2042 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
 2043 ArrayRef(Ops).drop_front(), "",
 2045 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
 2046 State.set(this, Splat);
 2047 State.addMetadata(Splat, GEP);
 2048 } else {
 2049 // If the GEP has at least one loop-varying operand, we are sure to
 2050 // produce a vector of pointers unless VF is scalar.
 2051 // The pointer operand of the new GEP. If it's loop-invariant, we
 2052 // won't broadcast it.
 2053 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
 2054 : State.get(getOperand(0));
 2055
 2056 // Collect all the indices for the new GEP. If any index is
 2057 // loop-invariant, we won't broadcast it.
 2059 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
 2060 VPValue *Operand = getOperand(I);
 2061 if (isIndexLoopInvariant(I - 1))
 2062 Indices.push_back(State.get(Operand, VPLane(0)));
 2063 else
 2064 Indices.push_back(State.get(Operand));
 2065 }
 2066
 2067 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
 2068 // but it should be a vector, otherwise.
 2069 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
 2070 Indices, "", getGEPNoWrapFlags());
 2071 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
 2072 "NewGEP is not a pointer vector");
 2073 State.set(this, NewGEP);
 2074 State.addMetadata(NewGEP, GEP);
 2075 }
 2076}
2077
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                              VPSlotTracker &SlotTracker) const {
  // Print as: WIDEN-GEP Inv|Var[Inv|Var]... = getelementptr <flags> ...,
  // tagging the pointer operand and each index as loop-invariant or varying.
  O << Indent << "WIDEN-GEP ";
  O << (isPointerLoopInvariant() ? "Inv" : "Var");
  // Operand 0 is the base pointer; the remaining operands are GEP indices.
  for (size_t I = 0; I < getNumOperands() - 1; ++I)
    O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";

  O << " ";
  O << " = getelementptr";
  printFlags(O);
}
#endif
2093
2094static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2095 unsigned CurrentPart, IRBuilderBase &Builder) {
2096 // Use i32 for the gep index type when the value is constant,
2097 // or query DataLayout for a more suitable index type otherwise.
2098 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2099 return IsScalable && (IsReverse || CurrentPart > 0)
2100 ? DL.getIndexType(Builder.getPtrTy(0))
2101 : Builder.getInt32Ty();
2102}
2103
  auto &Builder = State.Builder;
  unsigned CurrentPart = getUnrollPart(*this);
  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
                                CurrentPart, Builder);

  // The wide store needs to start at the last vector element.
  // Compute the scalar lane-0 address for a reverse access:
  // Ptr - CurrentPart * VF - (VF - 1), expressed as two GEPs below.
  Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
  if (IndexTy != RunTimeVF->getType())
    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
  // NumElt = -CurrentPart * RunTimeVF
  Value *NumElt = Builder.CreateMul(
      ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
  // LastLane = 1 - RunTimeVF
  Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
  Value *Ptr = State.get(getOperand(0), VPLane(0));
  // First GEP skips back over the parts already handled by unrolling; the
  // second steps to the last lane of this part.
  Value *ResultPtr =
      Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",

  // The result is a single (uniform) pointer per part, not a vector of
  // pointers, hence IsScalar.
  State.set(this, ResultPtr, /*IsScalar*/ true);
}
2128
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                         VPSlotTracker &SlotTracker) const {
  // Printed form: <result> = reverse-vector-pointer <flags> <operands>.
  O << Indent;
  O << " = reverse-vector-pointer";
  printFlags(O);
}
#endif
2139
  auto &Builder = State.Builder;
  unsigned CurrentPart = getUnrollPart(*this);
  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
                                CurrentPart, Builder);
  Value *Ptr = State.get(getOperand(0), VPLane(0));

  // Advance the base pointer by CurrentPart * VF elements so each unrolled
  // part addresses its own chunk of the consecutive access.
  Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
  Value *ResultPtr =
      Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());

  // A single uniform pointer per part (not a vector of pointers).
  State.set(this, ResultPtr, /*IsScalar*/ true);
}
2154
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                  VPSlotTracker &SlotTracker) const {
  // Printed form: <result> = vector-pointer <operands>.
  O << Indent;
  O << " = vector-pointer ";

}
#endif
2165
  assert(isNormalized() && "Expected blend to be normalized!");
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi and
  // are essentially undef are taken from In0.
  // If only the first lane of this blend is demanded, generate scalar (lane-0)
  // selects instead of full vector selects.
  bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
  Value *Result = nullptr;
  for (unsigned In = 0; In < NumIncoming; ++In) {
    // We might have single edge PHIs (blocks) - use an identity
    // 'select' for the first PHI operand.
    Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
    if (In == 0)
      Result = In0; // Initialize with the first incoming value.
    else {
      // Select between the current value and the previous incoming edge
      // based on the incoming mask. Later (higher-index) incoming values
      // take priority, matching the nesting shown above.
      Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
      Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
    }
  }
  State.set(this, Result, OnlyFirstLaneUsed);
}
2202
                                           VPCostContext &Ctx) const {
  // Handle cases where only the first lane is used the same way as the legacy
  // cost model.
    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);

  // A blend of N incoming values lowers to N-1 vector selects over an i1
  // vector condition; cost each select via TTI.
  Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
  Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
  return (getNumIncomingValues() - 1) *
         Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
}
2216
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                          VPSlotTracker &SlotTracker) const {
  // Printed form: BLEND <res> = <val0> <val1>/<mask1> <val2>/<mask2> ...
  O << Indent << "BLEND ";
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
  } else {
    // Incoming value 0 has no mask (normalized form); every later incoming
    // value is printed together with its mask as value/mask.
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      if (I == 0)
        continue;
      O << "/";
    }
  }
}
2240
  assert(!State.Lane && "Reduction being replicated.");
  Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
  RecurKind Kind = RdxDesc.getRecurrenceKind();
  // Propagate the fast-math flags carried by the underlying instruction.
    State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
  Value *NewVecOp = State.get(getVecOp());
  if (VPValue *Cond = getCondOp()) {
    // Conditional reduction: blend the vector operand with a neutral value so
    // masked-off lanes do not affect the reduction result.
    Value *NewCond = State.get(Cond, State.VF.isScalar());
    VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
    Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();

    Value *Start;
      Start = RdxDesc.getRecurrenceStartValue();
    else
      // Use the recurrence's identity element (e.g. 0 for add, 1 for mul) for
      // the inactive lanes.
      Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
                                          RdxDesc.getFastMathFlags());
    if (State.VF.isVector())
      Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);

    Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
    NewVecOp = Select;
  }
  Value *NewRed;
  Value *NextInChain;
  if (IsOrdered) {
    // Ordered (in-order FP) reduction: fold the whole vector into the chain
    // value sequentially to preserve FP association order.
    if (State.VF.isVector())
      NewRed =
          createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
    else
      NewRed = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
    PrevInChain = NewRed;
    NextInChain = NewRed;
  } else {
    // Unordered: reduce the vector first, then combine with the chain value.
    PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
    NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
      NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
                                   NewRed, PrevInChain);
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
  }
  // The reduction result is a single scalar carried along the chain.
  State.set(this, NextInChain, /*IsScalar*/ true);
}
2290
  assert(!State.Lane && "Reduction being replicated.");

  auto &Builder = State.Builder;
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
    Builder.setFastMathFlags(RdxDesc.getFastMathFlags());

  RecurKind Kind = RdxDesc.getRecurrenceKind();
  Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
  Value *VecOp = State.get(getVecOp());
  // EVL (explicit vector length) bounds how many lanes participate in the
  // vector-predicated reduction.
  Value *EVL = State.get(getEVL(), VPLane(0));

  VectorBuilder VBuilder(Builder);
  VBuilder.setEVL(EVL);
  Value *Mask;
  // TODO: move the all-true mask generation into VectorBuilder.
  if (VPValue *CondOp = getCondOp())
    Mask = State.get(CondOp);
  else
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  VBuilder.setMask(Mask);

  Value *NewRed;
  if (isOrdered()) {
    // In-order FP reduction preserving association order.
    NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
  } else {
    // Reduce the vector, then merge with the incoming chain value.
    NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
      NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
    else
      NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
                                   NewRed, Prev);
  }
  State.set(this, NewRed, /*IsScalar*/ true);
}
2328
                                               VPCostContext &Ctx) const {
  RecurKind RdxKind = RdxDesc.getRecurrenceKind();
  Type *ElementTy = Ctx.Types.inferScalarType(this);
  auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
  unsigned Opcode = RdxDesc.getOpcode();

  // TODO: Support any-of and in-loop reductions.
  assert(
      ForceTargetInstructionCost.getNumOccurrences() > 0) &&
      "Any-of reduction not implemented in VPlan-based cost model currently.");
  assert(
      (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
       ForceTargetInstructionCost.getNumOccurrences() > 0) &&
      "In-loop reduction not implemented in VPlan-based cost model currently.");

  assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
         "Inferred type and recurrence type mismatch.");

  // Cost = Reduction cost + BinOp cost
  // (the scalar binop combines the reduced value with the chain).
      Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
    return Cost + Ctx.TTI.getMinMaxReductionCost(
                      Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
  }

  return Cost + Ctx.TTI.getArithmeticReductionCost(
                    Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
}
2361
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                              VPSlotTracker &SlotTracker) const {
  // Printed form: REDUCE <res> = <chain> + reduce.<op> (<vec> [, <cond>]).
  O << Indent << "REDUCE ";
  O << " = ";
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
  O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
  if (isConditional()) {
    O << ", ";
  }
  O << ")";
  if (RdxDesc.IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
2383
2385 VPSlotTracker &SlotTracker) const {
2387 O << Indent << "REDUCE ";
2389 O << " = ";
2391 O << " +";
2392 if (isa<FPMathOperator>(getUnderlyingInstr()))
2394 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2396 O << ", ";
2398 if (isConditional()) {
2399 O << ", ";
2401 }
2402 O << ")";
2403 if (RdxDesc.IntermediateStore)
2404 O << " (with final reduction value stored in invariant address sank "
2405 "outside of loop)";
2406}
2407#endif
2408
  // Find if the recipe is used by a widened recipe via an intervening
  // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
  // i.e. return true iff some user of an intervening VPPredInstPHIRecipe
  // consumes the value as a vector rather than per-lane scalars.
  return any_of(users(), [](const VPUser *U) {
    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
      return any_of(PredR->users(), [PredR](const VPUser *U) {
        return !U->usesScalars(PredR);
      });
    return false;
  });
}
2420
                                               VPCostContext &Ctx) const {
  // Defer to the legacy cost model for replicated (scalarized) instructions.
  Instruction *UI = cast<Instruction>(getUnderlyingValue());
  // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
  // transform, avoid computing their cost multiple times for now.
  Ctx.SkipCostComputation.insert(UI);
  return Ctx.getLegacyCost(UI, VF);
}
2429
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                VPSlotTracker &SlotTracker) const {
  // CLONE = uniform (single scalar per part); REPLICATE = one copy per lane.
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    // Calls print as: call <flags> @callee(<comma-separated operands>).
    O << "call";
    printFlags(O);
    O << "@" << CB->getCalledFunction()->getName() << "(";
        O, [&O, &SlotTracker](VPValue *Op) {
          Op->printAsOperand(O, SlotTracker);
        });
    O << ")";
  } else {
    printFlags(O);
  }

  // (S->V): scalar results will additionally be packed into a vector.
  if (shouldPack())
    O << " (S->V)";
}
#endif
2458
/// Emit the scalar (lane-0) cast for this recipe and return the cast value.
Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
         "Codegen only implemented for first lane.");
  switch (Opcode) {
  case Instruction::SExt:
  case Instruction::ZExt:
  case Instruction::Trunc: {
    // Note: SExt/ZExt not used yet.
    Value *Op = State.get(getOperand(0), VPLane(0));
    return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
  }
  default:
    llvm_unreachable("opcode not implemented yet");
  }
}
2475
2476void VPScalarCastRecipe ::execute(VPTransformState &State) {
2477 State.set(this, generate(State), VPLane(0));
2478}
2479
2480#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2481void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2482 VPSlotTracker &SlotTracker) const {
2483 O << Indent << "SCALAR-CAST ";
2484 printAsOperand(O, SlotTracker);
2485 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2486 printOperands(O, SlotTracker);
2487 O << " to " << *ResultTy;
2488}
2489#endif
2490
  assert(State.Lane && "Branch on Mask works only on single instance.");


  // Extract this lane's condition bit from the block-in mask (or use true
  // when there is no mask, i.e. the block is unconditionally executed).
  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask)
    ConditionBit = State.get(BlockInMask, *State.Lane);
  else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional branch,
  // whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}
2511
                                                   VPCostContext &Ctx) const {
  // The legacy cost model doesn't assign costs to branches for individual
  // replicate regions. Match the current behavior in the VPlan cost model for
  // now, i.e. treat the branch as free.
  return 0;
}
2519
  assert(State.Lane && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Lane));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  if (State.hasVectorValue(getOperand(0))) {
    // Vector case: phi between the pre-insert vector (from the predicating
    // block) and the vector with this lane's element inserted.
    Value *VectorValue = State.get(getOperand(0));
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this))
      State.reset(this, VPhi);
    else
      State.set(this, VPhi);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi);
  } else {
    // Scalar case. Skip lanes other than 0 when only lane 0 is demanded.
    if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
      return;

    // Phi between poison (lane not executed) and the lane's scalar result.
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Lane))
      State.reset(this, Phi, *State.Lane);
    else
      State.set(this, Phi, *State.Lane);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Lane);
  }
}
2568
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                VPSlotTracker &SlotTracker) const {
  // Printed form: PHI-PREDICATED-INSTRUCTION <res> = <operand>.
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  O << " = ";
}
#endif
2578
                                                 VPCostContext &Ctx) const {
  const Align Alignment =
  unsigned AS =

  if (!Consecutive) {
    // Gather/scatter path.
    // TODO: Using the original IR may not be accurate.
    // Currently, ARM will use the underlying IR to calculate gather/scatter
    // instruction cost.
    assert(!Reverse &&
           "Inconsecutive memory access should not have the order.");
    return Ctx.TTI.getAddressComputationCost(Ty) +
                                      IsMasked, Alignment, Ctx.CostKind,
                                      &Ingredient);
  }

  // Consecutive access: masked or plain wide load/store cost.
  if (IsMasked) {
    Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
                                          AS, Ctx.CostKind);
  } else {
    TTI::OperandValueInfo OpInfo =
    Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
                                    Ctx.CostKind, OpInfo, &Ingredient);
  }
  if (!Reverse)
    return Cost;

  // Reverse access additionally pays for a vector reverse shuffle.
  return Cost +=
             cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
}
2617
  auto *LI = cast<LoadInst>(&Ingredient);

  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
  const Align Alignment = getLoadStoreAlignment(&Ingredient);
  // Non-consecutive accesses become gathers; consecutive ones become wide
  // (possibly masked) loads.
  bool CreateGather = !isConsecutive();

  auto &Builder = State.Builder;
  Value *Mask = nullptr;
  if (auto *VPMask = getMask()) {
    // Mask reversal is only needed for non-all-one (null) masks, as reverse
    // of a null all-one mask is a null mask.
    Mask = State.get(VPMask);
    if (isReverse())
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  }

  // A gather needs a vector of pointers; a wide load needs a single scalar
  // base pointer.
  Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
  Value *NewLI;
  if (CreateGather) {
    NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                       "wide.masked.gather");
  } else if (Mask) {
    NewLI =
        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
                                 PoisonValue::get(DataTy), "wide.masked.load");
  } else {
    NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
  }
  // Add metadata to the load, but setVectorValue to the reverse shuffle.
  State.addMetadata(NewLI, LI);
  if (Reverse)
    NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
  State.set(this, NewLI);
}
2655
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                              VPSlotTracker &SlotTracker) const {
  // Printed form: WIDEN <res> = load <operands>.
  O << Indent << "WIDEN ";
  O << " = load ";
}
#endif
2665
/// Use all-true mask for reverse rather than actual mask, as it avoids a
/// dependence w/o affecting the result.
/// Emits llvm.experimental.vp.reverse over \p Operand limited to \p EVL lanes.
                             Value *EVL, const Twine &Name) {
  VectorType *ValTy = cast<VectorType>(Operand->getType());
  Value *AllTrueMask =
      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
}
2676
  auto *LI = cast<LoadInst>(&Ingredient);

  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
  const Align Alignment = getLoadStoreAlignment(&Ingredient);
  // Non-consecutive accesses become vp.gather; consecutive ones become
  // vp.load through VectorBuilder.
  bool CreateGather = !isConsecutive();

  auto &Builder = State.Builder;
  CallInst *NewLI;
  // EVL (explicit vector length) bounds the active lanes of the VP intrinsic.
  Value *EVL = State.get(getEVL(), VPLane(0));
  Value *Addr = State.get(getAddr(), !CreateGather);
  Value *Mask = nullptr;
  if (VPValue *VPMask = getMask()) {
    Mask = State.get(VPMask);
    if (isReverse())
      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
  } else {
    // VP intrinsics require an explicit mask; use all-true when unmasked.
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  }

  if (CreateGather) {
    NewLI =
        Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                nullptr, "wide.masked.gather");
  } else {
    VectorBuilder VBuilder(Builder);
    VBuilder.setEVL(EVL).setMask(Mask);
    NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
        Instruction::Load, DataTy, Addr, "vp.op.load"));
  }
  // Record the pointer-operand alignment on the intrinsic call.
  NewLI->addParamAttr(
      0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
  State.addMetadata(NewLI, LI);
  Instruction *Res = NewLI;
  if (isReverse())
    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
  State.set(this, Res);
}
2717
                                                  VPCostContext &Ctx) const {
  if (!Consecutive || IsMasked)
    return VPWidenMemoryRecipe::computeCost(VF, Ctx);

  // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
  // here because the EVL recipes using EVL to replace the tail mask. But in the
  // legacy model, it will always calculate the cost of mask.
  // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
  // don't need to compare to the legacy cost model.
  const Align Alignment =
  unsigned AS =
      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
  if (!Reverse)
    return Cost;

  // Reverse access additionally pays for a vector reverse shuffle.
             cast<VectorType>(Ty), {}, Ctx.CostKind,
             0);
}
2742
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                 VPSlotTracker &SlotTracker) const {
  // Printed form: WIDEN <res> = vp.load <operands>.
  O << Indent << "WIDEN ";
  O << " = vp.load ";
}
#endif
2752
  auto *SI = cast<StoreInst>(&Ingredient);

  VPValue *StoredVPValue = getStoredValue();
  // Non-consecutive accesses become scatters; consecutive ones become wide
  // (possibly masked) stores.
  bool CreateScatter = !isConsecutive();
  const Align Alignment = getLoadStoreAlignment(&Ingredient);

  auto &Builder = State.Builder;

  Value *Mask = nullptr;
  if (auto *VPMask = getMask()) {
    // Mask reversal is only needed for non-all-one (null) masks, as reverse
    // of a null all-one mask is a null mask.
    Mask = State.get(VPMask);
    if (isReverse())
      Mask = Builder.CreateVectorReverse(Mask, "reverse");
  }

  Value *StoredVal = State.get(StoredVPValue);
  if (isReverse()) {
    // If we store to reverse consecutive memory locations, then we need
    // to reverse the order of elements in the stored value.
    StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
    // We don't want to update the value in the map as it might be used in
    // another expression. So don't call resetVectorValue(StoredVal).
  }
  // A scatter needs a vector of pointers; a wide store needs a scalar base.
  Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
  Instruction *NewSI = nullptr;
  if (CreateScatter)
    NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
  else if (Mask)
    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
  else
    NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
  State.addMetadata(NewSI, SI);
}
2790
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                               VPSlotTracker &SlotTracker) const {
  // Printed form: WIDEN store <operands>.
  O << Indent << "WIDEN store ";
}
#endif
2798
  auto *SI = cast<StoreInst>(&Ingredient);

  VPValue *StoredValue = getStoredValue();
  // Non-consecutive accesses become vp.scatter; consecutive ones become
  // vp.store through VectorBuilder.
  bool CreateScatter = !isConsecutive();
  const Align Alignment = getLoadStoreAlignment(&Ingredient);

  auto &Builder = State.Builder;

  CallInst *NewSI = nullptr;
  Value *StoredVal = State.get(StoredValue);
  // EVL (explicit vector length) bounds the active lanes of the VP intrinsic.
  Value *EVL = State.get(getEVL(), VPLane(0));
  if (isReverse())
    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
  Value *Mask = nullptr;
  if (VPValue *VPMask = getMask()) {
    Mask = State.get(VPMask);
    if (isReverse())
      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
  } else {
    // VP intrinsics require an explicit mask; use all-true when unmasked.
    Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  }
  Value *Addr = State.get(getAddr(), !CreateScatter);
  if (CreateScatter) {
    NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                    Intrinsic::vp_scatter,
                                    {StoredVal, Addr, Mask, EVL});
  } else {
    VectorBuilder VBuilder(Builder);
    VBuilder.setEVL(EVL).setMask(Mask);
    NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
        Instruction::Store, Type::getVoidTy(EVL->getContext()),
        {StoredVal, Addr}));
  }
  // Record the pointer-operand (index 1) alignment on the intrinsic call.
  NewSI->addParamAttr(
      1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
  State.addMetadata(NewSI, SI);
}
2838
                                                   VPCostContext &Ctx) const {
  if (!Consecutive || IsMasked)
    return VPWidenMemoryRecipe::computeCost(VF, Ctx);

  // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
  // here because the EVL recipes using EVL to replace the tail mask. But in the
  // legacy model, it will always calculate the cost of mask.
  // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
  // don't need to compare to the legacy cost model.
  const Align Alignment =
  unsigned AS =
      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
  if (!Reverse)
    return Cost;

  // Reverse access additionally pays for a vector reverse shuffle.
             cast<VectorType>(Ty), {}, Ctx.CostKind,
             0);
}
2863
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
                                  VPSlotTracker &SlotTracker) const {
  // Printed form: WIDEN vp.store <operands>.
  O << Indent << "WIDEN vp.store ";
}
#endif
2871
                               VectorType *DstVTy, const DataLayout &DL) {
  // Cast vector \p V to \p DstVTy, where both have the same element count and
  // bit-identical element sizes; falls back to a two-step cast via integers
  // when a direct bit/pointer cast is not possible.
  // Verify that V is a vector type with same number of elements as DstVTy.
  auto VF = DstVTy->getElementCount();
  auto *SrcVecTy = cast<VectorType>(V->getType());
  assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
  Type *SrcElemTy = SrcVecTy->getElementType();
  Type *DstElemTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
         "Vector elements must have same size");

  // Do a direct cast if element types are castable.
  if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
    return Builder.CreateBitOrPointerCast(V, DstVTy);
  }
  // V cannot be directly casted to desired vector type.
  // May happen when V is a floating point vector but DstVTy is a vector of
  // pointers or vice-versa. Handle this using a two-step bitcast using an
  // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
  assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
         "Only one type should be a pointer type");
  assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
         "Only one type should be a floating point type");
  Type *IntTy =
      IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
  auto *VecIntTy = VectorType::get(IntTy, VF);
  Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
  return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
}
2901
/// Return a vector containing interleaved elements from multiple
/// smaller input vectors.
                                const Twine &Name) {
  unsigned Factor = Vals.size();
  assert(Factor > 1 && "Tried to interleave invalid number of vectors");

  VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
#ifndef NDEBUG
  for (Value *Val : Vals)
    assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
#endif

  // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
  // must use intrinsics to interleave.
  if (VecTy->isScalableTy()) {
    assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
                                    "scalable vectors, must be power of 2");
    SmallVector<Value *> InterleavingValues(Vals);
    // When interleaving, the number of values will be shrunk until we have the
    // single final interleaved value. Pairwise vector.interleave2 calls halve
    // the count (and double the element count) each round.
    auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
    for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
      InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
      for (unsigned I = 0; I < Midpoint; ++I)
        InterleavingValues[I] = Builder.CreateIntrinsic(
            InterleaveTy, Intrinsic::vector_interleave2,
            {InterleavingValues[I], InterleavingValues[Midpoint + I]},
            /*FMFSource=*/nullptr, Name);
    }
    return InterleavingValues[0];
  }

  // Fixed length. Start by concatenating all vectors into a wide vector.
  Value *WideVec = concatenateVectors(Builder, Vals);

  // Interleave the elements into the wide vector.
  const unsigned NumElts = VecTy->getElementCount().getFixedValue();
  return Builder.CreateShuffleVector(
      WideVec, createInterleaveMask(NumElts, Factor), Name);
}
2943
2944// Try to vectorize the interleave group that \p Instr belongs to.
2945//
2946// E.g. Translate following interleaved load group (factor = 3):
2947// for (i = 0; i < N; i+=3) {
2948// R = Pic[i]; // Member of index 0
2949// G = Pic[i+1]; // Member of index 1
2950// B = Pic[i+2]; // Member of index 2
2951// ... // do something to R, G, B
2952// }
2953// To:
2954// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2955// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2956// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2957// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2958//
2959// Or translate following interleaved store group (factor = 3):
2960// for (i = 0; i < N; i+=3) {
2961// ... do something to R, G, B
2962// Pic[i] = R; // Member of index 0
2963// Pic[i+1] = G; // Member of index 1
2964// Pic[i+2] = B; // Member of index 2
2965// }
2966// To:
2967// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2968// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2969// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2970// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2971// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
// NOTE(review): the function signature (original line 2972) was dropped by the
// extraction. The body below lowers an interleave group (IG) to a single wide
// load followed by de-interleaving, or to interleaving followed by a single
// wide store, optionally masked.
2973 assert(!State.Lane && "Interleave group being replicated.");
2974 const InterleaveGroup<Instruction> *Group = IG;
2975 Instruction *Instr = Group->getInsertPos();
2976
2977 // Prepare for the vector type of the interleaved load/store.
2978 Type *ScalarTy = getLoadStoreType(Instr);
2979 unsigned InterleaveFactor = Group->getFactor();
// Wide type covers all members: VF lanes per member times the factor.
2980 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2981
2982 // TODO: extend the masked interleaved-group support to reversed access.
2983 VPValue *BlockInMask = getMask();
2984 assert((!BlockInMask || !Group->isReverse()) &&
2985 "Reversed masked interleave-group not supported.");
2986
2987 VPValue *Addr = getAddr();
2988 Value *ResAddr = State.get(Addr, VPLane(0));
2989 if (auto *I = dyn_cast<Instruction>(ResAddr))
2990 State.setDebugLocFrom(I->getDebugLoc());
2991
2992 // If the group is reverse, adjust the index to refer to the last vector lane
2993 // instead of the first. We adjust the index from the first vector lane,
2994 // rather than directly getting the pointer for lane VF - 1, because the
2995 // pointer operand of the interleaved access is supposed to be uniform.
2996 if (Group->isReverse()) {
2997 Value *RuntimeVF =
2998 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2999 Value *Index =
3000 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
3001 Index = State.Builder.CreateMul(Index,
3002 State.Builder.getInt32(Group->getFactor()));
3003 Index = State.Builder.CreateNeg(Index);
3004
// Preserve inbounds only if the original address GEP had it.
3005 bool InBounds = false;
3006 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
3007 InBounds = Gep->isInBounds();
3008 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
3009 }
3010
3011 State.setDebugLocFrom(Instr->getDebugLoc());
3012 Value *PoisonVec = PoisonValue::get(VecTy);
3013
// Builds the mask for the wide memory operation: for scalable VFs the block
// mask is interleaved with itself Factor times; for fixed VFs it is replicated
// per member and AND'ed with the gap mask when one is required.
3014 auto CreateGroupMask = [&BlockInMask, &State,
3015 &InterleaveFactor](Value *MaskForGaps) -> Value * {
3016 if (State.VF.isScalable()) {
3017 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
3018 assert(isPowerOf2_32(InterleaveFactor) &&
3019 "Unsupported deinterleave factor for scalable vectors");
3020 auto *ResBlockInMask = State.get(BlockInMask);
3021 SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
3022 return interleaveVectors(State.Builder, Ops, "interleaved.mask");
3023 }
3024
3025 if (!BlockInMask)
3026 return MaskForGaps;
3027
3028 Value *ResBlockInMask = State.get(BlockInMask);
3029 Value *ShuffledMask = State.Builder.CreateShuffleVector(
3030 ResBlockInMask,
3031 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
3032 "interleaved.mask");
3033 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
3034 ShuffledMask, MaskForGaps)
3035 : ShuffledMask;
3036 };
3037
3038 const DataLayout &DL = Instr->getDataLayout();
3039 // Vectorize the interleaved load group.
3040 if (isa<LoadInst>(Instr)) {
3041 Value *MaskForGaps = nullptr;
3042 if (NeedsMaskForGaps) {
3043 MaskForGaps = createBitMaskForGaps(State.Builder,
3044 State.VF.getKnownMinValue(), *Group);
3045 assert(MaskForGaps && "Mask for Gaps is required but it is null");
3046 }
3047
// Emit one wide (possibly masked) load covering all members of the group.
3048 Instruction *NewLoad;
3049 if (BlockInMask || MaskForGaps) {
3050 Value *GroupMask = CreateGroupMask(MaskForGaps);
3051 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
3052 Group->getAlign(), GroupMask,
3053 PoisonVec, "wide.masked.vec");
3054 } else
3055 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
3056 Group->getAlign(), "wide.vec");
3057 Group->addMetadata(NewLoad);
3058
// NOTE(review): original line 3059 was dropped by the extraction; the
// `VPDefs` used below is presumably the recipe's defined values — confirm
// against the upstream source.
3060 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3061 if (VecTy->isScalableTy()) {
3062 assert(isPowerOf2_32(InterleaveFactor) &&
3063 "Unsupported deinterleave factor for scalable vectors");
3064
3065 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3066 // so must use intrinsics to deinterleave.
3067 SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
3068 DeinterleavedValues[0] = NewLoad;
3069 // For the case of InterleaveFactor > 2, we will have to do recursive
3070 // deinterleaving, because the current available deinterleave intrinsic
3071 // supports only Factor of 2, otherwise it will bailout after first
3072 // iteration.
3073 // When deinterleaving, the number of values will double until we
3074 // have "InterleaveFactor".
3075 for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
3076 NumVectors *= 2) {
3077 // Deinterleave the elements within the vector
3078 SmallVector<Value *> TempDeinterleavedValues(NumVectors);
3079 for (unsigned I = 0; I < NumVectors; ++I) {
3080 auto *DiTy = DeinterleavedValues[I]->getType();
3081 TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
3082 Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3083 /*FMFSource=*/nullptr, "strided.vec");
3084 }
// Each deinterleave2 yields a {even, odd} struct; unpack both halves.
3085 // Extract the deinterleaved values:
3086 for (unsigned I = 0; I < 2; ++I)
3087 for (unsigned J = 0; J < NumVectors; ++J)
3088 DeinterleavedValues[NumVectors * I + J] =
3089 State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
3090 }
3091
3092#ifndef NDEBUG
3093 for (Value *Val : DeinterleavedValues)
3094 assert(Val && "NULL Deinterleaved Value");
3095#endif
// Hand each live member its strided vector; J indexes defined values only
// (gaps in the group produce no VPValue).
3096 for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
3097 Instruction *Member = Group->getMember(I);
3098 Value *StridedVec = DeinterleavedValues[I];
3099 if (!Member) {
3100 // This value is not needed as it's not used
3101 cast<Instruction>(StridedVec)->eraseFromParent();
3102 continue;
3103 }
3104 // If this member has different type, cast the result type.
3105 if (Member->getType() != ScalarTy) {
3106 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3107 StridedVec =
3108 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3109 }
3110
3111 if (Group->isReverse())
3112 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3113
3114 State.set(VPDefs[J], StridedVec);
3115 ++J;
3116 }
3117
3118 return;
3119 }
3120
3121 // For each member in the group, shuffle out the appropriate data from the
3122 // wide loads.
3123 unsigned J = 0;
3124 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3125 Instruction *Member = Group->getMember(I);
3126
3127 // Skip the gaps in the group.
3128 if (!Member)
3129 continue;
3130
3131 auto StrideMask =
3132 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
3133 Value *StridedVec =
3134 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3135
3136 // If this member has different type, cast the result type.
3137 if (Member->getType() != ScalarTy) {
3138 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
3139 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3140 StridedVec =
3141 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3142 }
3143
3144 if (Group->isReverse())
3145 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3146
3147 State.set(VPDefs[J], StridedVec);
3148 ++J;
3149 }
3150 return;
3151 }
3152
3153 // The sub vector type for current instruction.
3154 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3155
3156 // Vectorize the interleaved store group.
3157 Value *MaskForGaps =
3158 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3159 assert((!MaskForGaps || !State.VF.isScalable()) &&
3160 "masking gaps for scalable vectors is not yet supported.");
3161 ArrayRef<VPValue *> StoredValues = getStoredValues();
3162 // Collect the stored vector from each member.
3163 SmallVector<Value *, 4> StoredVecs;
3164 unsigned StoredIdx = 0;
3165 for (unsigned i = 0; i < InterleaveFactor; i++) {
3166 assert((Group->getMember(i) || MaskForGaps) &&
3167 "Fail to get a member from an interleaved store group");
3168 Instruction *Member = Group->getMember(i);
3169
// Gap positions get poison; the gap mask keeps those lanes from storing.
3170 // Skip the gaps in the group.
3171 if (!Member) {
3172 Value *Undef = PoisonValue::get(SubVT);
3173 StoredVecs.push_back(Undef);
3174 continue;
3175 }
3176
3177 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3178 ++StoredIdx;
3179
3180 if (Group->isReverse())
3181 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3182
3183 // If this member has different type, cast it to a unified type.
3184
3185 if (StoredVec->getType() != SubVT)
3186 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3187
3188 StoredVecs.push_back(StoredVec);
3189 }
3190
3191 // Interleave all the smaller vectors into one wider vector.
3192 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3193 Instruction *NewStoreInstr;
3194 if (BlockInMask || MaskForGaps) {
3195 Value *GroupMask = CreateGroupMask(MaskForGaps);
3196 NewStoreInstr = State.Builder.CreateMaskedStore(
3197 IVec, ResAddr, Group->getAlign(), GroupMask);
3198 } else
3199 NewStoreInstr =
3200 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3201
3202 Group->addMetadata(NewStoreInstr);
3203}
3204
// Debug printer for the interleave-group recipe: factor, insert position,
// then one "store ... to index i" or "... = load from index i" line per live
// member. NOTE(review): the signature (orig. 3206) and two printAsOperand
// lines (orig. 3211, 3228) were dropped by the extraction.
3205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3207 VPSlotTracker &SlotTracker) const {
3208 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3209 IG->getInsertPos()->printAsOperand(O, false);
3210 O << ", ";
3212 VPValue *Mask = getMask();
3213 if (Mask) {
3214 O << ", ";
3215 Mask->printAsOperand(O, SlotTracker);
3216 }
3217
// OpIdx counts only live members; operand 0 is the address, stored values
// start at operand 1.
3218 unsigned OpIdx = 0;
3219 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3220 if (!IG->getMember(i))
3221 continue;
3222 if (getNumStoreOperands() > 0) {
3223 O << "\n" << Indent << " store ";
3224 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3225 O << " to index " << i;
3226 } else {
3227 O << "\n" << Indent << " ";
3229 O << " = load from index " << i;
3230 }
3231 ++OpIdx;
3232 }
3233}
3234#endif
3235
// Cost of the whole interleaved group: wide-vector memory-op cost from TTI,
// plus one reverse-shuffle per member when the group is reversed.
// NOTE(review): the signature (orig. 3236) and three interior lines (orig.
// 3257, 3263, 3271 — presumably the Indices declaration, the
// getInterleavedMemoryOpCost call head, and the getShuffleCost call head)
// were dropped by the extraction.
3237 VPCostContext &Ctx) const {
3238 Instruction *InsertPos = getInsertPos();
3239 // Find the VPValue index of the interleave group. We need to skip gaps.
3240 unsigned InsertPosIdx = 0;
// NOTE(review): the loop condition `IG->getFactor()` is always non-zero, so
// termination relies entirely on the `break` when InsertPos is found; the
// intended condition looks like `Idx < IG->getFactor()` — verify upstream.
3241 for (unsigned Idx = 0; IG->getFactor(); ++Idx)
3242 if (auto *Member = IG->getMember(Idx)) {
3243 if (Member == InsertPos)
3244 break;
3245 InsertPosIdx++;
3246 }
// Infer the scalar type from the defined value (loads) or stored value
// (stores) at the insert position's index.
3247 Type *ValTy = Ctx.Types.inferScalarType(
3248 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3249 : getStoredValues()[InsertPosIdx]);
3250 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3251 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3252
3253 unsigned InterleaveFactor = IG->getFactor();
3254 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3255
3256 // Holds the indices of existing members in the interleaved group.
3258 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3259 if (IG->getMember(IF))
3260 Indices.push_back(IF);
3261
3262 // Calculate the cost of the whole interleaved group.
3264 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3265 IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
3266
3267 if (!IG->isReverse())
3268 return Cost;
3269
// Reversed groups additionally reverse each member's vector.
3270 return Cost + IG->getNumMembers() *
3272 VectorTy, std::nullopt, Ctx.CostKind,
3273 0);
3274}
3275
// Debug printer: "EMIT <result> = CANONICAL-INDUCTION <operands>".
// NOTE(review): the signature and printAsOperand lines (orig. 3277, 3280,
// 3282) were dropped by the extraction.
3276#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3278 VPSlotTracker &SlotTracker) const {
3279 O << Indent << "EMIT ";
3281 O << " = CANONICAL-INDUCTION ";
3283}
3284#endif
3285
// Predicate (signature at orig. 3286 dropped by the extraction): true when
// the recipe stays scalar after vectorization and, for scalable VFs, only the
// first lane of this value is used.
3287 return IsScalarAfterVectorization &&
3288 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3289}
3290
// Widened pointer induction (signature at orig. 3291 dropped by the
// extraction, along with the assert continuations at orig. 3293 and 3297):
// builds one shared pointer phi plus per-part vector GEPs
// <phi + step*0, ..., phi + step*(VF-1)>.
3292 assert(getInductionDescriptor().getKind() ==
3294 "Not a pointer induction according to InductionDescriptor!");
3295 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3296 "Unexpected type.");
3298 "Recipe should have been replaced");
3299
3300 unsigned CurrentPart = getUnrollPart(*this);
3301
3302 // Build a pointer phi
3303 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3304 Type *ScStValueType = ScalarStartValue->getType();
3305
3306 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3307 PHINode *NewPointerPhi = nullptr;
// Part 0 creates the phi (placed before the canonical IV phi); later parts
// reuse the phi created by part 0.
3308 if (CurrentPart == 0) {
3309 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3310 ->getPlan()
3311 ->getVectorLoopRegion()
3312 ->getEntryBasicBlock()
3313 ->front());
3314 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3315 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3316 CanonicalIV->getIterator());
3317 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3318 NewPointerPhi->setDebugLoc(getDebugLoc());
3319 } else {
3320 // The recipe has been unrolled. In that case, fetch the single pointer phi
3321 // shared among all unrolled parts of the recipe.
3322 auto *GEP =
3323 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3324 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3325 }
3326
3327 // A pointer induction, performed by using a gep
3328 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3329 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3330 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3331 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3332 // Add induction update using an incorrect block temporarily. The phi node
3333 // will be fixed after VPlan execution. Note that at this point the latch
3334 // block cannot be used, as it does not exist yet.
3335 // TODO: Model increment value in VPlan, by turning the recipe into a
3336 // multi-def and a subclass of VPHeaderPHIRecipe.
3337 if (CurrentPart == 0) {
3338 // The recipe represents the first part of the pointer induction. Create the
3339 // GEP to increment the phi across all unrolled parts.
3340 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3341 Value *NumUnrolledElems =
3342 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3343
// i8 GEP: the step is expressed in bytes, so the increment is step*VF*UF.
3344 Value *InductionGEP = GetElementPtrInst::Create(
3345 State.Builder.getInt8Ty(), NewPointerPhi,
3346 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3347 InductionLoc);
3348
3349 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3350 }
3351
3352 // Create actual address geps that use the pointer phi as base and a
3353 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3354 Type *VecPhiType = VectorType::get(PhiType, State.VF);
// This part's lanes start at RuntimeVF * CurrentPart.
3355 Value *StartOffsetScalar = State.Builder.CreateMul(
3356 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3357 Value *StartOffset =
3358 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3359 // Create a vector of consecutive numbers from zero to VF.
3360 StartOffset = State.Builder.CreateAdd(
3361 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3362
3363 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3364 "scalar step must be the same across all parts");
3365 Value *GEP = State.Builder.CreateGEP(
3366 State.Builder.getInt8Ty(), NewPointerPhi,
3367 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3368 State.VF, ScalarStepValue)),
3369 "vector.gep");
3370 State.set(this, GEP);
3371}
3372
// Debug printer: "EMIT <result> = WIDEN-POINTER-INDUCTION <start>, <step>"
// plus two extra operands when the recipe has been unrolled (4 operands).
// NOTE(review): the signature and several printAsOperand lines (orig. 3374,
// 3379, 3381, 3383, 3386, 3388) were dropped by the extraction.
3373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3375 VPSlotTracker &SlotTracker) const {
3376 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3377 "unexpected number of operands");
3378 O << Indent << "EMIT ";
3380 O << " = WIDEN-POINTER-INDUCTION ";
3382 O << ", ";
3384 if (getNumOperands() == 4) {
3385 O << ", ";
3387 O << ", ";
3389 }
3390}
3391#endif
3392
// Expands the SCEV `Expr` to IR at the current insert point and caches the
// result in State.ExpandedSCEVs so repeated entry-block execution reuses it.
// NOTE(review): the signature (orig. 3393) and one line inside the early-exit
// branch (orig. 3400) were dropped by the extraction.
3394 assert(!State.Lane && "cannot be used in per-lane");
3395 if (State.ExpandedSCEVs.contains(Expr)) {
3396 // SCEV Expr has already been expanded, result must already be set. At the
3397 // moment we have to execute the entry block twice (once before skeleton
3398 // creation to get expanded SCEVs used by the skeleton and once during
3399 // regular VPlan execution).
3401 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3402 "Results must match");
3403 return;
3404 }
3405
3406 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3407 SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true);
3408
3409 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3410 &*State.Builder.GetInsertPoint());
// Cache and publish the expansion as this recipe's (uniform) result.
3411 State.ExpandedSCEVs[Expr] = Res;
3412 State.set(this, Res, VPLane(0));
3413}
3414
// Debug printer: "EMIT <result> = EXPAND SCEV <expr>". NOTE(review): the
// signature and result printAsOperand lines (orig. 3416, 3419) were dropped
// by the extraction.
3415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3417 VPSlotTracker &SlotTracker) const {
3418 O << Indent << "EMIT ";
3420 O << " = EXPAND SCEV " << *Expr;
3421}
3422#endif
3423
// Widens the canonical IV (signature at orig. 3424 dropped by the
// extraction): emits splat(iv) + step-vector scaled for this unroll part,
// i.e. <iv+part*VF*1, iv+part*VF+1, ...> in the preheader-terminated block.
3425 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3426 Type *STy = CanonicalIV->getType();
// Insert before the previous block's terminator, not at State.Builder's
// current position.
3427 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3428 ElementCount VF = State.VF;
3429 Value *VStart = VF.isScalar()
3430 ? CanonicalIV
3431 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3432 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3433 if (VF.isVector()) {
3434 VStep = Builder.CreateVectorSplat(VF, VStep);
3435 VStep =
3436 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3437 }
3438 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3439 State.set(this, CanonicalVectorIV);
3440}
3441
// Debug printer: "EMIT <result> = WIDEN-CANONICAL-INDUCTION <operands>".
// NOTE(review): signature/printAsOperand lines (orig. 3443, 3446, 3448) were
// dropped by the extraction.
3442#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3444 VPSlotTracker &SlotTracker) const {
3445 O << Indent << "EMIT ";
3447 O << " = WIDEN-CANONICAL-INDUCTION ";
3449}
3450#endif
3451
// First-order recurrence phi (signature at orig. 3452 dropped by the
// extraction): seeds a vector phi whose last lane holds the scalar initial
// value, so a later splice can recover value[i-1].
3453 auto &Builder = State.Builder;
3454 // Create a vector from the initial value.
3455 auto *VectorInit = getStartValue()->getLiveInIRValue();
3456
3457 Type *VecTy = State.VF.isScalar()
3458 ? VectorInit->getType()
3459 : VectorType::get(VectorInit->getType(), State.VF);
3460
3461 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3462 if (State.VF.isVector()) {
3463 auto *IdxTy = Builder.getInt32Ty();
3464 auto *One = ConstantInt::get(IdxTy, 1);
// Build the init vector in the preheader: poison with the scalar start
// inserted at lane VF-1 (runtime VF for scalable vectors).
3465 IRBuilder<>::InsertPointGuard Guard(Builder);
3466 Builder.SetInsertPoint(VectorPH->getTerminator());
3467 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3468 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3469 VectorInit = Builder.CreateInsertElement(
3470 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3471 }
3472
3473 // Create a phi node for the new recurrence.
3474 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3475 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3476 Phi->addIncoming(VectorInit, VectorPH);
3477 State.set(this, Phi);
3478}
3479
// Cost model for the recurrence phi: a plain PHI for scalar VFs; otherwise
// the cost of the splice shuffle that rotates the previous value in.
// NOTE(review): the signature (orig. 3480-3481) and three interior lines
// (orig. 3487, 3489, 3493/3494 region — presumably the invalid-cost return,
// the Mask declaration, and the getShuffleCost call head) were dropped by
// the extraction.
3482 VPCostContext &Ctx) const {
3483 if (VF.isScalar())
3484 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
3485
3486 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3488
// Splice mask: lanes VF-1, VF, ..., 2*VF-2 (last lane of prev + first VF-1
// of current).
3490 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3491 Type *VectorTy =
3492 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3494
3495 cast<VectorType>(VectorTy), Mask, Ctx.CostKind,
3496 VF.getKnownMinValue() - 1);
3497}
3498
// Debug printer: "FIRST-ORDER-RECURRENCE-PHI <result> = phi <operands>".
// NOTE(review): signature/printAsOperand lines (orig. 3500, 3503, 3505)
// were dropped by the extraction.
3499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3501 VPSlotTracker &SlotTracker) const {
3502 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3504 O << " = phi ";
3506}
3507#endif
3508
// Reduction phi (signature at orig. 3509 dropped by the extraction): creates
// the header phi and wires its preheader incoming value — the start value for
// part 0, the recurrence identity for later parts. The recurrence-kind
// branch heads (orig. 3542-3543 and 3551-3552 region) were also dropped;
// the three arms below correspond, in order, to start-value-identity kinds
// (MinMax/AnyOf per the comment), FindLastIV sentinel kinds, and ordinary
// identity-based kinds — confirm against the upstream source.
3510 auto &Builder = State.Builder;
3511
3512 // If this phi is fed by a scaled reduction then it should output a
3513 // vector with fewer elements than the VF.
3514 ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3515
3516 // Reductions do not have to start at zero. They can start with
3517 // any loop invariant values.
3518 VPValue *StartVPV = getStartValue();
3519 Value *StartV = StartVPV->getLiveInIRValue();
3520
3521 // In order to support recurrences we need to be able to vectorize Phi nodes.
3522 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3523 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3524 // this value when we vectorize all of the instructions that use the PHI.
3525 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3526 Type *VecTy =
3527 ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
3528
3529 BasicBlock *HeaderBB = State.CFG.PrevBB;
3530 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
3531 "recipe must be in the vector loop header");
3532 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3533 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3534 State.set(this, Phi, IsInLoop);
3535
3536 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3537
3538 Value *Iden = nullptr;
3539 RecurKind RK = RdxDesc.getRecurrenceKind();
3540 unsigned CurrentPart = getUnrollPart(*this);
3541
3544 // MinMax and AnyOf reductions have the start value as their identity.
3545 if (ScalarPHI) {
3546 Iden = StartV;
3547 } else {
// Materialize the start vector in the preheader.
3548 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3549 Builder.SetInsertPoint(VectorPH->getTerminator());
3550 StartV = Iden = State.get(StartVPV);
3551 }
3553 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3554 // phi or the resume value from the main vector loop when vectorizing the
3555 // epilogue loop. In the exit block, ComputeReductionResult will generate
3556 // checks to verify if the reduction result is the sentinel value. If the
3557 // result is the sentinel value, it will be corrected back to the start
3558 // value.
3559 // TODO: The sentinel value is not always necessary. When the start value is
3560 // a constant, and smaller than the start value of the induction variable,
3561 // the start value can be directly used to initialize the reduction phi.
3562 Iden = StartV;
3563 if (!ScalarPHI) {
3564 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3565 Builder.SetInsertPoint(VectorPH->getTerminator());
3566 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3567 }
3568 } else {
3569 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3570 RdxDesc.getFastMathFlags());
3571
3572 if (!ScalarPHI) {
3573 if (CurrentPart == 0) {
3574 // Create start and identity vector values for the reduction in the
3575 // preheader.
3576 // TODO: Introduce recipes in VPlan preheader to create initial values.
// Part 0's start vector is identity-splat with the scalar start in lane 0.
3577 Iden = Builder.CreateVectorSplat(VF, Iden);
3578 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3579 Builder.SetInsertPoint(VectorPH->getTerminator());
3580 Constant *Zero = Builder.getInt32(0);
3581 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3582 } else {
3583 Iden = Builder.CreateVectorSplat(VF, Iden);
3584 }
3585 }
3586 }
3587
// Non-zero parts start from the identity so the final reduce combines
// part 0's start value exactly once.
3588 Phi = cast<PHINode>(State.get(this, IsInLoop));
3589 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3590 Phi->addIncoming(StartVal, VectorPH);
3591}
3592
// Debug printer: "WIDEN-REDUCTION-PHI <result> = phi <operands>", with a
// suffix when the phi is fed by a scaled reduction. NOTE(review):
// signature/printAsOperand lines (orig. 3594, 3598, 3600) were dropped by
// the extraction.
3593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3595 VPSlotTracker &SlotTracker) const {
3596 O << Indent << "WIDEN-REDUCTION-PHI ";
3597
3599 O << " = phi ";
3601 if (VFScaleFactor != 1)
3602 O << " (VF scaled by 1/" << VFScaleFactor << ")";
3603}
3604#endif
3605
// Widened phi for native VPlans (signature and assert head at orig.
// 3606-3607 dropped by the extraction, plus one line at orig. 3610): emits
// an empty vector phi typed after operand 0; incoming edges are filled in
// later.
3608 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3609
3611 Value *Op0 = State.get(getOperand(0));
3612 Type *VecTy = Op0->getType();
3613 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3614 State.set(this, VecPhi);
3615}
3616
// Debug printer: "WIDEN-PHI <result> = phi <operands>", falling back to the
// original IR phi when not all incoming values are modeled as VPValues.
// NOTE(review): signature/printAsOperand lines (orig. 3618, 3632, 3634)
// were dropped by the extraction.
3617#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3619 VPSlotTracker &SlotTracker) const {
3620 O << Indent << "WIDEN-PHI ";
3621
3622 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3623 // Unless all incoming values are modeled in VPlan print the original PHI
3624 // directly.
3625 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3626 // values as VPValues.
3627 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3628 O << VPlanIngredient(OriginalPhi);
3629 return;
3630 }
3631
3633 O << " = phi ";
3635}
3636#endif
3637
3638// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3639// remove VPActiveLaneMaskPHIRecipe.
// Active-lane-mask phi (signature at orig. 3640 dropped by the extraction):
// creates the header phi seeded with the start mask from the preheader.
3641 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3642 Value *StartMask = State.get(getOperand(0));
3643 PHINode *Phi =
3644 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3645 Phi->addIncoming(StartMask, VectorPH);
3646 Phi->setDebugLoc(getDebugLoc());
3647 State.set(this, Phi);
3648}
3649
// Debug printer: "ACTIVE-LANE-MASK-PHI <result> = phi <operands>".
// NOTE(review): signature/printAsOperand lines (orig. 3651, 3655, 3657)
// were dropped by the extraction.
3650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3652 VPSlotTracker &SlotTracker) const {
3653 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3654
3656 O << " = phi ";
3658}
3659#endif
3660
// Debug printer: "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI <result> = phi
// <operands>". NOTE(review): signature/printAsOperand lines (orig. 3662,
// 3666, 3668) were dropped by the extraction.
3661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3663 VPSlotTracker &SlotTracker) const {
3664 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3665
3667 O << " = phi ";
3669}
3670#endif
3671
// Scalar phi (signature at orig. 3672 dropped by the extraction): creates a
// scalar header phi named `Name`, seeded from the preheader with the start
// value's first lane, and publishes it as this recipe's scalar result.
3673 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3674 Value *Start = State.get(getStartValue(), VPLane(0));
3675 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3676 Phi->addIncoming(Start, VectorPH);
3677 Phi->setDebugLoc(getDebugLoc());
3678 State.set(this, Phi, /*IsScalar=*/true);
3679}
3680
// Debug printer: "SCALAR-PHI <result> = phi <operands>". NOTE(review):
// signature/printAsOperand lines (orig. 3682, 3685, 3687) were dropped by
// the extraction.
3681#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3683 VPSlotTracker &SlotTracker) const {
3684 O << Indent << "SCALAR-PHI ";
3686 O << " = phi ";
3688}
3689#endif
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
Hexagon Common GEP
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file contains the declarations of different VPlan-related auxiliary helpers.
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
cl::opt< unsigned > ForceTargetInstructionCost
static Value * getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (0 * Step, 1 * Step, 2 * Step, ...) to each vector element of Val.
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, unsigned CurrentPart, IRBuilderBase &Builder)
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:437
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:381
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:481
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1499
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:668
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:601
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2051
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1135
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1163
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2574
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2186
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2093
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:330
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1119
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1874
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1733
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:296
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2234
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2404
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2435
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition: IRBuilder.h:1101
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1164
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:968
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
LLVMContext & getContext() const
Definition: IRBuilder.h:195
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:308
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:315
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:312
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
bool isUnaryOp() const
Definition: Instruction.h:314
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
bool isReverse() const
Definition: VectorUtils.h:503
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:505
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3202
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3255
iterator end()
Definition: VPlan.h:3239
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3268
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2189
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2194
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2184
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2180
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:78
VPRegionBlock * getParent()
Definition: VPlan.h:170
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:180
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:201
VPlan * getPlan()
Definition: VPlan.cpp:155
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2556
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:298
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:116
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:421
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:394
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:406
unsigned getVPDefID() const
Definition: VPlanValue.h:426
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3132
VPValue * getStartValue() const
Definition: VPlan.h:3131
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1726
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1468
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Instruction & getInstruction() const
Definition: VPlan.h:1057
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void extractLastLaneOfOperand(VPBuilder &Builder)
Update the recipes single operand to the last lane of the operand using Builder.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:863
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:851
@ CanonicalIVIncrementForPart
Definition: VPlan.h:866
@ ComputeReductionResult
Definition: VPlan.h:869
@ CalculateTripCountMinusVF
Definition: VPlan.h:864
bool hasResult() const
Definition: VPlan.h:986
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:966
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2268
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2274
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2281
Instruction * getInsertPos() const
Definition: VPlan.h:2316
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2305
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlanHelpers.h:116
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlanHelpers.h:157
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlanHelpers.h:143
static VPLane getFirstLane()
Definition: VPlanHelpers.h:141
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2149
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:366
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:391
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:460
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:577
ExactFlagsTy ExactFlags
Definition: VPlan.h:627
FastMathFlagsTy FMFs
Definition: VPlan.h:630
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:629
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:798
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:759
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:801
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:626
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:628
WrapFlagsTy WrapFlags
Definition: VPlan.h:625
bool hasNoUnsignedWrap() const
Definition: VPlan.h:805
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:792
bool hasNoSignedWrap() const
Definition: VPlan.h:811
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2429
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2387
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2391
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2381
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2393
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2385
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2389
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3379
const VPBlockBase * getEntry() const
Definition: VPlan.h:3415
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2516
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3189
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:563
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanHelpers.h:389
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1474
operand_range operands()
Definition: VPlanValue.h:263
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248
unsigned getNumOperands() const
Definition: VPlanValue.h:242
operand_iterator op_begin()
Definition: VPlanValue.h:259
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:243
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:278
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1435
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:125
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1470
friend class VPInstruction
Definition: VPlanValue.h:50
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:144
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89
user_iterator user_begin()
Definition: VPlanValue.h:134
unsigned getNumUsers() const
Definition: VPlanValue.h:117
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:138
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1416
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1420
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1239
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1167
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:1782
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1779
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1785
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1857
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1866
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1359
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2627
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2624
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2663
Instruction & Ingredient
Definition: VPlan.h:2618
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2621
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2677
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2670
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2667
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:1911
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1133
unsigned getUF() const
Definition: VPlan.h:3692
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:674
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:550
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:756
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:42
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:41
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:854
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:54
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:256
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlanHelpers.h:356
LLVMContext & LLVMCtx
Definition: VPlanHelpers.h:360
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1634
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
TargetTransformInfo::TargetCostKind CostKind
Definition: VPlanHelpers.h:363
VPTypeAnalysis Types
Definition: VPlanHelpers.h:359
const TargetLibraryInfo & TLI
Definition: VPlanHelpers.h:358
const TargetTransformInfo & TTI
Definition: VPlanHelpers.h:357
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlanHelpers.h:362
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlanHelpers.h:304
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlanHelpers.h:312
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:349
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlanHelpers.h:196
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlanHelpers.h:229
bool hasVectorValue(VPValue *Def)
Definition: VPlanHelpers.h:227
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlanHelpers.h:349
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlanHelpers.h:352
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:362
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:251
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlanHelpers.h:210
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlanHelpers.h:329
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlanHelpers.h:202
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlanHelpers.h:250
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlanHelpers.h:205
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:373
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlanHelpers.h:338
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlanHelpers.h:239
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2747
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1511
VPValue * getCond() const
Definition: VPlan.h:1507
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:2826
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2829
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2791
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.