LLVM 21.0.0git
VPlanRecipes.cpp
Go to the documentation of this file.
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanHelpers.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanUtils.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
25#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instruction.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/Type.h"
31#include "llvm/IR/Value.h"
35#include "llvm/Support/Debug.h"
40#include <cassert>
41
42using namespace llvm;
43
45
46namespace llvm {
48}
50
51#define LV_NAME "loop-vectorize"
52#define DEBUG_TYPE LV_NAME
53
55 switch (getVPDefID()) {
56 case VPInstructionSC:
57 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
58 case VPInterleaveSC:
59 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
60 case VPWidenStoreEVLSC:
61 case VPWidenStoreSC:
62 return true;
63 case VPReplicateSC:
64 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
65 ->mayWriteToMemory();
66 case VPWidenCallSC:
67 return !cast<VPWidenCallRecipe>(this)
68 ->getCalledScalarFunction()
69 ->onlyReadsMemory();
70 case VPWidenIntrinsicSC:
71 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
72 case VPBranchOnMaskSC:
73 case VPScalarIVStepsSC:
74 case VPPredInstPHISC:
75 return false;
76 case VPBlendSC:
77 case VPReductionEVLSC:
78 case VPReductionSC:
79 case VPVectorPointerSC:
80 case VPWidenCanonicalIVSC:
81 case VPWidenCastSC:
82 case VPWidenGEPSC:
83 case VPWidenIntOrFpInductionSC:
84 case VPWidenLoadEVLSC:
85 case VPWidenLoadSC:
86 case VPWidenPHISC:
87 case VPWidenSC:
88 case VPWidenEVLSC:
89 case VPWidenSelectSC: {
90 const Instruction *I =
91 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
92 (void)I;
93 assert((!I || !I->mayWriteToMemory()) &&
94 "underlying instruction may write to memory");
95 return false;
96 }
97 default:
98 return true;
99 }
100}
101
103 switch (getVPDefID()) {
104 case VPInstructionSC:
105 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
106 case VPWidenLoadEVLSC:
107 case VPWidenLoadSC:
108 return true;
109 case VPReplicateSC:
110 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
111 ->mayReadFromMemory();
112 case VPWidenCallSC:
113 return !cast<VPWidenCallRecipe>(this)
114 ->getCalledScalarFunction()
115 ->onlyWritesMemory();
116 case VPWidenIntrinsicSC:
117 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
118 case VPBranchOnMaskSC:
119 case VPPredInstPHISC:
120 case VPScalarIVStepsSC:
121 case VPWidenStoreEVLSC:
122 case VPWidenStoreSC:
123 return false;
124 case VPBlendSC:
125 case VPReductionEVLSC:
126 case VPReductionSC:
127 case VPVectorPointerSC:
128 case VPWidenCanonicalIVSC:
129 case VPWidenCastSC:
130 case VPWidenGEPSC:
131 case VPWidenIntOrFpInductionSC:
132 case VPWidenPHISC:
133 case VPWidenSC:
134 case VPWidenEVLSC:
135 case VPWidenSelectSC: {
136 const Instruction *I =
137 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
138 (void)I;
139 assert((!I || !I->mayReadFromMemory()) &&
140 "underlying instruction may read from memory");
141 return false;
142 }
143 default:
144 return true;
145 }
146}
147
149 switch (getVPDefID()) {
150 case VPDerivedIVSC:
151 case VPPredInstPHISC:
152 case VPScalarCastSC:
153 case VPReverseVectorPointerSC:
154 return false;
155 case VPInstructionSC:
156 return mayWriteToMemory();
157 case VPWidenCallSC: {
158 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
159 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
160 }
161 case VPWidenIntrinsicSC:
162 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
163 case VPBlendSC:
164 case VPReductionEVLSC:
165 case VPReductionSC:
166 case VPScalarIVStepsSC:
167 case VPVectorPointerSC:
168 case VPWidenCanonicalIVSC:
169 case VPWidenCastSC:
170 case VPWidenGEPSC:
171 case VPWidenIntOrFpInductionSC:
172 case VPWidenPHISC:
173 case VPWidenPointerInductionSC:
174 case VPWidenSC:
175 case VPWidenEVLSC:
176 case VPWidenSelectSC: {
177 const Instruction *I =
178 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
179 (void)I;
180 assert((!I || !I->mayHaveSideEffects()) &&
181 "underlying instruction has side-effects");
182 return false;
183 }
184 case VPInterleaveSC:
185 return mayWriteToMemory();
186 case VPWidenLoadEVLSC:
187 case VPWidenLoadSC:
188 case VPWidenStoreEVLSC:
189 case VPWidenStoreSC:
190 assert(
191 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
193 "mayHaveSideffects result for ingredient differs from this "
194 "implementation");
195 return mayWriteToMemory();
196 case VPReplicateSC: {
197 auto *R = cast<VPReplicateRecipe>(this);
198 return R->getUnderlyingInstr()->mayHaveSideEffects();
199 }
200 default:
201 return true;
202 }
203}
204
206 assert(!Parent && "Recipe already in some VPBasicBlock");
207 assert(InsertPos->getParent() &&
208 "Insertion position not in any VPBasicBlock");
209 InsertPos->getParent()->insert(this, InsertPos->getIterator());
210}
211
214 assert(!Parent && "Recipe already in some VPBasicBlock");
215 assert(I == BB.end() || I->getParent() == &BB);
216 BB.insert(this, I);
217}
218
220 assert(!Parent && "Recipe already in some VPBasicBlock");
221 assert(InsertPos->getParent() &&
222 "Insertion position not in any VPBasicBlock");
223 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
224}
225
227 assert(getParent() && "Recipe not in any VPBasicBlock");
229 Parent = nullptr;
230}
231
233 assert(getParent() && "Recipe not in any VPBasicBlock");
235}
236
239 insertAfter(InsertPos);
240}
241
245 insertBefore(BB, I);
246}
247
249 // Get the underlying instruction for the recipe, if there is one. It is used
250 // to
251 // * decide if cost computation should be skipped for this recipe,
252 // * apply forced target instruction cost.
253 Instruction *UI = nullptr;
254 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
255 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
256 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
257 UI = IG->getInsertPos();
258 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
259 UI = &WidenMem->getIngredient();
260
261 InstructionCost RecipeCost;
262 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
263 RecipeCost = 0;
264 } else {
265 RecipeCost = computeCost(VF, Ctx);
266 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
267 RecipeCost.isValid())
269 }
270
271 LLVM_DEBUG({
272 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
273 dump();
274 });
275 return RecipeCost;
276}
277
279 VPCostContext &Ctx) const {
280 llvm_unreachable("subclasses should implement computeCost");
281}
282
285 VPCostContext &Ctx) const {
286 std::optional<unsigned> Opcode = std::nullopt;
288 if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
289 Opcode = std::make_optional(WidenR->getOpcode());
290
291 VPRecipeBase *ExtAR = BinOpR->getOperand(0)->getDefiningRecipe();
292 VPRecipeBase *ExtBR = BinOpR->getOperand(1)->getDefiningRecipe();
293
294 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
295 auto *InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
296 : BinOpR->getOperand(0));
297 auto *InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
298 : BinOpR->getOperand(1));
299
300 auto GetExtendKind = [](VPRecipeBase *R) {
301 // The extend could come from outside the plan.
302 if (!R)
304 auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
305 if (!WidenCastR)
307 if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
309 if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
312 };
313
314 return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
315 PhiType, VF, GetExtendKind(ExtAR),
316 GetExtendKind(ExtBR), Opcode);
317}
318
321 auto &Builder = State.Builder;
322
323 assert(getOpcode() == Instruction::Add &&
324 "Unhandled partial reduction opcode");
325
326 Value *BinOpVal = State.get(getOperand(0));
327 Value *PhiVal = State.get(getOperand(1));
328 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
329
330 Type *RetTy = PhiVal->getType();
331
332 CallInst *V = Builder.CreateIntrinsic(
333 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
334 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
335
336 State.set(this, V);
337}
338
339#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
341 VPSlotTracker &SlotTracker) const {
342 O << Indent << "PARTIAL-REDUCE ";
344 O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
346}
347#endif
348
350 assert(OpType == OperationType::FPMathOp &&
351 "recipe doesn't have fast math flags");
352 FastMathFlags Res;
353 Res.setAllowReassoc(FMFs.AllowReassoc);
354 Res.setNoNaNs(FMFs.NoNaNs);
355 Res.setNoInfs(FMFs.NoInfs);
356 Res.setNoSignedZeros(FMFs.NoSignedZeros);
357 Res.setAllowReciprocal(FMFs.AllowReciprocal);
358 Res.setAllowContract(FMFs.AllowContract);
359 Res.setApproxFunc(FMFs.ApproxFunc);
360 return Res;
361}
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
365#endif
366
367template <unsigned PartOpIdx>
368VPValue *
370 if (U.getNumOperands() == PartOpIdx + 1)
371 return U.getOperand(PartOpIdx);
372 return nullptr;
373}
374
375template <unsigned PartOpIdx>
377 if (auto *UnrollPartOp = getUnrollPartOperand(U))
378 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
379 return 0;
380}
381
384 const Twine &Name)
385 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
386 Pred, DL),
387 Opcode(Opcode), Name(Name.str()) {
388 assert(Opcode == Instruction::ICmp &&
389 "only ICmp predicates supported at the moment");
390}
391
393 std::initializer_list<VPValue *> Operands,
394 FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
395 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
396 Opcode(Opcode), Name(Name.str()) {
397 // Make sure the VPInstruction is a floating-point operation.
398 assert(isFPMathOp() && "this op can't take fast-math flags");
399}
400
401bool VPInstruction::doesGeneratePerAllLanes() const {
402 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
403}
404
405bool VPInstruction::canGenerateScalarForFirstLane() const {
407 return true;
409 return true;
410 switch (Opcode) {
411 case Instruction::ICmp:
412 case Instruction::Select:
420 return true;
421 default:
422 return false;
423 }
424}
425
426Value *VPInstruction::generatePerLane(VPTransformState &State,
427 const VPLane &Lane) {
428 IRBuilderBase &Builder = State.Builder;
429
431 "only PtrAdd opcodes are supported for now");
432 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
433 State.get(getOperand(1), Lane), Name);
434}
435
436Value *VPInstruction::generate(VPTransformState &State) {
437 IRBuilderBase &Builder = State.Builder;
438
440 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
441 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
442 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
443 auto *Res =
444 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
445 if (auto *I = dyn_cast<Instruction>(Res))
446 setFlags(I);
447 return Res;
448 }
449
450 switch (getOpcode()) {
451 case VPInstruction::Not: {
452 Value *A = State.get(getOperand(0));
453 return Builder.CreateNot(A, Name);
454 }
455 case Instruction::ICmp: {
456 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
457 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
458 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
459 return Builder.CreateCmp(getPredicate(), A, B, Name);
460 }
461 case Instruction::Select: {
462 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
463 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
464 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
465 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
466 return Builder.CreateSelect(Cond, Op1, Op2, Name);
467 }
469 // Get first lane of vector induction variable.
470 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
471 // Get the original loop tripcount.
472 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
473
474 // If this part of the active lane mask is scalar, generate the CMP directly
475 // to avoid unnecessary extracts.
476 if (State.VF.isScalar())
477 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
478 Name);
479
480 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
481 auto *PredTy = VectorType::get(Int1Ty, State.VF);
482 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
483 {PredTy, ScalarTC->getType()},
484 {VIVElem0, ScalarTC}, nullptr, Name);
485 }
487 // Generate code to combine the previous and current values in vector v3.
488 //
489 // vector.ph:
490 // v_init = vector(..., ..., ..., a[-1])
491 // br vector.body
492 //
493 // vector.body
494 // i = phi [0, vector.ph], [i+4, vector.body]
495 // v1 = phi [v_init, vector.ph], [v2, vector.body]
496 // v2 = a[i, i+1, i+2, i+3];
497 // v3 = vector(v1(3), v2(0, 1, 2))
498
499 auto *V1 = State.get(getOperand(0));
500 if (!V1->getType()->isVectorTy())
501 return V1;
502 Value *V2 = State.get(getOperand(1));
503 return Builder.CreateVectorSplice(V1, V2, -1, Name);
504 }
506 unsigned UF = getParent()->getPlan()->getUF();
507 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
508 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
509 Value *Sub = Builder.CreateSub(ScalarTC, Step);
510 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
511 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
512 return Builder.CreateSelect(Cmp, Sub, Zero);
513 }
515 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
516 // be outside of the main loop.
517 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
518 // Compute EVL
519 assert(AVL->getType()->isIntegerTy() &&
520 "Requested vector length should be an integer.");
521
522 assert(State.VF.isScalable() && "Expected scalable vector factor.");
523 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
524
525 Value *EVL = State.Builder.CreateIntrinsic(
526 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
527 {AVL, VFArg, State.Builder.getTrue()});
528 return EVL;
529 }
531 unsigned Part = getUnrollPart(*this);
532 auto *IV = State.get(getOperand(0), VPLane(0));
533 assert(Part != 0 && "Must have a positive part");
534 // The canonical IV is incremented by the vectorization factor (num of
535 // SIMD elements) times the unroll part.
536 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
537 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
539 }
541 Value *Cond = State.get(getOperand(0), VPLane(0));
542 // Replace the temporary unreachable terminator with a new conditional
543 // branch, hooking it up to backward destination for exiting blocks now and
544 // to forward destination(s) later when they are created.
545 BranchInst *CondBr =
546 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
547 CondBr->setSuccessor(0, nullptr);
549
550 if (!getParent()->isExiting())
551 return CondBr;
552
553 VPRegionBlock *ParentRegion = getParent()->getParent();
554 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
555 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
556 return CondBr;
557 }
559 // First create the compare.
560 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
561 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
562 Value *Cond = Builder.CreateICmpEQ(IV, TC);
563
564 // Now create the branch.
565 auto *Plan = getParent()->getPlan();
566 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
567 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
568
569 // Replace the temporary unreachable terminator with a new conditional
570 // branch, hooking it up to backward destination (the header) now and to the
571 // forward destination (the exit/middle block) later when it is created.
572 // Note that CreateCondBr expects a valid BB as first argument, so we need
573 // to set it to nullptr later.
574 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
575 State.CFG.VPBB2IRBB[Header]);
576 CondBr->setSuccessor(0, nullptr);
578 return CondBr;
579 }
581 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
582 // and will be removed by breaking up the recipe further.
583 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
584 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
585 // Get its reduction variable descriptor.
586 const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
587
588 RecurKind RK = RdxDesc.getRecurrenceKind();
589
590 Type *PhiTy = OrigPhi->getType();
591 // The recipe's operands are the reduction phi, followed by one operand for
592 // each part of the reduction.
593 unsigned UF = getNumOperands() - 1;
594 VectorParts RdxParts(UF);
595 for (unsigned Part = 0; Part < UF; ++Part)
596 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
597
598 // If the vector reduction can be performed in a smaller type, we truncate
599 // then extend the loop exit value to enable InstCombine to evaluate the
600 // entire expression in the smaller type.
601 // TODO: Handle this in truncateToMinBW.
602 if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
603 Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
604 for (unsigned Part = 0; Part < UF; ++Part)
605 RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
606 }
607 // Reduce all of the unrolled parts into a single vector.
608 Value *ReducedPartRdx = RdxParts[0];
609 unsigned Op = RdxDesc.getOpcode();
611 Op = Instruction::Or;
612
613 if (PhiR->isOrdered()) {
614 ReducedPartRdx = RdxParts[UF - 1];
615 } else {
616 // Floating-point operations should have some FMF to enable the reduction.
618 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
619 for (unsigned Part = 1; Part < UF; ++Part) {
620 Value *RdxPart = RdxParts[Part];
621 if (Op != Instruction::ICmp && Op != Instruction::FCmp)
622 ReducedPartRdx = Builder.CreateBinOp(
623 (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
625 ReducedPartRdx =
626 createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
627 else
628 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
629 }
630 }
631
632 // Create the reduction after the loop. Note that inloop reductions create
633 // the target reduction in the loop using a Reduction recipe.
634 if ((State.VF.isVector() ||
637 !PhiR->isInLoop()) {
638 ReducedPartRdx =
639 createReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
640 // If the reduction can be performed in a smaller type, we need to extend
641 // the reduction to the wider type before we branch to the original loop.
642 if (PhiTy != RdxDesc.getRecurrenceType())
643 ReducedPartRdx = RdxDesc.isSigned()
644 ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
645 : Builder.CreateZExt(ReducedPartRdx, PhiTy);
646 }
647
648 return ReducedPartRdx;
649 }
651 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
652 unsigned Offset = CI->getZExtValue();
653 assert(Offset > 0 && "Offset from end must be positive");
654 Value *Res;
655 if (State.VF.isVector()) {
656 assert(Offset <= State.VF.getKnownMinValue() &&
657 "invalid offset to extract from");
658 // Extract lane VF - Offset from the operand.
659 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
660 } else {
661 assert(Offset <= 1 && "invalid offset to extract from");
662 Res = State.get(getOperand(0));
663 }
664 if (isa<ExtractElementInst>(Res))
665 Res->setName(Name);
666 return Res;
667 }
669 Value *A = State.get(getOperand(0));
670 Value *B = State.get(getOperand(1));
671 return Builder.CreateLogicalAnd(A, B, Name);
672 }
675 "can only generate first lane for PtrAdd");
676 Value *Ptr = State.get(getOperand(0), VPLane(0));
677 Value *Addend = State.get(getOperand(1), VPLane(0));
678 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
679 }
681 Value *IncomingFromVPlanPred =
682 State.get(getOperand(0), /* IsScalar */ true);
683 Value *IncomingFromOtherPreds =
684 State.get(getOperand(1), /* IsScalar */ true);
685 auto *NewPhi =
686 Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
687 BasicBlock *VPlanPred =
688 State.CFG
689 .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getPredecessors()[0])];
690 NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
691 for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
692 if (OtherPred == VPlanPred)
693 continue;
694 NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
695 }
696 return NewPhi;
697 }
699 Value *A = State.get(getOperand(0));
700 return Builder.CreateOrReduce(A);
701 }
703 Value *Vec = State.get(getOperand(0));
704 Value *Mask = State.get(getOperand(1));
705 Value *Ctz = Builder.CreateCountTrailingZeroElems(
706 Builder.getInt64Ty(), Mask, true, "first.active.lane");
707 return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
708 }
709 default:
710 llvm_unreachable("Unsupported opcode for instruction");
711 }
712}
713
719}
720
723}
724
725#if !defined(NDEBUG)
726bool VPInstruction::isFPMathOp() const {
727 // Inspired by FPMathOperator::classof. Notable differences are that we don't
728 // support Call, PHI and Select opcodes here yet.
729 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
730 Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
731 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
732 Opcode == Instruction::FCmp || Opcode == Instruction::Select;
733}
734#endif
735
737 assert(!State.Lane && "VPInstruction executing an Lane");
739 assert((hasFastMathFlags() == isFPMathOp() ||
740 getOpcode() == Instruction::Select) &&
741 "Recipe not a FPMathOp but has fast-math flags?");
742 if (hasFastMathFlags())
745 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
748 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
749 if (GeneratesPerAllLanes) {
750 for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
751 Lane != NumLanes; ++Lane) {
752 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
753 assert(GeneratedValue && "generatePerLane must produce a value");
754 State.set(this, GeneratedValue, VPLane(Lane));
755 }
756 return;
757 }
758
759 Value *GeneratedValue = generate(State);
760 if (!hasResult())
761 return;
762 assert(GeneratedValue && "generate must produce a value");
763 assert(
764 (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
765 State.VF.isScalar()) &&
766 "scalar value but not only first lane defined");
767 State.set(this, GeneratedValue,
768 /*IsScalar*/ GeneratesPerFirstLaneOnly);
769}
770
773 return false;
774 switch (getOpcode()) {
775 case Instruction::ICmp:
776 case Instruction::Select:
786 return false;
787 default:
788 return true;
789 }
790}
791
793 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
795 return vputils::onlyFirstLaneUsed(this);
796
797 switch (getOpcode()) {
798 default:
799 return false;
800 case Instruction::ICmp:
801 case Instruction::Select:
802 case Instruction::Or:
804 // TODO: Cover additional opcodes.
805 return vputils::onlyFirstLaneUsed(this);
813 return true;
814 };
815 llvm_unreachable("switch should return");
816}
817
819 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
821 return vputils::onlyFirstPartUsed(this);
822
823 switch (getOpcode()) {
824 default:
825 return false;
826 case Instruction::ICmp:
827 case Instruction::Select:
828 return vputils::onlyFirstPartUsed(this);
832 return true;
833 };
834 llvm_unreachable("switch should return");
835}
836
837#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
839 VPSlotTracker SlotTracker(getParent()->getPlan());
840 print(dbgs(), "", SlotTracker);
841}
842
844 VPSlotTracker &SlotTracker) const {
845 O << Indent << "EMIT ";
846
847 if (hasResult()) {
849 O << " = ";
850 }
851
852 switch (getOpcode()) {
854 O << "not";
855 break;
857 O << "combined load";
858 break;
860 O << "combined store";
861 break;
863 O << "active lane mask";
864 break;
866 O << "resume-phi";
867 break;
869 O << "EXPLICIT-VECTOR-LENGTH";
870 break;
872 O << "first-order splice";
873 break;
875 O << "branch-on-cond";
876 break;
878 O << "TC > VF ? TC - VF : 0";
879 break;
881 O << "VF * Part +";
882 break;
884 O << "branch-on-count";
885 break;
887 O << "extract-from-end";
888 break;
890 O << "compute-reduction-result";
891 break;
893 O << "logical-and";
894 break;
896 O << "ptradd";
897 break;
899 O << "any-of";
900 break;
902 O << "extract-first-active";
903 break;
904 default:
906 }
907
908 printFlags(O);
910
911 if (auto DL = getDebugLoc()) {
912 O << ", !dbg ";
913 DL.print(O);
914 }
915}
916#endif
917
919 assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
920 "Only PHINodes can have extra operands");
921 for (const auto &[Idx, Op] : enumerate(operands())) {
922 VPValue *ExitValue = Op;
923 auto Lane = vputils::isUniformAfterVectorization(ExitValue)
927 auto *PredVPBB = Pred->getExitingBasicBlock();
928 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
929 // Set insertion point in PredBB in case an extract needs to be generated.
930 // TODO: Model extracts explicitly.
931 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
932 Value *V = State.get(ExitValue, VPLane(Lane));
933 auto *Phi = cast<PHINode>(&I);
934 // If there is no existing block for PredBB in the phi, add a new incoming
935 // value. Otherwise update the existing incoming value for PredBB.
936 if (Phi->getBasicBlockIndex(PredBB) == -1)
937 Phi->addIncoming(V, PredBB);
938 else
939 Phi->setIncomingValueForBlock(PredBB, V);
940 }
941
942 // Advance the insert point after the wrapped IR instruction. This allows
943 // interleaving VPIRInstructions and other recipes.
944 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
945}
946
948 VPCostContext &Ctx) const {
949 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
950 // hence it does not contribute to the cost-modeling for the VPlan.
951 return 0;
952}
953
955 assert(isa<PHINode>(getInstruction()) &&
956 "can only add exiting operands to phi nodes");
957 assert(getNumOperands() == 1 && "must have a single operand");
958 VPValue *Exiting = getOperand(0);
959 if (!Exiting->isLiveIn()) {
961 auto &Plan = *getParent()->getPlan();
962 Exiting = Builder.createNaryOp(
964 {Exiting,
965 Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});
966 }
967 setOperand(0, Exiting);
968}
969
970#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
972 VPSlotTracker &SlotTracker) const {
973 O << Indent << "IR " << I;
974
975 if (getNumOperands() != 0) {
976 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
978 enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
979 Op.value()->printAsOperand(O, SlotTracker);
980 O << " from ";
981 getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
982 });
983 O << ")";
984 }
985}
986#endif
987
989 assert(State.VF.isVector() && "not widening");
991
992 FunctionType *VFTy = Variant->getFunctionType();
993 // Add return type if intrinsic is overloaded on it.
995 for (const auto &I : enumerate(arg_operands())) {
996 Value *Arg;
997 // Some vectorized function variants may also take a scalar argument,
998 // e.g. linear parameters for pointers. This needs to be the scalar value
999 // from the start of the respective part when interleaving.
1000 if (!VFTy->getParamType(I.index())->isVectorTy())
1001 Arg = State.get(I.value(), VPLane(0));
1002 else
1003 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1004 Args.push_back(Arg);
1005 }
1006
1007 assert(Variant != nullptr && "Can't create vector function.");
1008
1009 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1011 if (CI)
1012 CI->getOperandBundlesAsDefs(OpBundles);
1013
1014 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
1015 setFlags(V);
1016
1017 if (!V->getType()->isVoidTy())
1018 State.set(this, V);
1019 State.addMetadata(V, CI);
1020}
1021
1023 VPCostContext &Ctx) const {
1024 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
1025 Variant->getFunctionType()->params(),
1026 Ctx.CostKind);
1027}
1028
1029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1031 VPSlotTracker &SlotTracker) const {
1032 O << Indent << "WIDEN-CALL ";
1033
1034 Function *CalledFn = getCalledScalarFunction();
1035 if (CalledFn->getReturnType()->isVoidTy())
1036 O << "void ";
1037 else {
1039 O << " = ";
1040 }
1041
1042 O << "call";
1043 printFlags(O);
1044 O << " @" << CalledFn->getName() << "(";
1046 Op->printAsOperand(O, SlotTracker);
1047 });
1048 O << ")";
1049
1050 O << " (using library function";
1051 if (Variant->hasName())
1052 O << ": " << Variant->getName();
1053 O << ")";
1054}
1055#endif
1056
1058 assert(State.VF.isVector() && "not widening");
1060
1061 SmallVector<Type *, 2> TysForDecl;
1062 // Add return type if intrinsic is overloaded on it.
1063 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1064 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1066 for (const auto &I : enumerate(operands())) {
1067 // Some intrinsics have a scalar argument - don't replace it with a
1068 // vector.
1069 Value *Arg;
1070 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
1071 State.TTI))
1072 Arg = State.get(I.value(), VPLane(0));
1073 else
1074 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1075 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
1076 State.TTI))
1077 TysForDecl.push_back(Arg->getType());
1078 Args.push_back(Arg);
1079 }
1080
1081 // Use vector version of the intrinsic.
1082 Module *M = State.Builder.GetInsertBlock()->getModule();
1083 Function *VectorF =
1084 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1085 assert(VectorF &&
1086 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
1087
1088 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1090 if (CI)
1091 CI->getOperandBundlesAsDefs(OpBundles);
1092
1093 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1094
1095 setFlags(V);
1096
1097 if (!V->getType()->isVoidTy())
1098 State.set(this, V);
1099 State.addMetadata(V, CI);
1100}
1101
// Compute the cost of the widened intrinsic call via TTI.
// Collects the underlying IR argument values where available (some backends
// inspect concrete arguments to refine intrinsic costs), infers the
// vectorized return/parameter types, and queries TTI::getIntrinsicInstrCost.
// NOTE(review): the signature line is elided in this listing — verify
// against upstream (takes ElementCount VF and a VPCostContext).
1103 VPCostContext &Ctx) const {
1104 // Some backends analyze intrinsic arguments to determine cost. Use the
1105 // underlying value for the operand if it has one. Otherwise try to use the
1106 // operand of the underlying call instruction, if there is one. Otherwise
1107 // clear Arguments.
1108 // TODO: Rework TTI interface to be independent of concrete IR values.
1110 for (const auto &[Idx, Op] : enumerate(operands())) {
1111 auto *V = Op->getUnderlyingValue();
1112 if (!V) {
1113 // Push all the VP Intrinsic's ops into the Arguments even if it is nullptr.
1114 // Some VP Intrinsic's cost will assert the number of parameters.
1115 // Mainly appears in the following two scenarios:
1116 // 1. EVL Op is nullptr
1117 // 2. The Argument of the VP Intrinsic is also the VP Intrinsic
1118 if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1119 Arguments.push_back(V);
1120 continue;
1121 }
1122 if (auto *UI = dyn_cast_or_null<CallBase>(getUnderlyingValue())) {
1123 Arguments.push_back(UI->getArgOperand(Idx));
1124 continue;
1125 }
1126 Arguments.clear();
1127 break;
1128 }
1129 Arguments.push_back(V);
1130 }
1131
// Build the vectorized return and parameter types for the cost query.
1132 Type *RetTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1133 SmallVector<Type *> ParamTys;
1134 for (unsigned I = 0; I != getNumOperands(); ++I)
1135 ParamTys.push_back(
1137
1138 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1140 IntrinsicCostAttributes CostAttrs(
1141 VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1142 dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
1143 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
1144}
1145
// Return the base (unsuffixed) name of the wrapped intrinsic, for printing.
1147 return Intrinsic::getBaseName(VectorIntrinsicID);
1148}
1149
// Report whether only lane 0 of \p Op is demanded by this recipe.
1151 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1152 // Vector predication intrinsics only demand the first lane of the last
1153 // operand (the EVL operand).
1154 return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1155 Op == getOperand(getNumOperands() - 1);
1156}
1157
1158#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for WIDEN-INTRINSIC recipes.
// NOTE(review): this listing elides the printAsOperand calls for the result
// and operands (hyperlinked lines dropped) — verify against upstream.
1160 VPSlotTracker &SlotTracker) const {
1161 O << Indent << "WIDEN-INTRINSIC ";
1162 if (ResultTy->isVoidTy()) {
1163 O << "void ";
1164 } else {
1166 O << " = ";
1167 }
1168
1169 O << "call";
1170 printFlags(O);
1171 O << getIntrinsicName() << "(";
1172
1174 Op->printAsOperand(O, SlotTracker);
1175 });
1176 O << ")";
1177}
1178#endif
1179
// Emit a call to llvm.experimental.vector.histogram.add for this recipe.
// Operand 0 is the vector of bucket addresses, operand 1 the scalar
// increment; an optional mask operand gates the lanes.
1182 IRBuilderBase &Builder = State.Builder;
1183
1184 Value *Address = State.get(getOperand(0));
1185 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1186 VectorType *VTy = cast<VectorType>(Address->getType());
1187
1188 // The histogram intrinsic requires a mask even if the recipe doesn't;
1189 // if the mask operand was omitted then all lanes should be executed and
1190 // we just need to synthesize an all-true mask.
1191 Value *Mask = nullptr;
1192 if (VPValue *VPMask = getMask())
1193 Mask = State.get(VPMask);
1194 else
1195 Mask =
1196 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1197
1198 // If this is a subtract, we want to invert the increment amount. We may
1199 // add a separate intrinsic in future, but for now we'll try this.
1200 if (Opcode == Instruction::Sub)
1201 IncAmt = Builder.CreateNeg(IncAmt);
1202 else
1203 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1204
1205 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1206 {VTy, IncAmt->getType()},
1207 {Address, IncAmt, Mask});
1208}
1209
// Cost of the histogram update: the intrinsic itself, plus a multiply for a
// non-unit increment and the final add/sub arithmetic.
1211 VPCostContext &Ctx) const {
1212 // FIXME: Take the gather and scatter into account as well. For now we're
1213 // generating the same cost as the fallback path, but we'll likely
1214 // need to create a new TTI method for determining the cost, including
1215 // whether we can use base + vec-of-smaller-indices or just
1216 // vec-of-pointers.
1217 assert(VF.isVector() && "Invalid VF for histogram cost");
1218 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1219 VPValue *IncAmt = getOperand(1);
1220 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1221 VectorType *VTy = VectorType::get(IncTy, VF);
1222
1223 // Assume that a non-constant update value (or a constant != 1) requires
1224 // a multiply, and add that into the cost.
1225 InstructionCost MulCost =
1226 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
1227 if (IncAmt->isLiveIn()) {
1228 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1229
// Incrementing by exactly 1 needs no multiply.
1230 if (CI && CI->getZExtValue() == 1)
1231 MulCost = TTI::TCC_Free;
1232 }
1233
1234 // Find the cost of the histogram operation itself.
1235 Type *PtrTy = VectorType::get(AddressTy, VF);
1236 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1237 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1239 {PtrTy, IncTy, MaskTy});
1240
1241 // Add the costs together with the add/sub operation.
1242 return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
1243 Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
1244}
1245
1246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for WIDEN-HISTOGRAM recipes: buckets, inc/dec amount, and
// the optional mask. (Operand printAsOperand lines are elided in this
// listing.)
1248 VPSlotTracker &SlotTracker) const {
1249 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1251
1252 if (Opcode == Instruction::Sub)
1253 O << ", dec: ";
1254 else {
1255 assert(Opcode == Instruction::Add);
1256 O << ", inc: ";
1257 }
1259
1260 if (VPValue *Mask = getMask()) {
1261 O << ", mask: ";
1262 Mask->printAsOperand(O, SlotTracker);
1263 }
1264}
1265
// Debug printing for WIDEN-SELECT recipes (result, condition and the two
// selected operands; operand printAsOperand lines are elided here).
1267 VPSlotTracker &SlotTracker) const {
1268 O << Indent << "WIDEN-SELECT ";
1270 O << " = select ";
1271 printFlags(O);
1273 O << ", ";
1275 O << ", ";
1277 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1278}
1279#endif
1280
1283
1284 // The condition can be loop invariant but still defined inside the
1285 // loop. This means that we can't just use the original 'cond' value.
1286 // We have to take the 'vectorized' value and pick the first lane.
1287 // Instcombine will make this a no-op.
1288 auto *InvarCond =
1289 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1290
1291 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1292 Value *Op0 = State.get(getOperand(1));
1293 Value *Op1 = State.get(getOperand(2));
1294 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1295 State.set(this, Sel);
1296 if (isa<FPMathOperator>(Sel))
1297 setFlags(cast<Instruction>(Sel));
1298 State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1299}
1300
// Cost of a widened select. i1 selects that match logical and/or patterns
// are costed as And/Or; everything else goes through getCmpSelInstrCost.
1302 VPCostContext &Ctx) const {
1303 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1304 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1305 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1306 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1307
1308 VPValue *Op0, *Op1;
1309 using namespace llvm::VPlanPatternMatch;
1310 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1311 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1312 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1313 // select x, y, false --> x & y
1314 // select x, true, y --> x | y
1315 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1316 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1317
// Only pass concrete IR operands to TTI when all of them are available.
1319 if (all_of(operands(),
1320 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1321 Operands.append(SI->op_begin(), SI->op_end());
1322 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1323 return Ctx.TTI.getArithmeticInstrCost(
1324 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
1325 Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1326 }
1327
1328 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1329 if (!ScalarCond)
1330 CondTy = VectorType::get(CondTy, VF);
1331
// Pass the predicate of a compare-fed condition so TTI can refine the cost.
1333 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1334 Pred = Cmp->getPredicate();
1335 return Ctx.TTI.getCmpSelInstrCost(
1336 Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
1337 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
1338}
1339
1340VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
1341 const FastMathFlags &FMF) {
1342 AllowReassoc = FMF.allowReassoc();
1343 NoNaNs = FMF.noNaNs();
1344 NoInfs = FMF.noInfs();
1345 NoSignedZeros = FMF.noSignedZeros();
1346 AllowReciprocal = FMF.allowReciprocal();
1347 AllowContract = FMF.allowContract();
1348 ApproxFunc = FMF.approxFunc();
1349}
1350
1351#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the IR flags carried by this recipe (nuw/nsw, exact, disjoint,
// fast-math, GEP no-wrap, nneg) in textual VPlan dumps. Several condition
// lines are elided in this listing (hyperlinked flag accessors dropped).
1353 switch (OpType) {
1354 case OperationType::Cmp:
1356 break;
1357 case OperationType::DisjointOp:
1359 O << " disjoint";
1360 break;
1361 case OperationType::PossiblyExactOp:
1362 if (ExactFlags.IsExact)
1363 O << " exact";
1364 break;
1365 case OperationType::OverflowingBinOp:
1366 if (WrapFlags.HasNUW)
1367 O << " nuw";
1368 if (WrapFlags.HasNSW)
1369 O << " nsw";
1370 break;
1371 case OperationType::FPMathOp:
1373 break;
1374 case OperationType::GEPOp:
1375 if (GEPFlags.isInBounds())
1376 O << " inbounds";
1378 O << " nusw";
1380 O << " nuw";
1381 break;
1382 case OperationType::NonNegOp:
1383 if (NonNegFlags.NonNeg)
1384 O << " nneg";
1385 break;
1386 case OperationType::Other:
1387 break;
1388 }
// Trailing separator before the operand list, if any operands follow.
1389 if (getNumOperands() > 0)
1390 O << " ";
1391}
1392#endif
1393
// Generate wide IR for this recipe by simple widening: unops/binops are
// emitted as a single n-ary op over vectorized operands, freeze and
// compares are handled specially, and structured opcodes (call/br/phi/gep/
// select) are rejected because dedicated recipes handle them.
1396 auto &Builder = State.Builder;
1397 switch (Opcode) {
1398 case Instruction::Call:
1399 case Instruction::Br:
1400 case Instruction::PHI:
1401 case Instruction::GetElementPtr:
1402 case Instruction::Select:
1403 llvm_unreachable("This instruction is handled by a different recipe.");
1404 case Instruction::UDiv:
1405 case Instruction::SDiv:
1406 case Instruction::SRem:
1407 case Instruction::URem:
1408 case Instruction::Add:
1409 case Instruction::FAdd:
1410 case Instruction::Sub:
1411 case Instruction::FSub:
1412 case Instruction::FNeg:
1413 case Instruction::Mul:
1414 case Instruction::FMul:
1415 case Instruction::FDiv:
1416 case Instruction::FRem:
1417 case Instruction::Shl:
1418 case Instruction::LShr:
1419 case Instruction::AShr:
1420 case Instruction::And:
1421 case Instruction::Or:
1422 case Instruction::Xor: {
1423 // Just widen unops and binops.
1425 for (VPValue *VPOp : operands())
1426 Ops.push_back(State.get(VPOp))
1427
1428 Value *V = Builder.CreateNAryOp(Opcode, Ops);
1429
1430 if (auto *VecOp = dyn_cast<Instruction>(V))
1431 setFlags(VecOp);
1432
1433 // Use this vector value for all users of the original instruction.
1434 State.set(this, V);
1435 State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1436 break;
1437 }
1438 case Instruction::Freeze: {
1439 Value *Op = State.get(getOperand(0));
1440
1441 Value *Freeze = Builder.CreateFreeze(Op);
1442 State.set(this, Freeze);
1443 break;
1444 }
1445 case Instruction::ICmp:
1446 case Instruction::FCmp: {
1447 // Widen compares. Generate vector compares.
1448 bool FCmp = Opcode == Instruction::FCmp;
1449 Value *A = State.get(getOperand(0));
1450 Value *B = State.get(getOperand(1));
1451 Value *C = nullptr;
1452 if (FCmp) {
1453 // Propagate fast math flags.
1454 C = Builder.CreateFCmpFMF(
1455 getPredicate(), A, B,
1456 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1457 } else {
1458 C = Builder.CreateICmp(getPredicate(), A, B);
1459 }
1460 State.set(this, C);
1461 State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1462 break;
1463 }
1464 default:
1465 // This instruction is not vectorized by simple widening.
1466 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
1467 << Instruction::getOpcodeName(Opcode));
1468 llvm_unreachable("Unhandled instruction!");
1469 } // end of switch.
1470
1471#if !defined(NDEBUG)
1472 // Verify that VPlan type inference results agree with the type of the
1473 // generated values.
1475 State.get(this)->getType() &&
1476 "inferred type and type from generated instructions do not match");
1477#endif
1478}
1479
// Cost of a simply-widened instruction, dispatched on opcode. Divisions and
// remainders defer to the legacy cost model; arithmetic uses operand info
// (constant RHS can be cheaper, e.g. shifts on x86); compares use
// getCmpSelInstrCost.
1481 VPCostContext &Ctx) const {
1482 switch (Opcode) {
1483 case Instruction::FNeg: {
1484 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1485 return Ctx.TTI.getArithmeticInstrCost(
1486 Opcode, VectorTy, Ctx.CostKind,
1487 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1488 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
1489 }
1490
1491 case Instruction::UDiv:
1492 case Instruction::SDiv:
1493 case Instruction::SRem:
1494 case Instruction::URem:
1495 // More complex computation, let the legacy cost-model handle this for now.
1496 return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
1497 case Instruction::Add:
1498 case Instruction::FAdd:
1499 case Instruction::Sub:
1500 case Instruction::FSub:
1501 case Instruction::Mul:
1502 case Instruction::FMul:
1503 case Instruction::FDiv:
1504 case Instruction::FRem:
1505 case Instruction::Shl:
1506 case Instruction::LShr:
1507 case Instruction::AShr:
1508 case Instruction::And:
1509 case Instruction::Or:
1510 case Instruction::Xor: {
1511 VPValue *RHS = getOperand(1);
1512 // Certain instructions can be cheaper to vectorize if they have a constant
1513 // second vector operand. One example of this are shifts on x86.
1516 if (RHS->isLiveIn())
1517 RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());
1518
1519 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1522 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1523 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1524
// Pass concrete IR operands to TTI when an underlying instruction exists.
1526 if (CtxI)
1527 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1528 return Ctx.TTI.getArithmeticInstrCost(
1529 Opcode, VectorTy, Ctx.CostKind,
1530 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1531 RHSInfo, Operands, CtxI, &Ctx.TLI);
1532 }
1533 case Instruction::Freeze: {
1534 // This opcode is unknown. Assume that it is the same as 'mul'.
1535 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1536 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy,
1537 Ctx.CostKind);
1538 }
1539 case Instruction::ICmp:
1540 case Instruction::FCmp: {
1541 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1542 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1543 return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(),
1544 Ctx.CostKind,
1545 {TTI::OK_AnyValue, TTI::OP_None},
1546 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1547 }
1548 default:
1549 llvm_unreachable("Unsupported opcode for instruction");
1550 }
1551}
1552
// Generate a VP (vector-predicated) intrinsic for this recipe, using an
// all-true mask and the explicit vector length (EVL) from the last operand.
1554 unsigned Opcode = getOpcode();
1555 // TODO: Support other opcodes
1556 if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
1557 llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
1560
1561 assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1562 "VPWidenEVLRecipe should not be used for scalars");
1563
1564 VPValue *EVL = getEVL();
1565 Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1566 IRBuilderBase &BuilderIR = State.Builder;
1567 VectorBuilder Builder(BuilderIR);
1568 Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1569
// Gather the vectorized operands, excluding the trailing EVL operand.
1571 for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1572 VPValue *VPOp = getOperand(I);
1573 Ops.push_back(State.get(VPOp));
1574 }
1575
1576 Builder.setMask(Mask).setEVL(EVLArg);
1577 Value *VPInst =
1578 Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
1579 // Currently vp-intrinsics only accept FMF flags.
1580 // TODO: Enable other flags when support is added.
1581 if (isa<FPMathOperator>(VPInst))
1582 setFlags(cast<Instruction>(VPInst));
1583
1584 State.set(this, VPInst);
1585 State.addMetadata(VPInst,
1586 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1587}
1588
1589#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for WIDEN recipes: result, opcode, flags, then operands.
1591 VPSlotTracker &SlotTracker) const {
1592 O << Indent << "WIDEN ";
1594 O << " = " << Instruction::getOpcodeName(Opcode);
1595 printFlags(O);
1597}
1598
// Debug printing for EVL-predicated WIDEN recipes ("vp." prefixed opcode).
1600 VPSlotTracker &SlotTracker) const {
1601 O << Indent << "WIDEN ";
1603 O << " = vp." << Instruction::getOpcodeName(getOpcode());
1604 printFlags(O);
1606}
1607#endif
1608
// Widen a cast: apply the scalar cast opcode to the vectorized operand,
// producing a vector of the recipe's result type.
1611 auto &Builder = State.Builder;
1612 /// Vectorize casts.
1613 assert(State.VF.isVector() && "Not vectorizing?");
1614 Type *DestTy = VectorType::get(getResultType(), State.VF);
1615 VPValue *Op = getOperand(0);
1616 Value *A = State.get(Op);
1617 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
1618 State.set(this, Cast);
1619 State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
// The cast may be folded to a constant/argument, so only set flags on
// an actual instruction.
1620 if (auto *CastOp = dyn_cast<Instruction>(Cast))
1621 setFlags(CastOp);
1622}
1623
// Cost of a widened cast via TTI::getCastInstrCost, deriving a
// CastContextHint from the memory recipe feeding (for extends) or consuming
// (for truncates) the cast. Some hint-returning lines are elided in this
// listing.
1625 VPCostContext &Ctx) const {
1626 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
1627 // the legacy cost model, including truncates/extends when evaluating a
1628 // reduction in a smaller type.
1629 if (!getUnderlyingValue())
1630 return 0;
1631 // Computes the CastContextHint from a recipes that may access memory.
1632 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
1633 if (VF.isScalar())
1635 if (isa<VPInterleaveRecipe>(R))
1637 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
1638 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
1640 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
1641 if (WidenMemoryRecipe == nullptr)
1643 if (!WidenMemoryRecipe->isConsecutive())
1645 if (WidenMemoryRecipe->isReverse())
1647 if (WidenMemoryRecipe->isMasked())
1650 };
1651
1652 VPValue *Operand = getOperand(0);
1654 // For Trunc/FPTrunc, get the context from the only user.
1655 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1657 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
1658 CCH = ComputeCCH(StoreRecipe);
1659 }
1660 // For Z/Sext, get the context from the operand.
1661 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
1662 Opcode == Instruction::FPExt) {
1663 if (Operand->isLiveIn())
1665 else if (Operand->getDefiningRecipe())
1666 CCH = ComputeCCH(Operand->getDefiningRecipe());
1667 }
1668
1669 auto *SrcTy =
1670 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
1671 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
1672 // Arm TTI will use the underlying instruction to determine the cost.
1673 return Ctx.TTI.getCastInstrCost(
1674 Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
1675 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
1676}
1677
1678#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for WIDEN-CAST recipes, including the destination type.
1680 VPSlotTracker &SlotTracker) const {
1681 O << Indent << "WIDEN-CAST ";
1683 O << " = " << Instruction::getOpcodeName(Opcode);
1684 printFlags(O);
1686 O << " to " << *getResultType();
1687}
1688#endif
1689
// Cost this recipe as a plain IR PHI via TTI's control-flow cost query.
// NOTE(review): the owning class name is on the elided signature line —
// verify against upstream.
1691 VPCostContext &Ctx) const {
1692 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
1693}
1694
1695/// This function adds
1696/// (0 * Step, 1 * Step, 2 * Step, ...)
1697/// to each vector element of Val.
1698/// \p Opcode is relevant for FP induction variable.
// NOTE(review): the parameter line carrying the FP opcode and the VF is
// elided in this listing — verify against upstream.
1699static Value *getStepVector(Value *Val, Value *Step,
1701 IRBuilderBase &Builder) {
1702 assert(VF.isVector() && "only vector VFs are supported");
1703
1704 // Create and check the types.
1705 auto *ValVTy = cast<VectorType>(Val->getType());
1706 ElementCount VLen = ValVTy->getElementCount();
1707
1708 Type *STy = Val->getType()->getScalarType();
1709 assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
1710 "Induction Step must be an integer or FP");
1711 assert(Step->getType() == STy && "Step has wrong type");
1712
1714
1715 // Create a vector of consecutive numbers from zero to VF.
1716 VectorType *InitVecValVTy = ValVTy;
// For FP inductions build the step vector in an integer type first, then
// convert to FP below.
1717 if (STy->isFloatingPointTy()) {
1718 Type *InitVecValSTy =
1720 InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
1721 }
1722 Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
1723
1724 if (STy->isIntegerTy()) {
1725 Step = Builder.CreateVectorSplat(VLen, Step);
1726 assert(Step->getType() == Val->getType() && "Invalid step vec");
1727 // FIXME: The newly created binary instructions should contain nsw/nuw
1728 // flags, which can be found from the original scalar operations.
1729 Step = Builder.CreateMul(InitVec, Step);
1730 return Builder.CreateAdd(Val, Step, "induction");
1731 }
1732
1733 // Floating point induction.
1734 assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
1735 "Binary Opcode should be specified for FP induction");
1736 InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
1737
1738 Step = Builder.CreateVectorSplat(VLen, Step);
1739 Value *MulOp = Builder.CreateFMul(InitVec, Step);
1740 return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
1741}
1742
1743/// A helper function that returns an integer or floating-point constant with
1744/// value C.
// NOTE(review): the signature line is elided in this listing — verify
// against upstream (takes a Type and a signed integer value).
1746 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
1747 : ConstantFP::get(Ty, C);
1748}
1749
// Materialize a widened integer/FP induction: build the initial step vector
// in the preheader, create the vector phi, and emit the per-iteration
// increment (VF * Step, or the pre-computed splat when unrolled).
1751 assert(!State.Lane && "Int or FP induction being replicated.");
1752
1753 Value *Start = getStartValue()->getLiveInIRValue();
1755 TruncInst *Trunc = getTruncInst();
1756 IRBuilderBase &Builder = State.Builder;
1757 assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
1758 "Types must match");
1759 assert(State.VF.isVector() && "must have vector VF");
1760
1761 // The value from the original loop to which we are mapping the new induction
1762 // variable.
1763 Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
1764
1765 // Fast-math-flags propagate from the original induction instruction.
1766 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1767 if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
1768 Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
1769
1770 // Now do the actual transformations, and start with fetching the step value.
1771 Value *Step = State.get(getStepValue(), VPLane(0));
1772
1773 assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
1774 "Expected either an induction phi-node or a truncate of it!");
1775
1776 // Construct the initial value of the vector IV in the vector loop preheader
1777 auto CurrIP = Builder.saveIP();
1778 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1779 Builder.SetInsertPoint(VectorPH->getTerminator());
1780 if (isa<TruncInst>(EntryVal)) {
1781 assert(Start->getType()->isIntegerTy() &&
1782 "Truncation requires an integer type");
1783 auto *TruncType = cast<IntegerType>(EntryVal->getType());
1784 Step = Builder.CreateTrunc(Step, TruncType);
1785 Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
1786 }
1787
1788 Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
1789 Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
1790 State.VF, State.Builder);
1791
1792 // We create vector phi nodes for both integer and floating-point induction
1793 // variables. Here, we determine the kind of arithmetic we will perform.
1796 if (Step->getType()->isIntegerTy()) {
1797 AddOp = Instruction::Add;
1798 MulOp = Instruction::Mul;
1799 } else {
1800 AddOp = ID.getInductionOpcode();
1801 MulOp = Instruction::FMul;
1802 }
1803
1804 Value *SplatVF;
1805 if (VPValue *SplatVFOperand = getSplatVFValue()) {
1806 // The recipe has been unrolled. In that case, fetch the splat value for the
1807 // induction increment.
1808 SplatVF = State.get(SplatVFOperand);
1809 } else {
1810 // Multiply the vectorization factor by the step using integer or
1811 // floating-point arithmetic as appropriate.
1812 Type *StepType = Step->getType();
1813 Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
1814 if (Step->getType()->isFloatingPointTy())
1815 RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
1816 else
1817 RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
1818 Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
1819
1820 // Create a vector splat to use in the induction update.
1821 SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
1822 }
1823
1824 Builder.restoreIP(CurrIP);
1825
1826 // We may need to add the step a number of times, depending on the unroll
1827 // factor. The last of those goes into the PHI.
1828 PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
1829 VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
1830 VecInd->setDebugLoc(getDebugLoc());
1831 State.set(this, VecInd);
1832
1833 Instruction *LastInduction = cast<Instruction>(
1834 Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
1835 if (isa<TruncInst>(EntryVal))
1836 State.addMetadata(LastInduction, EntryVal);
1837 LastInduction->setDebugLoc(getDebugLoc());
1838
1839 VecInd->addIncoming(SteppedStart, VectorPH);
1840 // Add induction update using an incorrect block temporarily. The phi node
1841 // will be fixed after VPlan execution. Note that at this point the latch
1842 // block cannot be used, as it does not exist yet.
1843 // TODO: Model increment value in VPlan, by turning the recipe into a
1844 // multi-def and a subclass of VPHeaderPHIRecipe.
1845 VecInd->addIncoming(LastInduction, VectorPH);
1846}
1847
1848#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for WIDEN-INDUCTION recipes, noting any truncation.
1850 VPSlotTracker &SlotTracker) const {
1851 O << Indent;
1853 O << " = WIDEN-INDUCTION ";
1855
1856 if (auto *TI = getTruncInst())
1857 O << " (truncated to " << *TI->getType() << ")";
1858}
1859#endif
1860
// Return true if this induction is the canonical one: starts at 0, steps by
// a live-in constant 1, and has the same scalar type as the canonical IV.
// NOTE(review): the guard condition before the early return is elided in
// this listing — verify against upstream.
1862 // The step may be defined by a recipe in the preheader (e.g. if it requires
1863 // SCEV expansion), but for the canonical induction the step is required to be
1864 // 1, which is represented as live-in.
1866 return false;
1867 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
1868 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1869 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1870 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1871 getScalarType() == CanIV->getScalarType();
1872}
1873
1874#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for DERIVED-IV recipes (start + index * step form;
// operand printAsOperand lines are elided in this listing).
1876 VPSlotTracker &SlotTracker) const {
1877 O << Indent;
1879 O << " = DERIVED-IV ";
1881 O << " + ";
1883 O << " * ";
1885}
1886#endif
1887
// Compute per-lane scalar induction steps: BaseIV + (Part*VF + Lane) * Step.
// For scalable VFs where all lanes are demanded, a vector of steps is also
// produced; per-lane scalar values are always recorded as well.
1889 // Fast-math-flags propagate from the original induction instruction.
1891 if (hasFastMathFlags())
1893
1894 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
1895 /// variable on which to base the steps, \p Step is the size of the step.
1896
1897 Value *BaseIV = State.get(getOperand(0), VPLane(0));
1898 Value *Step = State.get(getStepValue(), VPLane(0));
1899 IRBuilderBase &Builder = State.Builder;
1900
1901 // Ensure step has the same type as that of scalar IV.
1902 Type *BaseIVTy = BaseIV->getType()->getScalarType();
1903 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
1904
1905 // We build scalar steps for both integer and floating-point induction
1906 // variables. Here, we determine the kind of arithmetic we will perform.
1909 if (BaseIVTy->isIntegerTy()) {
1910 AddOp = Instruction::Add;
1911 MulOp = Instruction::Mul;
1912 } else {
1913 AddOp = InductionOpcode;
1914 MulOp = Instruction::FMul;
1915 }
1916
1917 // Determine the number of scalars we need to generate for each unroll
1918 // iteration.
1919 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
1920 // Compute the scalar steps and save the results in State.
1921 Type *IntStepTy =
1922 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
1923 Type *VecIVTy = nullptr;
1924 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
1925 if (!FirstLaneOnly && State.VF.isScalable()) {
1926 VecIVTy = VectorType::get(BaseIVTy, State.VF);
1927 UnitStepVec =
1928 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
1929 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
1930 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
1931 }
1932
// When replicating a single lane, restrict the loop below to that lane.
1933 unsigned StartLane = 0;
1934 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
1935 if (State.Lane) {
1936 StartLane = State.Lane->getKnownLane();
1937 EndLane = StartLane + 1;
1938 }
1939 Value *StartIdx0 =
1940 createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
1941
1942 if (!FirstLaneOnly && State.VF.isScalable()) {
1943 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
1944 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
1945 if (BaseIVTy->isFloatingPointTy())
1946 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
1947 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
1948 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
1949 State.set(this, Add);
1950 // It's useful to record the lane values too for the known minimum number
1951 // of elements so we do those below. This improves the code quality when
1952 // trying to extract the first element, for example.
1953 }
1954
1955 if (BaseIVTy->isFloatingPointTy())
1956 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
1957
1958 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
1959 Value *StartIdx = Builder.CreateBinOp(
1960 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
1961 // The step returned by `createStepForVF` is a runtime-evaluated value
1962 // when VF is scalable. Otherwise, it should be folded into a Constant.
1963 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
1964 "Expected StartIdx to be folded to a constant when VF is not "
1965 "scalable");
1966 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
1967 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
1968 State.set(this, Add, VPLane(Lane));
1969 }
1970}
1971
1972#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for SCALAR-STEPS recipes.
1974 VPSlotTracker &SlotTracker) const {
1975 O << Indent;
1977 O << " = SCALAR-STEPS ";
1979}
1980#endif
1981
// Widen a GEP: keep loop-invariant operands scalar and only vectorize
// loop-varying ones; an all-invariant GEP is computed once and splatted.
1983 assert(State.VF.isVector() && "not widening");
1984 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1985 // Construct a vector GEP by widening the operands of the scalar GEP as
1986 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1987 // results in a vector of pointers when at least one operand of the GEP
1988 // is vector-typed. Thus, to keep the representation compact, we only use
1989 // vector-typed operands for loop-varying values.
1990
1991 if (areAllOperandsInvariant()) {
1992 // If we are vectorizing, but the GEP has only loop-invariant operands,
1993 // the GEP we build (by only using vector-typed operands for
1994 // loop-varying values) would be a scalar pointer. Thus, to ensure we
1995 // produce a vector of pointers, we need to either arbitrarily pick an
1996 // operand to broadcast, or broadcast a clone of the original GEP.
1997 // Here, we broadcast a clone of the original.
1998 //
1999 // TODO: If at some point we decide to scalarize instructions having
2000 // loop-invariant operands, this special case will no longer be
2001 // required. We would add the scalarization decision to
2002 // collectLoopScalars() and teach getVectorValue() to broadcast
2003 // the lane-zero scalar value.
2005 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
2006 Ops.push_back(State.get(getOperand(I), VPLane(0)));
2007
2008 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
2009 ArrayRef(Ops).drop_front(), "",
2011 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
2012 State.set(this, Splat);
2013 State.addMetadata(Splat, GEP);
2014 } else {
2015 // If the GEP has at least one loop-varying operand, we are sure to
2016 // produce a vector of pointers unless VF is scalar.
2017 // The pointer operand of the new GEP. If it's loop-invariant, we
2018 // won't broadcast it.
2019 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
2020 : State.get(getOperand(0));
2021
2022 // Collect all the indices for the new GEP. If any index is
2023 // loop-invariant, we won't broadcast it.
2025 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
2026 VPValue *Operand = getOperand(I);
2027 if (isIndexLoopInvariant(I - 1))
2028 Indices.push_back(State.get(Operand, VPLane(0)));
2029 else
2030 Indices.push_back(State.get(Operand));
2031 }
2032
2033 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
2034 // but it should be a vector, otherwise.
2035 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
2036 Indices, "", getGEPNoWrapFlags());
2037 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
2038 "NewGEP is not a pointer vector");
2039 State.set(this, NewGEP);
2040 State.addMetadata(NewGEP, GEP);
2041 }
2042}
2043
2044#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug printing for WIDEN-GEP recipes, tagging the pointer and each index
// as Inv(ariant) or Var(ying).
2046 VPSlotTracker &SlotTracker) const {
2047 O << Indent << "WIDEN-GEP ";
2048 O << (isPointerLoopInvariant() ? "Inv" : "Var");
2049 for (size_t I = 0; I < getNumOperands() - 1; ++I)
2050 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
2051
2052 O << " ";
2054 O << " = getelementptr";
2055 printFlags(O);
2057}
2058#endif
2059
2060static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
2061 unsigned CurrentPart, IRBuilderBase &Builder) {
2062 // Use i32 for the gep index type when the value is constant,
2063 // or query DataLayout for a more suitable index type otherwise.
2064 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2065 return IsScalable && (IsReverse || CurrentPart > 0)
2066 ? DL.getIndexType(Builder.getPtrTy(0))
2067 : Builder.getInt32Ty();
2068}
2069
2071 auto &Builder = State.Builder;
2073 unsigned CurrentPart = getUnrollPart(*this);
2074 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2075 CurrentPart, Builder);
2076
2077 // The wide store needs to start at the last vector element.
2078 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2079 if (IndexTy != RunTimeVF->getType())
2080 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
2081 // NumElt = -CurrentPart * RunTimeVF
2082 Value *NumElt = Builder.CreateMul(
2083 ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
2084 // LastLane = 1 - RunTimeVF
2085 Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
2086 Value *Ptr = State.get(getOperand(0), VPLane(0));
2087 Value *ResultPtr =
2088 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
2089 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
2091
2092 State.set(this, ResultPtr, /*IsScalar*/ true);
2093}
2094
2095#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2097 VPSlotTracker &SlotTracker) const {
2098 O << Indent;
2100 O << " = reverse-vector-pointer";
2101 printFlags(O);
2103}
2104#endif
2105
2107 auto &Builder = State.Builder;
2109 unsigned CurrentPart = getUnrollPart(*this);
2110 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2111 CurrentPart, Builder);
2112 Value *Ptr = State.get(getOperand(0), VPLane(0));
2113
2114 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2115 Value *ResultPtr =
2116 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2117
2118 State.set(this, ResultPtr, /*IsScalar*/ true);
2119}
2120
2121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2123 VPSlotTracker &SlotTracker) const {
2124 O << Indent;
2126 O << " = vector-pointer ";
2127
2129}
2130#endif
2131
2133 assert(isNormalized() && "Expected blend to be normalized!");
2135 // We know that all PHIs in non-header blocks are converted into
2136 // selects, so we don't have to worry about the insertion order and we
2137 // can just use the builder.
2138 // At this point we generate the predication tree. There may be
2139 // duplications since this is a simple recursive scan, but future
2140 // optimizations will clean it up.
2141
2142 unsigned NumIncoming = getNumIncomingValues();
2143
2144 // Generate a sequence of selects of the form:
2145 // SELECT(Mask3, In3,
2146 // SELECT(Mask2, In2,
2147 // SELECT(Mask1, In1,
2148 // In0)))
2149 // Note that Mask0 is never used: lanes for which no path reaches this phi and
2150 // are essentially undef are taken from In0.
2151 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
2152 Value *Result = nullptr;
2153 for (unsigned In = 0; In < NumIncoming; ++In) {
2154 // We might have single edge PHIs (blocks) - use an identity
2155 // 'select' for the first PHI operand.
2156 Value *In0 = State.get(getIncomingValue(In), OnlyFirstLaneUsed);
2157 if (In == 0)
2158 Result = In0; // Initialize with the first incoming value.
2159 else {
2160 // Select between the current value and the previous incoming edge
2161 // based on the incoming mask.
2162 Value *Cond = State.get(getMask(In), OnlyFirstLaneUsed);
2163 Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
2164 }
2165 }
2166 State.set(this, Result, OnlyFirstLaneUsed);
2167}
2168
2170 VPCostContext &Ctx) const {
2171 // Handle cases where only the first lane is used the same way as the legacy
2172 // cost model.
2174 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2175
2176 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2177 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2178 return (getNumIncomingValues() - 1) *
2179 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2181}
2182
2183#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2185 VPSlotTracker &SlotTracker) const {
2186 O << Indent << "BLEND ";
2188 O << " =";
2189 if (getNumIncomingValues() == 1) {
2190 // Not a User of any mask: not really blending, this is a
2191 // single-predecessor phi.
2192 O << " ";
2194 } else {
2195 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2196 O << " ";
2198 if (I == 0)
2199 continue;
2200 O << "/";
2202 }
2203 }
2204}
2205#endif
2206
2208 assert(!State.Lane && "Reduction being replicated.");
2209 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2210 RecurKind Kind = RdxDesc.getRecurrenceKind();
2211 // Propagate the fast-math flags carried by the underlying instruction.
2213 State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2215 Value *NewVecOp = State.get(getVecOp());
2216 if (VPValue *Cond = getCondOp()) {
2217 Value *NewCond = State.get(Cond, State.VF.isScalar());
2218 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2219 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2220
2221 Value *Start;
2223 Start = RdxDesc.getRecurrenceStartValue();
2224 else
2225 Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
2226 RdxDesc.getFastMathFlags());
2227 if (State.VF.isVector())
2228 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2229
2230 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2231 NewVecOp = Select;
2232 }
2233 Value *NewRed;
2234 Value *NextInChain;
2235 if (IsOrdered) {
2236 if (State.VF.isVector())
2237 NewRed =
2238 createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
2239 else
2240 NewRed = State.Builder.CreateBinOp(
2241 (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
2242 PrevInChain = NewRed;
2243 NextInChain = NewRed;
2244 } else {
2245 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2246 NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
2248 NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
2249 NewRed, PrevInChain);
2250 else
2251 NextInChain = State.Builder.CreateBinOp(
2252 (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
2253 }
2254 State.set(this, NextInChain, /*IsScalar*/ true);
2255}
2256
2258 assert(!State.Lane && "Reduction being replicated.");
2259
2260 auto &Builder = State.Builder;
2261 // Propagate the fast-math flags carried by the underlying instruction.
2262 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2264 Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
2265
2266 RecurKind Kind = RdxDesc.getRecurrenceKind();
2267 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2268 Value *VecOp = State.get(getVecOp());
2269 Value *EVL = State.get(getEVL(), VPLane(0));
2270
2271 VectorBuilder VBuilder(Builder);
2272 VBuilder.setEVL(EVL);
2273 Value *Mask;
2274 // TODO: move the all-true mask generation into VectorBuilder.
2275 if (VPValue *CondOp = getCondOp())
2276 Mask = State.get(CondOp);
2277 else
2278 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2279 VBuilder.setMask(Mask);
2280
2281 Value *NewRed;
2282 if (isOrdered()) {
2283 NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
2284 } else {
2285 NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
2287 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2288 else
2289 NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
2290 NewRed, Prev);
2291 }
2292 State.set(this, NewRed, /*IsScalar*/ true);
2293}
2294
2296 VPCostContext &Ctx) const {
2297 RecurKind RdxKind = RdxDesc.getRecurrenceKind();
2298 Type *ElementTy = Ctx.Types.inferScalarType(this);
2299 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2300 unsigned Opcode = RdxDesc.getOpcode();
2301
2302 // TODO: Support any-of and in-loop reductions.
2303 assert(
2305 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2306 "Any-of reduction not implemented in VPlan-based cost model currently.");
2307 assert(
2308 (!cast<VPReductionPHIRecipe>(getOperand(0))->isInLoop() ||
2309 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2310 "In-loop reduction not implemented in VPlan-based cost model currently.");
2311
2312 assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
2313 "Inferred type and recurrence type mismatch.");
2314
2315 // Cost = Reduction cost + BinOp cost
2317 Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
2320 return Cost + Ctx.TTI.getMinMaxReductionCost(
2321 Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
2322 }
2323
2324 return Cost + Ctx.TTI.getArithmeticReductionCost(
2325 Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
2326}
2327
2328#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2330 VPSlotTracker &SlotTracker) const {
2331 O << Indent << "REDUCE ";
2333 O << " = ";
2335 O << " +";
2336 if (isa<FPMathOperator>(getUnderlyingInstr()))
2338 O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2340 if (isConditional()) {
2341 O << ", ";
2343 }
2344 O << ")";
2345 if (RdxDesc.IntermediateStore)
2346 O << " (with final reduction value stored in invariant address sank "
2347 "outside of loop)";
2348}
2349
2351 VPSlotTracker &SlotTracker) const {
2353 O << Indent << "REDUCE ";
2355 O << " = ";
2357 O << " +";
2358 if (isa<FPMathOperator>(getUnderlyingInstr()))
2360 O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
2362 O << ", ";
2364 if (isConditional()) {
2365 O << ", ";
2367 }
2368 O << ")";
2369 if (RdxDesc.IntermediateStore)
2370 O << " (with final reduction value stored in invariant address sank "
2371 "outside of loop)";
2372}
2373#endif
2374
2376 // Find if the recipe is used by a widened recipe via an intervening
2377 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
2378 return any_of(users(), [](const VPUser *U) {
2379 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
2380 return any_of(PredR->users(), [PredR](const VPUser *U) {
2381 return !U->usesScalars(PredR);
2382 });
2383 return false;
2384 });
2385}
2386
2388 VPCostContext &Ctx) const {
2389 Instruction *UI = cast<Instruction>(getUnderlyingValue());
2390 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
2391 // transform, avoid computing their cost multiple times for now.
2392 Ctx.SkipCostComputation.insert(UI);
2393 return Ctx.getLegacyCost(UI, VF);
2394}
2395
2396#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2398 VPSlotTracker &SlotTracker) const {
2399 O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
2400
2401 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
2403 O << " = ";
2404 }
2405 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
2406 O << "call";
2407 printFlags(O);
2408 O << "@" << CB->getCalledFunction()->getName() << "(";
2410 O, [&O, &SlotTracker](VPValue *Op) {
2411 Op->printAsOperand(O, SlotTracker);
2412 });
2413 O << ")";
2414 } else {
2416 printFlags(O);
2418 }
2419
2420 if (shouldPack())
2421 O << " (S->V)";
2422}
2423#endif
2424
2425Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
// Emit the IR for this scalar cast recipe. Only first-lane (lane 0) codegen
// is implemented; the assert below (its opening lines are elided in this
// extracted view) enforces that precondition.
2428 "Codegen only implemented for first lane.");
2429 switch (Opcode) {
2430 case Instruction::SExt:
2431 case Instruction::ZExt:
2432 case Instruction::Trunc: {
2433 // Note: SExt/ZExt not used yet.
// Read the lane-0 scalar value of the single operand and cast it to
// ResultTy using the opcode carried by the recipe.
2434 Value *Op = State.get(getOperand(0), VPLane(0));
2435 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
2436 }
2437 default:
// Any other cast opcode is a VPlan construction bug, not a runtime case.
2438 llvm_unreachable("opcode not implemented yet");
2439 }
2440}
2441
2442void VPScalarCastRecipe ::execute(VPTransformState &State) {
2443 State.set(this, generate(State), VPLane(0));
2444}
2445
2446#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2447void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
2448 VPSlotTracker &SlotTracker) const {
2449 O << Indent << "SCALAR-CAST ";
2450 printAsOperand(O, SlotTracker);
2451 O << " = " << Instruction::getOpcodeName(Opcode) << " ";
2452 printOperands(O, SlotTracker);
2453 O << " to " << *ResultTy;
2454}
2455#endif
2456
2458 assert(State.Lane && "Branch on Mask works only on single instance.");
2459
2460
2461 Value *ConditionBit = nullptr;
2462 VPValue *BlockInMask = getMask();
2463 if (BlockInMask)
2464 ConditionBit = State.get(BlockInMask, *State.Lane);
2465 else // Block in mask is all-one.
2466 ConditionBit = State.Builder.getTrue();
2467
2468 // Replace the temporary unreachable terminator with a new conditional branch,
2469 // whose two destinations will be set later when they are created.
2470 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2471 assert(isa<UnreachableInst>(CurrentTerminator) &&
2472 "Expected to replace unreachable terminator with conditional branch.");
2473 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
2474 CondBr->setSuccessor(0, nullptr);
2475 ReplaceInstWithInst(CurrentTerminator, CondBr);
2476}
2477
2479 VPCostContext &Ctx) const {
2480 // The legacy cost model doesn't assign costs to branches for individual
2481 // replicate regions. Match the current behavior in the VPlan cost model for
2482 // now.
2483 return 0;
2484}
2485
2488 assert(State.Lane && "Predicated instruction PHI works per instance.");
2489 Instruction *ScalarPredInst =
2490 cast<Instruction>(State.get(getOperand(0), *State.Lane));
2491 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
2492 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
2493 assert(PredicatingBB && "Predicated block has no single predecessor.");
2494 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
2495 "operand must be VPReplicateRecipe");
2496
2497 // By current pack/unpack logic we need to generate only a single phi node: if
2498 // a vector value for the predicated instruction exists at this point it means
2499 // the instruction has vector users only, and a phi for the vector value is
2500 // needed. In this case the recipe of the predicated instruction is marked to
2501 // also do that packing, thereby "hoisting" the insert-element sequence.
2502 // Otherwise, a phi node for the scalar value is needed.
2503 if (State.hasVectorValue(getOperand(0))) {
2504 Value *VectorValue = State.get(getOperand(0));
2505 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
2506 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
2507 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
2508 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
2509 if (State.hasVectorValue(this))
2510 State.reset(this, VPhi);
2511 else
2512 State.set(this, VPhi);
2513 // NOTE: Currently we need to update the value of the operand, so the next
2514 // predicated iteration inserts its generated value in the correct vector.
2515 State.reset(getOperand(0), VPhi);
2516 } else {
2517 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
2518 return;
2519
2520 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
2521 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
2522 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
2523 PredicatingBB);
2524 Phi->addIncoming(ScalarPredInst, PredicatedBB);
2525 if (State.hasScalarValue(this, *State.Lane))
2526 State.reset(this, Phi, *State.Lane);
2527 else
2528 State.set(this, Phi, *State.Lane);
2529 // NOTE: Currently we need to update the value of the operand, so the next
2530 // predicated iteration inserts its generated value in the correct vector.
2531 State.reset(getOperand(0), Phi, *State.Lane);
2532 }
2533}
2534
2535#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2537 VPSlotTracker &SlotTracker) const {
2538 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
2540 O << " = ";
2542}
2543#endif
2544
2546 VPCostContext &Ctx) const {
2548 const Align Alignment =
2550 unsigned AS =
2552
2553 if (!Consecutive) {
2554 // TODO: Using the original IR may not be accurate.
2555 // Currently, ARM will use the underlying IR to calculate gather/scatter
2556 // instruction cost.
2558 assert(!Reverse &&
2559 "Inconsecutive memory access should not have the order.");
2560 return Ctx.TTI.getAddressComputationCost(Ty) +
2562 IsMasked, Alignment, Ctx.CostKind,
2563 &Ingredient);
2564 }
2565
2567 if (IsMasked) {
2568 Cost += Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
2569 AS, Ctx.CostKind);
2570 } else {
2571 TTI::OperandValueInfo OpInfo =
2573 Cost += Ctx.TTI.getMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment, AS,
2574 Ctx.CostKind, OpInfo, &Ingredient);
2575 }
2576 if (!Reverse)
2577 return Cost;
2578
2579 return Cost +=
2581 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
2582}
2583
2585 auto *LI = cast<LoadInst>(&Ingredient);
2586
2587 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2588 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2589 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2590 bool CreateGather = !isConsecutive();
2591
2592 auto &Builder = State.Builder;
2594 Value *Mask = nullptr;
2595 if (auto *VPMask = getMask()) {
2596 // Mask reversal is only needed for non-all-one (null) masks, as reverse
2597 // of a null all-one mask is a null mask.
2598 Mask = State.get(VPMask);
2599 if (isReverse())
2600 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2601 }
2602
2603 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
2604 Value *NewLI;
2605 if (CreateGather) {
2606 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
2607 "wide.masked.gather");
2608 } else if (Mask) {
2609 NewLI =
2610 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
2611 PoisonValue::get(DataTy), "wide.masked.load");
2612 } else {
2613 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
2614 }
2615 // Add metadata to the load, but setVectorValue to the reverse shuffle.
2616 State.addMetadata(NewLI, LI);
2617 if (Reverse)
2618 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
2619 State.set(this, NewLI);
2620}
2621
2622#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2624 VPSlotTracker &SlotTracker) const {
2625 O << Indent << "WIDEN ";
2627 O << " = load ";
2629}
2630#endif
2631
2632/// Use all-true mask for reverse rather than actual mask, as it avoids a
2633/// dependence w/o affecting the result.
2635 Value *EVL, const Twine &Name) {
2636 VectorType *ValTy = cast<VectorType>(Operand->getType());
2637 Value *AllTrueMask =
2638 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2639 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2640 {Operand, AllTrueMask, EVL}, nullptr, Name);
2641}
2642
2644 auto *LI = cast<LoadInst>(&Ingredient);
2645
2646 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
2647 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
2648 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2649 bool CreateGather = !isConsecutive();
2650
2651 auto &Builder = State.Builder;
2653 CallInst *NewLI;
2654 Value *EVL = State.get(getEVL(), VPLane(0));
2655 Value *Addr = State.get(getAddr(), !CreateGather);
2656 Value *Mask = nullptr;
2657 if (VPValue *VPMask = getMask()) {
2658 Mask = State.get(VPMask);
2659 if (isReverse())
2660 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2661 } else {
2662 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2663 }
2664
2665 if (CreateGather) {
2666 NewLI =
2667 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2668 nullptr, "wide.masked.gather");
2669 } else {
2670 VectorBuilder VBuilder(Builder);
2671 VBuilder.setEVL(EVL).setMask(Mask);
2672 NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
2673 Instruction::Load, DataTy, Addr, "vp.op.load"));
2674 }
2675 NewLI->addParamAttr(
2676 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2677 State.addMetadata(NewLI, LI);
2678 Instruction *Res = NewLI;
2679 if (isReverse())
2680 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2681 State.set(this, Res);
2682}
2683
2685 VPCostContext &Ctx) const {
2686 if (!Consecutive || IsMasked)
2687 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2688
2689 // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
2690 // here because the EVL recipes using EVL to replace the tail mask. But in the
2691 // legacy model, it will always calculate the cost of mask.
2692 // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
2693 // don't need to compare to the legacy cost model.
2695 const Align Alignment =
2697 unsigned AS =
2700 Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
2701 if (!Reverse)
2702 return Cost;
2703
2705 cast<VectorType>(Ty), {}, Ctx.CostKind,
2706 0);
2707}
2708
2709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2711 VPSlotTracker &SlotTracker) const {
2712 O << Indent << "WIDEN ";
2714 O << " = vp.load ";
2716}
2717#endif
2718
2720 auto *SI = cast<StoreInst>(&Ingredient);
2721
2722 VPValue *StoredVPValue = getStoredValue();
2723 bool CreateScatter = !isConsecutive();
2724 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2725
2726 auto &Builder = State.Builder;
2728
2729 Value *Mask = nullptr;
2730 if (auto *VPMask = getMask()) {
2731 // Mask reversal is only needed for non-all-one (null) masks, as reverse
2732 // of a null all-one mask is a null mask.
2733 Mask = State.get(VPMask);
2734 if (isReverse())
2735 Mask = Builder.CreateVectorReverse(Mask, "reverse");
2736 }
2737
2738 Value *StoredVal = State.get(StoredVPValue);
2739 if (isReverse()) {
2740 // If we store to reverse consecutive memory locations, then we need
2741 // to reverse the order of elements in the stored value.
2742 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
2743 // We don't want to update the value in the map as it might be used in
2744 // another expression. So don't call resetVectorValue(StoredVal).
2745 }
2746 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
2747 Instruction *NewSI = nullptr;
2748 if (CreateScatter)
2749 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
2750 else if (Mask)
2751 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
2752 else
2753 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2754 State.addMetadata(NewSI, SI);
2755}
2756
2757#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2759 VPSlotTracker &SlotTracker) const {
2760 O << Indent << "WIDEN store ";
2762}
2763#endif
2764
2766 auto *SI = cast<StoreInst>(&Ingredient);
2767
2768 VPValue *StoredValue = getStoredValue();
2769 bool CreateScatter = !isConsecutive();
2770 const Align Alignment = getLoadStoreAlignment(&Ingredient);
2771
2772 auto &Builder = State.Builder;
2774
2775 CallInst *NewSI = nullptr;
2776 Value *StoredVal = State.get(StoredValue);
2777 Value *EVL = State.get(getEVL(), VPLane(0));
2778 if (isReverse())
2779 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
2780 Value *Mask = nullptr;
2781 if (VPValue *VPMask = getMask()) {
2782 Mask = State.get(VPMask);
2783 if (isReverse())
2784 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2785 } else {
2786 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2787 }
2788 Value *Addr = State.get(getAddr(), !CreateScatter);
2789 if (CreateScatter) {
2790 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
2791 Intrinsic::vp_scatter,
2792 {StoredVal, Addr, Mask, EVL});
2793 } else {
2794 VectorBuilder VBuilder(Builder);
2795 VBuilder.setEVL(EVL).setMask(Mask);
2796 NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
2797 Instruction::Store, Type::getVoidTy(EVL->getContext()),
2798 {StoredVal, Addr}));
2799 }
2800 NewSI->addParamAttr(
2801 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2802 State.addMetadata(NewSI, SI);
2803}
2804
2806 VPCostContext &Ctx) const {
2807 if (!Consecutive || IsMasked)
2808 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
2809
2810 // We need to use the getMaskedMemoryOpCost() instead of getMemoryOpCost()
2811 // here because the EVL recipes using EVL to replace the tail mask. But in the
2812 // legacy model, it will always calculate the cost of mask.
2813 // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
2814 // don't need to compare to the legacy cost model.
2816 const Align Alignment =
2818 unsigned AS =
2821 Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
2822 if (!Reverse)
2823 return Cost;
2824
2826 cast<VectorType>(Ty), {}, Ctx.CostKind,
2827 0);
2828}
2829
2830#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2832 VPSlotTracker &SlotTracker) const {
2833 O << Indent << "WIDEN vp.store ";
2835}
2836#endif
2837
2839 VectorType *DstVTy, const DataLayout &DL) {
2840 // Verify that V is a vector type with same number of elements as DstVTy.
2841 auto VF = DstVTy->getElementCount();
2842 auto *SrcVecTy = cast<VectorType>(V->getType());
2843 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2844 Type *SrcElemTy = SrcVecTy->getElementType();
2845 Type *DstElemTy = DstVTy->getElementType();
2846 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2847 "Vector elements must have same size");
2848
2849 // Do a direct cast if element types are castable.
2850 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2851 return Builder.CreateBitOrPointerCast(V, DstVTy);
2852 }
2853 // V cannot be directly casted to desired vector type.
2854 // May happen when V is a floating point vector but DstVTy is a vector of
2855 // pointers or vice-versa. Handle this using a two-step bitcast using an
2856 // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
2857 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2858 "Only one type should be a pointer type");
2859 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2860 "Only one type should be a floating point type");
2861 Type *IntTy =
2862 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2863 auto *VecIntTy = VectorType::get(IntTy, VF);
2864 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2865 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2866}
2867
2868/// Return a vector containing interleaved elements from multiple
2869/// smaller input vectors.
2871 const Twine &Name) {
2872 unsigned Factor = Vals.size();
2873 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2874
2875 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2876#ifndef NDEBUG
2877 for (Value *Val : Vals)
2878 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2879#endif
2880
2881 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2882 // must use intrinsics to interleave.
2883 if (VecTy->isScalableTy()) {
2885 return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2886 Vals,
2887 /*FMFSource=*/nullptr, Name);
2888 }
2889
2890 // Fixed length. Start by concatenating all vectors into a wide vector.
2891 Value *WideVec = concatenateVectors(Builder, Vals);
2892
2893 // Interleave the elements into the wide vector.
2894 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2895 return Builder.CreateShuffleVector(
2896 WideVec, createInterleaveMask(NumElts, Factor), Name);
2897}
2898
2899// Try to vectorize the interleave group that \p Instr belongs to.
2900//
2901// E.g. Translate following interleaved load group (factor = 3):
2902// for (i = 0; i < N; i+=3) {
2903// R = Pic[i]; // Member of index 0
2904// G = Pic[i+1]; // Member of index 1
2905// B = Pic[i+2]; // Member of index 2
2906// ... // do something to R, G, B
2907// }
2908// To:
2909// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2910// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2911// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2912// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2913//
2914// Or translate following interleaved store group (factor = 3):
2915// for (i = 0; i < N; i+=3) {
2916// ... do something to R, G, B
2917// Pic[i] = R; // Member of index 0
2918// Pic[i+1] = G; // Member of index 1
2919// Pic[i+2] = B; // Member of index 2
2920// }
2921// To:
2922// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2923// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2924// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2925// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2926// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2928 assert(!State.Lane && "Interleave group being replicated.");
2929 const InterleaveGroup<Instruction> *Group = IG;
2930 Instruction *Instr = Group->getInsertPos();
2931
2932 // Prepare for the vector type of the interleaved load/store.
2933 Type *ScalarTy = getLoadStoreType(Instr);
2934 unsigned InterleaveFactor = Group->getFactor();
2935 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2936
2937 // TODO: extend the masked interleaved-group support to reversed access.
2938 VPValue *BlockInMask = getMask();
2939 assert((!BlockInMask || !Group->isReverse()) &&
2940 "Reversed masked interleave-group not supported.");
2941
2942 VPValue *Addr = getAddr();
2943 Value *ResAddr = State.get(Addr, VPLane(0));
2944 if (auto *I = dyn_cast<Instruction>(ResAddr))
2945 State.setDebugLocFrom(I->getDebugLoc());
2946
2947 // If the group is reverse, adjust the index to refer to the last vector lane
2948 // instead of the first. We adjust the index from the first vector lane,
2949 // rather than directly getting the pointer for lane VF - 1, because the
2950 // pointer operand of the interleaved access is supposed to be uniform.
2951 if (Group->isReverse()) {
2952 Value *RuntimeVF =
2953 getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2954 Value *Index =
2955 State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2956 Index = State.Builder.CreateMul(Index,
2957 State.Builder.getInt32(Group->getFactor()));
2958 Index = State.Builder.CreateNeg(Index);
2959
2960 bool InBounds = false;
2961 if (auto *Gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
2962 InBounds = Gep->isInBounds();
2963 ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
2964 }
2965
2966 State.setDebugLocFrom(Instr->getDebugLoc());
2967 Value *PoisonVec = PoisonValue::get(VecTy);
2968
2969 auto CreateGroupMask = [&BlockInMask, &State,
2970 &InterleaveFactor](Value *MaskForGaps) -> Value * {
2971 if (State.VF.isScalable()) {
2972 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2973 assert(InterleaveFactor == 2 &&
2974 "Unsupported deinterleave factor for scalable vectors");
2975 auto *ResBlockInMask = State.get(BlockInMask);
2976 SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2977 auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2978 State.VF.getKnownMinValue() * 2, true);
2979 return State.Builder.CreateIntrinsic(
2980 MaskTy, Intrinsic::vector_interleave2, Ops,
2981 /*FMFSource=*/nullptr, "interleaved.mask");
2982 }
2983
2984 if (!BlockInMask)
2985 return MaskForGaps;
2986
2987 Value *ResBlockInMask = State.get(BlockInMask);
2988 Value *ShuffledMask = State.Builder.CreateShuffleVector(
2989 ResBlockInMask,
2990 createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2991 "interleaved.mask");
2992 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2993 ShuffledMask, MaskForGaps)
2994 : ShuffledMask;
2995 };
2996
2997 const DataLayout &DL = Instr->getDataLayout();
2998 // Vectorize the interleaved load group.
2999 if (isa<LoadInst>(Instr)) {
3000 Value *MaskForGaps = nullptr;
3001 if (NeedsMaskForGaps) {
3002 MaskForGaps = createBitMaskForGaps(State.Builder,
3003 State.VF.getKnownMinValue(), *Group);
3004 assert(MaskForGaps && "Mask for Gaps is required but it is null");
3005 }
3006
3007 Instruction *NewLoad;
3008 if (BlockInMask || MaskForGaps) {
3009 Value *GroupMask = CreateGroupMask(MaskForGaps);
3010 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
3011 Group->getAlign(), GroupMask,
3012 PoisonVec, "wide.masked.vec");
3013 } else
3014 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
3015 Group->getAlign(), "wide.vec");
3016 Group->addMetadata(NewLoad);
3017
3019 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3020 if (VecTy->isScalableTy()) {
3021 assert(InterleaveFactor == 2 &&
3022 "Unsupported deinterleave factor for scalable vectors");
3023
3024 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3025 // so must use intrinsics to deinterleave.
3026 Value *DI = State.Builder.CreateIntrinsic(
3027 Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3028 /*FMFSource=*/nullptr, "strided.vec");
3029 unsigned J = 0;
3030 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3031 Instruction *Member = Group->getMember(I);
3032
3033 if (!Member)
3034 continue;
3035
3036 Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3037 // If this member has different type, cast the result type.
3038 if (Member->getType() != ScalarTy) {
3039 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3040 StridedVec =
3041 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3042 }
3043
3044 if (Group->isReverse())
3045 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3046
3047 State.set(VPDefs[J], StridedVec);
3048 ++J;
3049 }
3050
3051 return;
3052 }
3053
3054 // For each member in the group, shuffle out the appropriate data from the
3055 // wide loads.
3056 unsigned J = 0;
3057 for (unsigned I = 0; I < InterleaveFactor; ++I) {
3058 Instruction *Member = Group->getMember(I);
3059
3060 // Skip the gaps in the group.
3061 if (!Member)
3062 continue;
3063
3064 auto StrideMask =
3065 createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
3066 Value *StridedVec =
3067 State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
3068
3069 // If this member has different type, cast the result type.
3070 if (Member->getType() != ScalarTy) {
3071 assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
3072 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3073 StridedVec =
3074 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3075 }
3076
3077 if (Group->isReverse())
3078 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3079
3080 State.set(VPDefs[J], StridedVec);
3081 ++J;
3082 }
3083 return;
3084 }
3085
3086 // The sub vector type for current instruction.
3087 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3088
3089 // Vectorize the interleaved store group.
3090 Value *MaskForGaps =
3091 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3092 assert((!MaskForGaps || !State.VF.isScalable()) &&
3093 "masking gaps for scalable vectors is not yet supported.");
3094 ArrayRef<VPValue *> StoredValues = getStoredValues();
3095 // Collect the stored vector from each member.
3096 SmallVector<Value *, 4> StoredVecs;
3097 unsigned StoredIdx = 0;
3098 for (unsigned i = 0; i < InterleaveFactor; i++) {
3099 assert((Group->getMember(i) || MaskForGaps) &&
3100 "Fail to get a member from an interleaved store group");
3101 Instruction *Member = Group->getMember(i);
3102
3103 // Skip the gaps in the group.
3104 if (!Member) {
3105 Value *Undef = PoisonValue::get(SubVT);
3106 StoredVecs.push_back(Undef);
3107 continue;
3108 }
3109
3110 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3111 ++StoredIdx;
3112
3113 if (Group->isReverse())
3114 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3115
3116 // If this member has different type, cast it to a unified type.
3117
3118 if (StoredVec->getType() != SubVT)
3119 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3120
3121 StoredVecs.push_back(StoredVec);
3122 }
3123
3124 // Interleave all the smaller vectors into one wider vector.
3125 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3126 Instruction *NewStoreInstr;
3127 if (BlockInMask || MaskForGaps) {
3128 Value *GroupMask = CreateGroupMask(MaskForGaps);
3129 NewStoreInstr = State.Builder.CreateMaskedStore(
3130 IVec, ResAddr, Group->getAlign(), GroupMask);
3131 } else
3132 NewStoreInstr =
3133 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3134
3135 Group->addMetadata(NewStoreInstr);
3136}
3137
// Debug printer for an interleave-group recipe.
// NOTE(review): doc lines 3139 (the `void VPInterleaveRecipe::print(...)`
// signature), 3144 and 3161 are missing from this extraction — presumably the
// address operand's and the defined value's printAsOperand calls; confirm
// against upstream VPlanRecipes.cpp.
3138#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3140 VPSlotTracker &SlotTracker) const {
3141 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3142 IG->getInsertPos()->printAsOperand(O, false);
3143 O << ", ";
3145 VPValue *Mask = getMask();
// Print the (optional) mask operand after the address.
3146 if (Mask) {
3147 O << ", ";
3148 Mask->printAsOperand(O, SlotTracker);
3149 }
3150
// One line per group member; gaps in the group are skipped, so OpIdx tracks
// the operand/defined-value index independently of the member index i.
3151 unsigned OpIdx = 0;
3152 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3153 if (!IG->getMember(i))
3154 continue;
3155 if (getNumStoreOperands() > 0) {
// Store group: stored values start at operand 1 (operand 0 is the address).
3156 O << "\n" << Indent << " store ";
3157 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3158 O << " to index " << i;
3159 } else {
3160 O << "\n" << Indent << " ";
3162 O << " = load from index " << i;
3163 }
3164 ++OpIdx;
3165 }
3166}
3167#endif
3168
3170 VPCostContext &Ctx) const {
3171 Instruction *InsertPos = getInsertPos();
3172 // Find the VPValue index of the interleave group. We need to skip gaps.
3173 unsigned InsertPosIdx = 0;
3174 for (unsigned Idx = 0; IG->getFactor(); ++Idx)
3175 if (auto *Member = IG->getMember(Idx)) {
3176 if (Member == InsertPos)
3177 break;
3178 InsertPosIdx++;
3179 }
3180 Type *ValTy = Ctx.Types.inferScalarType(
3181 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3182 : getStoredValues()[InsertPosIdx]);
3183 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3184 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3185
3186 unsigned InterleaveFactor = IG->getFactor();
3187 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3188
3189 // Holds the indices of existing members in the interleaved group.
3191 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3192 if (IG->getMember(IF))
3193 Indices.push_back(IF);
3194
3195 // Calculate the cost of the whole interleaved group.
3197 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3198 IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
3199
3200 if (!IG->isReverse())
3201 return Cost;
3202
3203 return Cost + IG->getNumMembers() *
3205 VectorTy, std::nullopt, Ctx.CostKind,
3206 0);
3207}
3208
// Debug printer emitting "EMIT <def> = CANONICAL-INDUCTION <ops>".
// NOTE(review): doc lines 3210 (signature — by the tag string, presumably
// VPCanonicalIVPHIRecipe::print), 3213 and 3215 (printAsOperand calls) are
// missing from this extraction; confirm against upstream.
3209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3211 VPSlotTracker &SlotTracker) const {
3212 O << Indent << "EMIT ";
3214 O << " = CANONICAL-INDUCTION ";
3216}
3217#endif
3218
// Returns true when only scalar values (no vector) are generated: the
// induction must be scalar-after-vectorization, and for scalable VFs
// additionally only the first lane may be used.
// NOTE(review): the signature line (doc 3219) is missing from this
// extraction — presumably VPWidenPointerInductionRecipe::onlyScalarsGenerated
// (bool IsScalable); confirm against upstream.
3220 return IsScalarAfterVectorization &&
3221 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3222}
3223
// Generate code for a widened pointer induction: a scalar pointer phi that is
// advanced by step * VF * UF per iteration, plus per-part vector GEPs of the
// form base + <step*0, ..., step*(VF-1)> (offset by part).
// NOTE(review): doc lines 3224 (signature), 3226 and 3230 (assert
// continuations) are missing from this extraction; confirm against upstream.
3225 assert(getInductionDescriptor().getKind() ==
3227 "Not a pointer induction according to InductionDescriptor!");
3228 assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
3229 "Unexpected type.");
3231 "Recipe should have been replaced");
3232
3233 unsigned CurrentPart = getUnrollPart(*this);
3234
3235 // Build a pointer phi
3236 Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3237 Type *ScStValueType = ScalarStartValue->getType();
3238
3239 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3240 PHINode *NewPointerPhi = nullptr;
// Only part 0 creates the phi; it is placed right before the canonical IV phi
// (the front recipe of the vector loop region's entry block).
3241 if (CurrentPart == 0) {
3242 auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3243 ->getPlan()
3244 ->getVectorLoopRegion()
3245 ->getEntryBasicBlock()
3246 ->front());
3247 PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
3248 NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",
3249 CanonicalIV->getIterator());
3250 NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3251 NewPointerPhi->setDebugLoc(getDebugLoc());
3252 } else {
3253 // The recipe has been unrolled. In that case, fetch the single pointer phi
3254 // shared among all unrolled parts of the recipe.
3255 auto *GEP =
3256 cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3257 NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3258 }
3259
3260 // A pointer induction, performed by using a gep
3261 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3262 Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3263 Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3264 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3265 // Add induction update using an incorrect block temporarily. The phi node
3266 // will be fixed after VPlan execution. Note that at this point the latch
3267 // block cannot be used, as it does not exist yet.
3268 // TODO: Model increment value in VPlan, by turning the recipe into a
3269 // multi-def and a subclass of VPHeaderPHIRecipe.
3270 if (CurrentPart == 0) {
3271 // The recipe represents the first part of the pointer induction. Create the
3272 // GEP to increment the phi across all unrolled parts.
3273 unsigned UF = CurrentPart == 0 ? getParent()->getPlan()->getUF() : 1;
3274 Value *NumUnrolledElems =
3275 State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, UF));
3276
// i8 GEP: the step is expressed in bytes-like units relative to an i8 element.
3277 Value *InductionGEP = GetElementPtrInst::Create(
3278 State.Builder.getInt8Ty(), NewPointerPhi,
3279 State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
3280 InductionLoc);
3281
// Second incoming edge; per the comment above, VectorPH is a placeholder for
// the not-yet-created latch block and is fixed up after VPlan execution.
3282 NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3283 }
3284
3285 // Create actual address geps that use the pointer phi as base and a
3286 // vectorized version of the step value (<step*0, ..., step*N>) as offset.
3287 Type *VecPhiType = VectorType::get(PhiType, State.VF);
3288 Value *StartOffsetScalar = State.Builder.CreateMul(
3289 RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3290 Value *StartOffset =
3291 State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3292 // Create a vector of consecutive numbers from zero to VF.
3293 StartOffset = State.Builder.CreateAdd(
3294 StartOffset, State.Builder.CreateStepVector(VecPhiType));
3295
3296 assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3297 "scalar step must be the same across all parts");
3298 Value *GEP = State.Builder.CreateGEP(
3299 State.Builder.getInt8Ty(), NewPointerPhi,
3300 State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3301 State.VF, ScalarStepValue)),
3302 "vector.gep");
3303 State.set(this, GEP);
3304}
3305
// Debug printer emitting "EMIT <def> = WIDEN-POINTER-INDUCTION <start>, <step>
// [, <op2>, <op3>]". NOTE(review): doc lines 3307 (signature), 3312, 3314,
// 3316, 3319 and 3321 (printAsOperand calls) are missing from this
// extraction; confirm against upstream.
3306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3308 VPSlotTracker &SlotTracker) const {
3309 assert((getNumOperands() == 2 || getNumOperands() == 4) &&
3310 "unexpected number of operands");
3311 O << Indent << "EMIT ";
3313 O << " = WIDEN-POINTER-INDUCTION ";
3315 O << ", ";
// The unrolled form carries two extra operands, printed only when present.
3317 if (getNumOperands() == 4) {
3318 O << ", ";
3320 O << ", ";
3322 }
3323}
3324#endif
3325
// Expand the wrapped SCEV expression to IR at the current insert point and
// record the result both in the transform state's per-plan cache
// (State.ExpandedSCEVs) and as this recipe's lane-0 value.
// NOTE(review): doc lines 3326 (signature) and 3333 (a line inside the
// early-exit branch) are missing from this extraction; confirm upstream.
3327 assert(!State.Lane && "cannot be used in per-lane");
3328 if (State.ExpandedSCEVs.contains(Expr)) {
3329 // SCEV Expr has already been expanded, result must already be set. At the
3330 // moment we have to execute the entry block twice (once before skeleton
3331 // creation to get expanded SCEVs used by the skeleton and once during
3332 // regular VPlan execution).
3334 assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3335 "Results must match");
3336 return;
3337 }
3338
3339 const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
3340 SCEVExpander Exp(SE, DL, "induction");
3341
3342 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
3343 &*State.Builder.GetInsertPoint());
// Cache so a later re-execution of the entry block reuses the same IR value.
3344 State.ExpandedSCEVs[Expr] = Res;
3345 State.set(this, Res, VPLane(0));
3346}
3347
// Debug printer emitting "EMIT <def> = EXPAND SCEV <expr>".
// NOTE(review): doc lines 3349 (signature) and 3352 (printAsOperand of the
// defined value) are missing from this extraction; confirm upstream.
3348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3350 VPSlotTracker &SlotTracker) const {
3351 O << Indent << "EMIT ";
3353 O << " = EXPAND SCEV " << *Expr;
3354}
3355#endif
3356
// Materialize the widened canonical IV for this unroll part:
//   vec.iv = splat(canonical_iv) + (part-step + <0,1,...,VF-1> * step-ish)
// For scalar VFs the splat and step-vector are skipped.
// NOTE(review): the signature line (doc 3357) is missing from this
// extraction; confirm against upstream.
3358 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3359 Type *STy = CanonicalIV->getType();
// Emit into the predecessor block, before its terminator.
3360 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3361 ElementCount VF = State.VF;
3362 Value *VStart = VF.isScalar()
3363 ? CanonicalIV
3364 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
// The per-part base step accounts for the unroll part of this recipe.
3365 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3366 if (VF.isVector()) {
3367 VStep = Builder.CreateVectorSplat(VF, VStep);
3368 VStep =
3369 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3370 }
3371 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3372 State.set(this, CanonicalVectorIV);
3373}
3374
// Debug printer emitting "EMIT <def> = WIDEN-CANONICAL-INDUCTION <ops>".
// NOTE(review): doc lines 3376 (signature), 3379 and 3381 (printAsOperand
// calls) are missing from this extraction; confirm upstream.
3375#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3377 VPSlotTracker &SlotTracker) const {
3378 O << Indent << "EMIT ";
3380 O << " = WIDEN-CANONICAL-INDUCTION ";
3382}
3383#endif
3384
// Create the header phi for a first-order recurrence. The initial value is
// inserted into the LAST lane of a poison vector (the recurrence consumes the
// previous iteration's last element), and the phi gets its preheader incoming
// edge here; the loop-carried edge is added later.
// NOTE(review): the signature line (doc 3385) is missing from this
// extraction; confirm against upstream.
3386 auto &Builder = State.Builder;
3387 // Create a vector from the initial value.
3388 auto *VectorInit = getStartValue()->getLiveInIRValue();
3389
3390 Type *VecTy = State.VF.isScalar()
3391 ? VectorInit->getType()
3392 : VectorType::get(VectorInit->getType(), State.VF)
3393
3394 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3395 if (State.VF.isVector()) {
3396 auto *IdxTy = Builder.getInt32Ty();
3397 auto *One = ConstantInt::get(IdxTy, 1);
// Build the init vector in the preheader, before its terminator.
3398 IRBuilder<>::InsertPointGuard Guard(Builder);
3399 Builder.SetInsertPoint(VectorPH->getTerminator());
3400 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3401 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3402 VectorInit = Builder.CreateInsertElement(
3403 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3404 }
3405
3406 // Create a phi node for the new recurrence.
3407 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3408 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3409 Phi->addIncoming(VectorInit, VectorPH);
3410 State.set(this, Phi);
3411}
3412
// Cost of a first-order-recurrence phi: a plain PHI for scalar VFs, otherwise
// the cost of the splice-like shuffle that rotates the vector by VF-1 lanes.
// NOTE(review): doc lines 3413-3414 (signature), 3420 (the return for the
// scalable VF==1 case), 3422 (declaration of Mask) and 3427 (the
// Ctx.TTI.getShuffleCost call head) are missing from this extraction;
// confirm against upstream.
3415 VPCostContext &Ctx) const {
3416 if (VF.isScalar())
3417 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
3418
3419 if (VF.isScalable() && VF.getKnownMinValue() == 1)
3421
// Mask <VF-1, VF, ..., 2*VF-2> selects the last lane of the previous vector
// followed by the first VF-1 lanes of the current one.
3423 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
3424 Type *VectorTy =
3425 toVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
3426
3428 cast<VectorType>(VectorTy), Mask, Ctx.CostKind,
3429 VF.getKnownMinValue() - 1);
3430}
3431
// Debug printer emitting "FIRST-ORDER-RECURRENCE-PHI <def> = phi <ops>".
// NOTE(review): doc lines 3433 (signature), 3436 and 3438 (printAsOperand
// calls) are missing from this extraction; confirm upstream.
3432#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3434 VPSlotTracker &SlotTracker) const {
3435 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3437 O << " = phi ";
3439}
3440#endif
3441
// Create the header phi of a (possibly in-loop or VF-scaled) reduction and
// wire up its preheader incoming value: the start value for part 0, the
// recurrence identity for the other unroll parts.
// NOTE(review): doc lines 3442 (signature), 3475-3476 (the head of the
// recurrence-kind branch — by the comment below, presumably the
// isMinMax/isAnyOf RecurrenceDescriptor checks), 3485 (the FindLastIV branch
// head) are missing from this extraction; confirm against upstream.
3443 auto &Builder = State.Builder;
3444
3445 // If this phi is fed by a scaled reduction then it should output a
3446 // vector with fewer elements than the VF.
3447 ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3448
3449 // Reductions do not have to start at zero. They can start with
3450 // any loop invariant values.
3451 VPValue *StartVPV = getStartValue();
3452 Value *StartV = StartVPV->getLiveInIRValue();
3453
3454 // In order to support recurrences we need to be able to vectorize Phi nodes.
3455 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
3456 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
3457 // this value when we vectorize all of the instructions that use the PHI.
3458 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3459 Type *VecTy =
3460 ScalarPHI ? StartV->getType() : VectorType::get(StartV->getType(), VF);
3461
3462 BasicBlock *HeaderBB = State.CFG.PrevBB;
3463 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
3464 "recipe must be in the vector loop header");
3465 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
3466 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
3467 State.set(this, Phi, IsInLoop);
3468
3469 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3470
3471 Value *Iden = nullptr;
3472 RecurKind RK = RdxDesc.getRecurrenceKind();
3473 unsigned CurrentPart = getUnrollPart(*this);
3474
3477 // MinMax and AnyOf reductions have the start value as their identity.
3478 if (ScalarPHI) {
3479 Iden = StartV;
3480 } else {
3481 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3482 Builder.SetInsertPoint(VectorPH->getTerminator());
3483 StartV = Iden = State.get(StartVPV);
3484 }
3486 // [I|F]FindLastIV will use a sentinel value to initialize the reduction
3487 // phi or the resume value from the main vector loop when vectorizing the
3488 // epilogue loop. In the exit block, ComputeReductionResult will generate
3489 // checks to verify if the reduction result is the sentinel value. If the
3490 // result is the sentinel value, it will be corrected back to the start
3491 // value.
3492 // TODO: The sentinel value is not always necessary. When the start value is
3493 // a constant, and smaller than the start value of the induction variable,
3494 // the start value can be directly used to initialize the reduction phi.
3495 Iden = StartV;
3496 if (!ScalarPHI) {
3497 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3498 Builder.SetInsertPoint(VectorPH->getTerminator());
3499 StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden);
3500 }
3501 } else {
// General case: use the recurrence kind's algebraic identity element.
3502 Iden = llvm::getRecurrenceIdentity(RK, VecTy->getScalarType(),
3503 RdxDesc.getFastMathFlags());
3504
3505 if (!ScalarPHI) {
3506 if (CurrentPart == 0) {
3507 // Create start and identity vector values for the reduction in the
3508 // preheader.
3509 // TODO: Introduce recipes in VPlan preheader to create initial values.
3510 Iden = Builder.CreateVectorSplat(VF, Iden);
3511 IRBuilderBase::InsertPointGuard IPBuilder(Builder);
3512 Builder.SetInsertPoint(VectorPH->getTerminator());
// Part 0 carries the real start value in lane 0; identity elsewhere.
3513 Constant *Zero = Builder.getInt32(0);
3514 StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
3515 } else {
3516 Iden = Builder.CreateVectorSplat(VF, Iden);
3517 }
3518 }
3519 }
3520
3521 Phi = cast<PHINode>(State.get(this, IsInLoop));
3522 Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3523 Phi->addIncoming(StartVal, VectorPH);
3524}
3525
// Debug printer emitting "WIDEN-REDUCTION-PHI <def> = phi <ops>", plus the
// VF scale factor when the phi is fed by a scaled reduction.
// NOTE(review): doc lines 3527 (signature), 3531 and 3533 (printAsOperand
// calls) are missing from this extraction; confirm upstream.
3526#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3528 VPSlotTracker &SlotTracker) const {
3529 O << Indent << "WIDEN-REDUCTION-PHI ";
3530
3532 O << " = phi ";
3534 if (VFScaleFactor != 1)
3535 O << " (VF scaled by 1/" << VFScaleFactor << ")";
3536}
3537#endif
3538
// Create an (initially edge-less) vector phi; incoming values are filled in
// later. NOTE(review): doc lines 3539-3540 (the signature and the assert head
// whose message text survives below) and 3543 are missing from this
// extraction — by the message this is VPWidenPHIRecipe::execute guarded by an
// EnableVPlanNativePath-style assert; confirm against upstream.
3541 "Non-native vplans are not expected to have VPWidenPHIRecipes.");
3542
// The phi's type is taken from the first (widened) incoming operand.
3544 Value *Op0 = State.get(getOperand(0));
3545 Type *VecTy = Op0->getType();
3546 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
3547 State.set(this, VecPhi);
3548}
3549
// Debug printer for widened phis. Falls back to printing the original IR PHI
// whenever not all of its incoming values are modeled as VPValues.
// NOTE(review): doc lines 3551 (signature), 3565 and 3567 (printAsOperand
// calls) are missing from this extraction; confirm upstream.
3550#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3552 VPSlotTracker &SlotTracker) const {
3553 O << Indent << "WIDEN-PHI ";
3554
3555 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
3556 // Unless all incoming values are modeled in VPlan print the original PHI
3557 // directly.
3558 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
3559 // values as VPValues.
3560 if (getNumOperands() != OriginalPhi->getNumOperands()) {
3561 O << VPlanIngredient(OriginalPhi);
3562 return;
3563 }
3564
3566 O << " = phi ";
3568}
3569#endif
3570
3571// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3572// remove VPActiveLaneMaskPHIRecipe.
// Create the phi carrying the active-lane mask across iterations, seeded with
// the start mask from the preheader; the loop-carried edge is added later.
// NOTE(review): the signature line (doc 3573) is missing from this
// extraction; by the TODO above it is VPActiveLaneMaskPHIRecipe::execute.
3574 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3575 Value *StartMask = State.get(getOperand(0));
3576 PHINode *Phi =
3577 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
3578 Phi->addIncoming(StartMask, VectorPH);
3579 Phi->setDebugLoc(getDebugLoc());
3580 State.set(this, Phi);
3581}
3582
// Debug printer emitting "ACTIVE-LANE-MASK-PHI <def> = phi <ops>".
// NOTE(review): doc lines 3584 (signature), 3588 and 3590 (printAsOperand
// calls) are missing from this extraction; confirm upstream.
3583#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3585 VPSlotTracker &SlotTracker) const {
3586 O << Indent << "ACTIVE-LANE-MASK-PHI ";
3587
3589 O << " = phi ";
3591}
3592#endif
3593
// Debug printer emitting "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI <def> = phi
// <ops>". NOTE(review): doc lines 3595 (signature), 3599 and 3601
// (printAsOperand calls) are missing from this extraction; confirm upstream.
3594#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3596 VPSlotTracker &SlotTracker) const {
3597 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
3598
3600 O << " = phi ";
3602}
3603#endif
3604
// Create a scalar phi (stored with IsScalar=true) seeded with the lane-0
// start value from the preheader; the loop-carried edge is added later.
// NOTE(review): the signature line (doc 3605) is missing from this
// extraction — by the "SCALAR-PHI" printer below it, presumably a scalar-phi
// recipe's execute; confirm against upstream.
3606 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
3607 Value *Start = State.get(getStartValue(), VPLane(0));
3608 PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, Name);
3609 Phi->addIncoming(Start, VectorPH);
3610 Phi->setDebugLoc(getDebugLoc());
3611 State.set(this, Phi, /*IsScalar=*/true);
3612}
3613
// Debug printer emitting "SCALAR-PHI <def> = phi <ops>".
// NOTE(review): doc lines 3615 (signature), 3618 and 3620 (printAsOperand
// calls) are missing from this extraction; confirm upstream.
3614#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3616 VPSlotTracker &SlotTracker) const {
3617 O << Indent << "SCALAR-PHI ";
3619 O << " = phi ";
3621}
3622#endif
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
Hexagon Common GEP
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > ForceTargetInstructionCost("force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " "useful for getting consistent testing."))
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file contains the declarations of different VPlan-related auxiliary helpers.
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
cl::opt< unsigned > ForceTargetInstructionCost
static Value * getStepVector(Value *Val, Value *Step, Instruction::BinaryOps BinOp, ElementCount VF, IRBuilderBase &Builder)
This function adds (0 * Step, 1 * Step, 2 * Step, ...) to each vector element of Val.
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, unsigned CurrentPart, IRBuilderBase &Builder)
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:437
InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:381
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:481
const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:296
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:240
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:292
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
static StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:326
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
void setAllowContract(bool B=true)
Definition: FMF.h:91
bool noSignedZeros() const
Definition: FMF.h:68
bool noInfs() const
Definition: FMF.h:67
void setAllowReciprocal(bool B=true)
Definition: FMF.h:88
bool allowReciprocal() const
Definition: FMF.h:69
void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:85
bool allowReassoc() const
Flag queries.
Definition: FMF.h:65
bool approxFunc() const
Definition: FMF.h:71
void setNoNaNs(bool B=true)
Definition: FMF.h:79
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:76
bool noNaNs() const
Definition: FMF.h:66
void setApproxFunc(bool B=true)
Definition: FMF.h:94
void setNoInfs(bool B=true)
Definition: FMF.h:82
bool allowContract() const
Definition: FMF.h:70
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
bool willReturn() const
Determine if the function will return.
Definition: Function.h:674
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:607
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:956
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2051
Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1135
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1163
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:194
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2574
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1987
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2186
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2093
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:330
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1119
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1874
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1733
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:424
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:296
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:505
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2234
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2404
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2435
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition: IRBuilder.h:1101
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1164
Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:968
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
LLVMContext & getContext() const
Definition: IRBuilder.h:195
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1688
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:308
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:108
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:627
CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:596
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
A struct for saving information about induction variables.
@ IK_PtrInduction
Pointer induction var. Step = C.
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:99
bool isBinaryOp() const
Definition: Instruction.h:315
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
const char * getOpcodeName() const
Definition: Instruction.h:312
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
bool isUnaryOp() const
Definition: Instruction.h:314
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:508
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
bool isReverse() const
Definition: VectorUtils.h:503
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:505
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
BlockT * getHeader() const
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
FastMathFlags getFastMathFlags() const
static unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Type * getRecurrenceType() const
Returns the type of the recurrence.
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
bool isSigned() const
Returns true if all source operands of the recurrence are SExtInsts.
RecurKind getRecurrenceKind() const
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class uses information about analyze scalars to rewrite expressions in canonical form.
Type * getType() const
Return the LLVM type of this SCEV expression.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
@ TCC_Free
Expected to fold away in lowering.
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp=std::nullopt) const
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:264
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
value_op_iterator value_op_end()
Definition: User.h:309
Value * getOperand(unsigned i) const
Definition: User.h:228
value_op_iterator value_op_begin()
Definition: User.h:306
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3200
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3253
iterator end()
Definition: VPlan.h:3237
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3266
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2187
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2192
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2182
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2178
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:78
VPRegionBlock * getParent()
Definition: VPlan.h:170
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:180
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:201
VPlan * getPlan()
Definition: VPlan.cpp:155
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2554
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:298
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:116
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:421
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:394
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:406
unsigned getVPDefID() const
Definition: VPlanValue.h:426
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3130
VPValue * getStartValue() const
Definition: VPlan.h:3129
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1729
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1471
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Instruction & getInstruction() const
Definition: VPlan.h:1060
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void extractLastLaneOfOperand(VPBuilder &Builder)
Update the recipes single operand to the last lane of the operand using Builder.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:863
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:851
@ CanonicalIVIncrementForPart
Definition: VPlan.h:866
@ ComputeReductionResult
Definition: VPlan.h:869
@ CalculateTripCountMinusVF
Definition: VPlan.h:864
bool hasResult() const
Definition: VPlan.h:989
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:966
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2266
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2272
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2279
Instruction * getInsertPos() const
Definition: VPlan.h:2314
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2303
static bool isVPIntrinsic(Intrinsic::ID)
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlanHelpers.h:116
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlanHelpers.h:157
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlanHelpers.h:143
static VPLane getFirstLane()
Definition: VPlanHelpers.h:141
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2147
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:366
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:391
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:460
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:577
ExactFlagsTy ExactFlags
Definition: VPlan.h:627
FastMathFlagsTy FMFs
Definition: VPlan.h:630
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:629
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:798
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:759
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:801
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:626
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:628
WrapFlagsTy WrapFlags
Definition: VPlan.h:625
bool hasNoUnsignedWrap() const
Definition: VPlan.h:805
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:792
bool hasNoSignedWrap() const
Definition: VPlan.h:811
FastMathFlags getFastMathFlags() const
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2427
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2385
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2389
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2379
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2391
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2383
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2387
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3377
const VPBlockBase * getEntry() const
Definition: VPlan.h:3413
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2514
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3187
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:563
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanHelpers.h:389
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:65
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1474
operand_range operands()
Definition: VPlanValue.h:263
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248
unsigned getNumOperands() const
Definition: VPlanValue.h:242
operand_iterator op_begin()
Definition: VPlanValue.h:259
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:243
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:278
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1435
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:125
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1470
friend class VPInstruction
Definition: VPlanValue.h:50
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:144
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89
user_iterator user_begin()
Definition: VPlanValue.h:134
unsigned getNumUsers() const
Definition: VPlanValue.h:117
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173
user_range users()
Definition: VPlanValue.h:138
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Function * getCalledScalarFunction() const
Definition: VPlan.h:1419
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1423
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1242
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1170
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
PHINode * getPHINode() const
Definition: VPlan.h:1785
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1782
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1788
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1860
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1869
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1362
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2625
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2622
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2661
Instruction & Ingredient
Definition: VPlan.h:2616
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2619
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2675
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2668
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2665
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:1914
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1136
unsigned getUF() const
Definition: VPlan.h:3685
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1094
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
VectorBuilder & setEVL(Value *NewExplicitVectorLength)
Definition: VectorBuilder.h:82
VectorBuilder & setMask(Value *NewMask)
Definition: VectorBuilder.h:78
Value * createVectorInstruction(unsigned Opcode, Type *ReturnTy, ArrayRef< Value * > VecOpArray, const Twine &Name=Twine())
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Definition: DerivedTypes.h:541
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:732
StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:42
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlanUtils.h:41
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:21
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:16
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1278
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:989
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2207
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1076
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:54
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isPointerTy(const Type *T)
Definition: SPIRVUtils.h:256
Value * createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence descriptor Desc.
Definition: LoopUtils.cpp:1341
Value * createReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi=nullptr)
Create a generic reduction using a recurrence descriptor Desc Fast-math-flags are propagated using th...
Definition: LoopUtils.cpp:1323
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:33
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ Add
Sum of integers.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about an recurrence kind, return the identity for the @llvm.vector....
Definition: LoopUtils.cpp:1270
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlanHelpers.h:356
LLVMContext & LLVMCtx
Definition: VPlanHelpers.h:360
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1634
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
TargetTransformInfo::TargetCostKind CostKind
Definition: VPlanHelpers.h:363
VPTypeAnalysis Types
Definition: VPlanHelpers.h:359
const TargetLibraryInfo & TLI
Definition: VPlanHelpers.h:358
const TargetTransformInfo & TTI
Definition: VPlanHelpers.h:357
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlanHelpers.h:362
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlanHelpers.h:304
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlanHelpers.h:312
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:349
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlanHelpers.h:196
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlanHelpers.h:229
bool hasVectorValue(VPValue *Def)
Definition: VPlanHelpers.h:227
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlanHelpers.h:349
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlanHelpers.h:352
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:362
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:251
struct llvm::VPTransformState::CFGState CFG
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlanHelpers.h:210
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlanHelpers.h:329
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlanHelpers.h:202
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlanHelpers.h:250
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlanHelpers.h:205
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:373
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlanHelpers.h:338
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlanHelpers.h:239
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2745
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1514
VPValue * getCond() const
Definition: VPlan.h:1510
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:2824
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2827
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2789
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.