1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanHelpers.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanUtils.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
26#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/Value.h"
35#include "llvm/Support/Debug.h"
40#include <cassert>
41
42using namespace llvm;
43
45
46#define LV_NAME "loop-vectorize"
47#define DEBUG_TYPE LV_NAME
48
49bool VPRecipeBase::mayWriteToMemory() const {
50 switch (getVPDefID()) {
51 case VPExpressionSC:
52 return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();
53 case VPInstructionSC:
54 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
55 case VPInterleaveSC:
56 return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
57 case VPWidenStoreEVLSC:
58 case VPWidenStoreSC:
59 return true;
60 case VPReplicateSC:
61 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
62 ->mayWriteToMemory();
63 case VPWidenCallSC:
64 return !cast<VPWidenCallRecipe>(this)
65 ->getCalledScalarFunction()
66 ->onlyReadsMemory();
67 case VPWidenIntrinsicSC:
68 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
69 case VPCanonicalIVPHISC:
70 case VPBranchOnMaskSC:
71 case VPFirstOrderRecurrencePHISC:
72 case VPReductionPHISC:
73 case VPScalarIVStepsSC:
74 case VPPredInstPHISC:
75 return false;
76 case VPBlendSC:
77 case VPReductionEVLSC:
78 case VPReductionSC:
79 case VPVectorPointerSC:
80 case VPWidenCanonicalIVSC:
81 case VPWidenCastSC:
82 case VPWidenGEPSC:
83 case VPWidenIntOrFpInductionSC:
84 case VPWidenLoadEVLSC:
85 case VPWidenLoadSC:
86 case VPWidenPHISC:
87 case VPWidenSC:
88 case VPWidenSelectSC: {
89 const Instruction *I =
90 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
91 (void)I;
92 assert((!I || !I->mayWriteToMemory()) &&
93 "underlying instruction may write to memory");
94 return false;
95 }
96 default:
97 return true;
98 }
99}
100
101bool VPRecipeBase::mayReadFromMemory() const {
102 switch (getVPDefID()) {
103 case VPExpressionSC:
104 return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();
105 case VPInstructionSC:
106 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
107 case VPWidenLoadEVLSC:
108 case VPWidenLoadSC:
109 return true;
110 case VPReplicateSC:
111 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
112 ->mayReadFromMemory();
113 case VPWidenCallSC:
114 return !cast<VPWidenCallRecipe>(this)
115 ->getCalledScalarFunction()
116 ->onlyWritesMemory();
117 case VPWidenIntrinsicSC:
118 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
119 case VPBranchOnMaskSC:
120 case VPFirstOrderRecurrencePHISC:
121 case VPPredInstPHISC:
122 case VPScalarIVStepsSC:
123 case VPWidenStoreEVLSC:
124 case VPWidenStoreSC:
125 return false;
126 case VPBlendSC:
127 case VPReductionEVLSC:
128 case VPReductionSC:
129 case VPVectorPointerSC:
130 case VPWidenCanonicalIVSC:
131 case VPWidenCastSC:
132 case VPWidenGEPSC:
133 case VPWidenIntOrFpInductionSC:
134 case VPWidenPHISC:
135 case VPWidenSC:
136 case VPWidenSelectSC: {
137 const Instruction *I =
138 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
139 (void)I;
140 assert((!I || !I->mayReadFromMemory()) &&
141 "underlying instruction may read from memory");
142 return false;
143 }
144 default:
145 return true;
146 }
147}
148
149bool VPRecipeBase::mayHaveSideEffects() const {
150 switch (getVPDefID()) {
151 case VPExpressionSC:
152 return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
153 case VPDerivedIVSC:
154 case VPFirstOrderRecurrencePHISC:
155 case VPPredInstPHISC:
156 case VPVectorEndPointerSC:
157 return false;
158 case VPInstructionSC:
159 return mayWriteToMemory();
160 case VPWidenCallSC: {
161 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
162 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
163 }
164 case VPWidenIntrinsicSC:
165 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
166 case VPBlendSC:
167 case VPReductionEVLSC:
168 case VPReductionSC:
169 case VPScalarIVStepsSC:
170 case VPVectorPointerSC:
171 case VPWidenCanonicalIVSC:
172 case VPWidenCastSC:
173 case VPWidenGEPSC:
174 case VPWidenIntOrFpInductionSC:
175 case VPWidenPHISC:
176 case VPWidenPointerInductionSC:
177 case VPWidenSC:
178 case VPWidenSelectSC: {
179 const Instruction *I =
180 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
181 (void)I;
182 assert((!I || !I->mayHaveSideEffects()) &&
183 "underlying instruction has side-effects");
184 return false;
185 }
186 case VPInterleaveSC:
187 return mayWriteToMemory();
188 case VPWidenLoadEVLSC:
189 case VPWidenLoadSC:
190 case VPWidenStoreEVLSC:
191 case VPWidenStoreSC:
192 assert(
193 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
195 "mayHaveSideffects result for ingredient differs from this "
196 "implementation");
197 return mayWriteToMemory();
198 case VPReplicateSC: {
199 auto *R = cast<VPReplicateRecipe>(this);
200 return R->getUnderlyingInstr()->mayHaveSideEffects();
201 }
202 default:
203 return true;
204 }
205}
206
208 assert(!Parent && "Recipe already in some VPBasicBlock");
209 assert(InsertPos->getParent() &&
210 "Insertion position not in any VPBasicBlock");
211 InsertPos->getParent()->insert(this, InsertPos->getIterator());
212}
213
216 assert(!Parent && "Recipe already in some VPBasicBlock");
217 assert(I == BB.end() || I->getParent() == &BB);
218 BB.insert(this, I);
219}
220
222 assert(!Parent && "Recipe already in some VPBasicBlock");
223 assert(InsertPos->getParent() &&
224 "Insertion position not in any VPBasicBlock");
225 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
226}
227
229 assert(getParent() && "Recipe not in any VPBasicBlock");
231 Parent = nullptr;
232}
233
235 assert(getParent() && "Recipe not in any VPBasicBlock");
237}
238
241 insertAfter(InsertPos);
242}
243
247 insertBefore(BB, I);
248}
249
251 // Get the underlying instruction for the recipe, if there is one. It is used
252 // to
253 // * decide if cost computation should be skipped for this recipe,
254 // * apply forced target instruction cost.
255 Instruction *UI = nullptr;
256 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
257 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
258 else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
259 UI = IG->getInsertPos();
260 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
261 UI = &WidenMem->getIngredient();
262
263 InstructionCost RecipeCost;
264 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
265 RecipeCost = 0;
266 } else {
267 RecipeCost = computeCost(VF, Ctx);
268 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
269 RecipeCost.isValid())
271 }
272
273 LLVM_DEBUG({
274 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
275 dump();
276 });
277 return RecipeCost;
278}
279
280InstructionCost VPRecipeBase::computeCost(ElementCount VF,
281 VPCostContext &Ctx) const {
282 llvm_unreachable("subclasses should implement computeCost");
283}
284
286 return (getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC) ||
287 isa<VPPhi, VPIRPhi>(this);
288}
289
291 auto *VPI = dyn_cast<VPInstruction>(this);
292 return VPI && Instruction::isCast(VPI->getOpcode());
293}
294
297 VPCostContext &Ctx) const {
298 std::optional<unsigned> Opcode;
299 VPValue *Op = getOperand(0);
300 VPRecipeBase *OpR = Op->getDefiningRecipe();
301
302 // If the partial reduction is predicated, a select will be operand 0
303 using namespace llvm::VPlanPatternMatch;
304 if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(Op), m_VPValue()))) {
305 OpR = Op->getDefiningRecipe();
306 }
307
308 Type *InputTypeA = nullptr, *InputTypeB = nullptr;
310 ExtBType = TTI::PR_None;
311
312 auto GetExtendKind = [](VPRecipeBase *R) {
313 if (!R)
314 return TTI::PR_None;
315 auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
316 if (!WidenCastR)
317 return TTI::PR_None;
318 if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
319 return TTI::PR_ZeroExtend;
320 if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
321 return TTI::PR_SignExtend;
322 return TTI::PR_None;
323 };
324
325 // Pick out the opcode and type/extension information from a widen recipe,
326 // looking through a negation (sub) if present.
327 auto HandleWiden = [&](VPWidenRecipe *Widen) {
328 if (match(Widen, m_Sub(m_SpecificInt(0), m_VPValue(Op)))) {
329 Widen = dyn_cast<VPWidenRecipe>(Op->getDefiningRecipe());
330 }
331 Opcode = Widen->getOpcode();
332 VPRecipeBase *ExtAR = Widen->getOperand(0)->getDefiningRecipe();
333 VPRecipeBase *ExtBR = Widen->getOperand(1)->getDefiningRecipe();
334 InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
335 : Widen->getOperand(0));
336 InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
337 : Widen->getOperand(1));
338 ExtAType = GetExtendKind(ExtAR);
339 ExtBType = GetExtendKind(ExtBR);
340 };
341
342 if (isa<VPWidenCastRecipe>(OpR)) {
343 InputTypeA = Ctx.Types.inferScalarType(OpR->getOperand(0));
344 ExtAType = GetExtendKind(OpR);
345 } else if (isa<VPReductionPHIRecipe>(OpR)) {
346 auto RedPhiOp1R = getOperand(1)->getDefiningRecipe();
347 if (isa<VPWidenCastRecipe>(RedPhiOp1R)) {
348 InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0));
349 ExtAType = GetExtendKind(RedPhiOp1R);
350 } else if (auto Widen = dyn_cast<VPWidenRecipe>(RedPhiOp1R))
351 HandleWiden(Widen);
352 } else if (auto Widen = dyn_cast<VPWidenRecipe>(OpR)) {
353 HandleWiden(Widen);
354 } else if (auto Reduction = dyn_cast<VPPartialReductionRecipe>(OpR)) {
355 return Reduction->computeCost(VF, Ctx);
356 }
357 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
358 return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
359 PhiType, VF, ExtAType, ExtBType,
360 Opcode, Ctx.CostKind);
361}
362
364 auto &Builder = State.Builder;
365
366 assert(getOpcode() == Instruction::Add &&
367 "Unhandled partial reduction opcode");
368
369 Value *BinOpVal = State.get(getOperand(1));
370 Value *PhiVal = State.get(getOperand(0));
371 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
372
373 Type *RetTy = PhiVal->getType();
374
375 CallInst *V = Builder.CreateIntrinsic(
376 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
377 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
378
379 State.set(this, V);
380}
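// Illustrative sketch (not part of the original source): with VF = 16 feeding a
// 4-element accumulator, the intrinsic call created above would look roughly like
//   %partial.reduce = call <4 x i32>
//       @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(
//           <4 x i32> %phi, <16 x i32> %bin.op)
// i.e. the wide operand is folded into the narrower accumulator type.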
381
382#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
384 VPSlotTracker &SlotTracker) const {
385 O << Indent << "PARTIAL-REDUCE ";
387 O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
389}
390#endif
391
393 assert(OpType == OperationType::FPMathOp &&
394 "recipe doesn't have fast math flags");
395 FastMathFlags Res;
396 Res.setAllowReassoc(FMFs.AllowReassoc);
397 Res.setNoNaNs(FMFs.NoNaNs);
398 Res.setNoInfs(FMFs.NoInfs);
399 Res.setNoSignedZeros(FMFs.NoSignedZeros);
400 Res.setAllowReciprocal(FMFs.AllowReciprocal);
401 Res.setAllowContract(FMFs.AllowContract);
402 Res.setApproxFunc(FMFs.ApproxFunc);
403 return Res;
404}
405
406#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
408#endif
409
410template <unsigned PartOpIdx>
411VPValue *
413 if (U.getNumOperands() == PartOpIdx + 1)
414 return U.getOperand(PartOpIdx);
415 return nullptr;
416}
417
418template <unsigned PartOpIdx>
420 if (auto *UnrollPartOp = getUnrollPartOperand(U))
421 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
422 return 0;
423}
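// Editorial note (not part of the original source): after unrolling, recipes
// such as CanonicalIVIncrementForPart carry their unroll part as a trailing
// live-in constant operand; getUnrollPart() reads it back and returns 0 when
// that operand is absent (i.e. for part 0).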
424
425namespace llvm {
426template class VPUnrollPartAccessor<1>;
427template class VPUnrollPartAccessor<2>;
428template class VPUnrollPartAccessor<3>;
429}
430
432 const VPIRFlags &Flags, DebugLoc DL,
433 const Twine &Name)
434 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
435 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {
437 "Set flags not supported for the provided opcode");
438 assert((getNumOperandsForOpcode(Opcode) == -1u ||
439 getNumOperandsForOpcode(Opcode) == getNumOperands()) &&
440 "number of operands does not match opcode");
441}
442
443#ifndef NDEBUG
444unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
445 if (Instruction::isUnaryOp(Opcode) || Instruction::isCast(Opcode))
446 return 1;
447
448 if (Instruction::isBinaryOp(Opcode))
449 return 2;
450
451 switch (Opcode) {
454 return 0;
455 case Instruction::Alloca:
456 case Instruction::ExtractValue:
457 case Instruction::Freeze:
458 case Instruction::Load:
470 return 1;
471 case Instruction::ICmp:
472 case Instruction::FCmp:
473 case Instruction::Store:
482 return 2;
483 case Instruction::Select:
486 return 3;
488 return 4;
489 case Instruction::Call:
490 case Instruction::GetElementPtr:
491 case Instruction::PHI:
492 case Instruction::Switch:
493 // Cannot determine the number of operands from the opcode.
494 return -1u;
495 }
496 llvm_unreachable("all cases should be handled above");
497}
498#endif
499
500bool VPInstruction::doesGeneratePerAllLanes() const {
501 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
502}
503
504bool VPInstruction::canGenerateScalarForFirstLane() const {
506 return true;
508 return true;
509 switch (Opcode) {
510 case Instruction::Freeze:
511 case Instruction::ICmp:
512 case Instruction::PHI:
513 case Instruction::Select:
522 return true;
523 default:
524 return false;
525 }
526}
527
528Value *VPInstruction::generatePerLane(VPTransformState &State,
529 const VPLane &Lane) {
530 IRBuilderBase &Builder = State.Builder;
531
533 "only PtrAdd opcodes are supported for now");
534 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
535 State.get(getOperand(1), Lane), Name);
536}
537
538/// Create a conditional branch using \p Cond branching to the successors of \p
539/// VPBB. Note that the first successor is always forward (i.e. not created yet)
540/// while the second successor may already have been created (if it is a header
541/// block and VPBB is a latch).
543 VPTransformState &State) {
544 // Replace the temporary unreachable terminator with a new conditional
545 // branch, hooking it up to backward destination (header) for latch blocks
546 // now, and to forward destination(s) later when they are created.
547 // Second successor may be backwards - iff it is already in VPBB2IRBB.
548 VPBasicBlock *SecondVPSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
549 BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);
550 BasicBlock *IRBB = State.CFG.VPBB2IRBB[VPBB];
551 BranchInst *CondBr = State.Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);
552 // First successor is always forward, reset it to nullptr
553 CondBr->setSuccessor(0, nullptr);
555 return CondBr;
556}
557
558Value *VPInstruction::generate(VPTransformState &State) {
559 IRBuilderBase &Builder = State.Builder;
560
562 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
563 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
564 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
565 auto *Res =
566 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
567 if (auto *I = dyn_cast<Instruction>(Res))
568 applyFlags(*I);
569 return Res;
570 }
571
572 switch (getOpcode()) {
573 case VPInstruction::Not: {
574 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
575 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
576 return Builder.CreateNot(A, Name);
577 }
578 case Instruction::ExtractElement: {
579 assert(State.VF.isVector() && "Only extract elements from vectors");
580 if (getOperand(1)->isLiveIn()) {
581 unsigned IdxToExtract =
582 cast<ConstantInt>(getOperand(1)->getLiveInIRValue())->getZExtValue();
583 return State.get(getOperand(0), VPLane(IdxToExtract));
584 }
585 Value *Vec = State.get(getOperand(0));
586 Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);
587 return Builder.CreateExtractElement(Vec, Idx, Name);
588 }
589 case Instruction::Freeze: {
591 return Builder.CreateFreeze(Op, Name);
592 }
593 case Instruction::FCmp:
594 case Instruction::ICmp: {
595 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
596 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
597 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
598 return Builder.CreateCmp(getPredicate(), A, B, Name);
599 }
600 case Instruction::PHI: {
601 llvm_unreachable("should be handled by VPPhi::execute");
602 }
603 case Instruction::Select: {
604 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
605 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
606 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
607 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
608 return Builder.CreateSelect(Cond, Op1, Op2, Name);
609 }
611 // Get first lane of vector induction variable.
612 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
613 // Get the original loop tripcount.
614 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
615
616 // If this part of the active lane mask is scalar, generate the CMP directly
617 // to avoid unnecessary extracts.
618 if (State.VF.isScalar())
619 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
620 Name);
621
622 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
623 auto *PredTy = VectorType::get(Int1Ty, State.VF);
624 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
625 {PredTy, ScalarTC->getType()},
626 {VIVElem0, ScalarTC}, nullptr, Name);
627 }
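// Illustrative sketch (not part of the original source): for VF = 4 the
// intrinsic call above produces IR along the lines of
//   %alm = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %iv, i64 %tc)
// i.e. lane i of the result is true iff %iv + i < %tc.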
629 // Generate code to combine the previous and current values in vector v3.
630 //
631 // vector.ph:
632 // v_init = vector(..., ..., ..., a[-1])
633 // br vector.body
634 //
635 // vector.body
636 // i = phi [0, vector.ph], [i+4, vector.body]
637 // v1 = phi [v_init, vector.ph], [v2, vector.body]
638 // v2 = a[i, i+1, i+2, i+3];
639 // v3 = vector(v1(3), v2(0, 1, 2))
640
641 auto *V1 = State.get(getOperand(0));
642 if (!V1->getType()->isVectorTy())
643 return V1;
644 Value *V2 = State.get(getOperand(1));
645 return Builder.CreateVectorSplice(V1, V2, -1, Name);
646 }
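// Illustrative sketch (not part of the original source): for a fixed VF of 4,
// CreateVectorSplice(V1, V2, -1) lowers to
//   shufflevector <4 x T> %v1, <4 x T> %v2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// i.e. the last lane of the previous iteration followed by the first three
// lanes of the current one, matching the v3 example in the comment above.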
648 unsigned UF = getParent()->getPlan()->getUF();
649 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
650 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
651 Value *Sub = Builder.CreateSub(ScalarTC, Step);
652 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
653 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
654 return Builder.CreateSelect(Cmp, Sub, Zero);
655 }
657 // TODO: Restructure this code with an explicit remainder loop; vsetvli can
658 // be outside of the main loop.
659 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
660 // Compute EVL
661 assert(AVL->getType()->isIntegerTy() &&
662 "Requested vector length should be an integer.");
663
664 assert(State.VF.isScalable() && "Expected scalable vector factor.");
665 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
666
667 Value *EVL = State.Builder.CreateIntrinsic(
668 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
669 {AVL, VFArg, State.Builder.getTrue()});
670 return EVL;
671 }
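// Illustrative sketch (not part of the original source): with a scalable VF
// whose known minimum is 4, the call above becomes
//   %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
// i.e. how many of the %avl remaining iterations the next vector step may process.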
673 unsigned Part = getUnrollPart(*this);
674 auto *IV = State.get(getOperand(0), VPLane(0));
675 assert(Part != 0 && "Must have a positive part");
676 // The canonical IV is incremented by the vectorization factor (num of
677 // SIMD elements) times the unroll part.
678 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
679 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
681 }
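// Editorial note (not part of the original source): createStepForVF yields
// VF * Part (times vscale for scalable VFs), so e.g. unroll part 2 at VF = 4
// advances the part-0 canonical IV by 8.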
683 Value *Cond = State.get(getOperand(0), VPLane(0));
684 auto *Br = createCondBranch(Cond, getParent(), State);
685 applyMetadata(*Br);
686 return Br;
687 }
689 // First create the compare.
690 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
691 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
692 Value *Cond = Builder.CreateICmpEQ(IV, TC);
693 return createCondBranch(Cond, getParent(), State);
694 }
696 return Builder.CreateVectorSplat(
697 State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
698 }
700 // For struct types, we need to build a new 'wide' struct type, where each
701 // element is widened, i.e., we create a struct of vectors.
702 auto *StructTy =
703 cast<StructType>(State.TypeAnalysis.inferScalarType(getOperand(0)));
704 Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));
705 for (const auto &[LaneIndex, Op] : enumerate(operands())) {
706 for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
707 FieldIndex++) {
708 Value *ScalarValue =
709 Builder.CreateExtractValue(State.get(Op, true), FieldIndex);
710 Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);
711 VectorValue =
712 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneIndex);
713 Res = Builder.CreateInsertValue(Res, VectorValue, FieldIndex);
714 }
715 }
716 return Res;
717 }
719 auto *ScalarTy = State.TypeAnalysis.inferScalarType(getOperand(0));
720 auto NumOfElements = ElementCount::getFixed(getNumOperands());
721 Value *Res = PoisonValue::get(toVectorizedTy(ScalarTy, NumOfElements));
722 for (const auto &[Idx, Op] : enumerate(operands()))
723 Res = State.Builder.CreateInsertElement(Res, State.get(Op, true),
724 State.Builder.getInt32(Idx));
725 return Res;
726 }
728 if (State.VF.isScalar())
729 return State.get(getOperand(0), true);
732 // If this start vector is scaled then it should produce a vector with fewer
733 // elements than the VF.
735 cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
736 auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
737 Constant *Zero = Builder.getInt32(0);
738 return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
739 Zero);
740 }
742 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
743 // and will be removed by breaking up the recipe further.
744 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
745 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
746 Value *ReducedPartRdx = State.get(getOperand(2));
747 for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
748 ReducedPartRdx = Builder.CreateBinOp(
751 State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx");
752 return createAnyOfReduction(Builder, ReducedPartRdx,
753 State.get(getOperand(1), VPLane(0)), OrigPhi);
754 }
756 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
757 // and will be removed by breaking up the recipe further.
758 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
759 // Get its reduction variable descriptor.
760 RecurKind RK = PhiR->getRecurrenceKind();
762 "Unexpected reduction kind");
763 assert(!PhiR->isInLoop() &&
764 "In-loop FindLastIV reduction is not supported yet");
765
766 // The recipe's operands are the reduction phi, the start value, the
767 // sentinel value, followed by one operand for each part of the reduction.
768 unsigned UF = getNumOperands() - 3;
769 Value *ReducedPartRdx = State.get(getOperand(3));
770 RecurKind MinMaxKind;
773 MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
774 else
775 MinMaxKind = IsSigned ? RecurKind::SMin : RecurKind::UMin;
776 for (unsigned Part = 1; Part < UF; ++Part)
777 ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
778 State.get(getOperand(3 + Part)));
779
780 Value *Start = State.get(getOperand(1), true);
782 return createFindLastIVReduction(Builder, ReducedPartRdx, RK, Start,
783 Sentinel);
784 }
786 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
787 // and will be removed by breaking up the recipe further.
788 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
789 // Get its reduction variable descriptor.
790
791 RecurKind RK = PhiR->getRecurrenceKind();
793 "should be handled by ComputeFindIVResult");
794
795 // The recipe's operands are the reduction phi, followed by one operand for
796 // each part of the reduction.
797 unsigned UF = getNumOperands() - 1;
798 VectorParts RdxParts(UF);
799 for (unsigned Part = 0; Part < UF; ++Part)
800 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
801
803 if (hasFastMathFlags())
805
806 // Reduce all of the unrolled parts into a single vector.
807 Value *ReducedPartRdx = RdxParts[0];
808 if (PhiR->isOrdered()) {
809 ReducedPartRdx = RdxParts[UF - 1];
810 } else {
811 // Floating-point operations should have some FMF to enable the reduction.
812 for (unsigned Part = 1; Part < UF; ++Part) {
813 Value *RdxPart = RdxParts[Part];
815 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
816 else {
818 // For sub-recurrences, each UF's reduction variable is already
819 // negative, so we need to do: reduce.add(-acc_uf0 + -acc_uf1)
820 if (RK == RecurKind::Sub)
821 Opcode = Instruction::Add;
822 else
823 Opcode =
825 ReducedPartRdx =
826 Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");
827 }
828 }
829 }
830
831 // Create the reduction after the loop. Note that inloop reductions create
832 // the target reduction in the loop using a Reduction recipe.
833 if (State.VF.isVector() && !PhiR->isInLoop()) {
834 // TODO: Support in-order reductions based on the recurrence descriptor.
835 // All ops in the reduction inherit fast-math-flags from the recurrence
836 // descriptor.
837 ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
838 }
839
840 return ReducedPartRdx;
841 }
844 unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
845 Value *Res;
846 if (State.VF.isVector()) {
847 assert(Offset <= State.VF.getKnownMinValue() &&
848 "invalid offset to extract from");
849 // Extract lane VF - Offset from the operand.
850 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
851 } else {
852 assert(Offset <= 1 && "invalid offset to extract from");
853 Res = State.get(getOperand(0));
854 }
855 if (isa<ExtractElementInst>(Res))
856 Res->setName(Name);
857 return Res;
858 }
860 Value *A = State.get(getOperand(0));
861 Value *B = State.get(getOperand(1));
862 return Builder.CreateLogicalAnd(A, B, Name);
863 }
866 "can only generate first lane for PtrAdd");
867 Value *Ptr = State.get(getOperand(0), VPLane(0));
868 Value *Addend = State.get(getOperand(1), VPLane(0));
869 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
870 }
872 Value *Ptr =
874 Value *Addend = State.get(getOperand(1));
875 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
876 }
878 Value *Res = Builder.CreateFreeze(State.get(getOperand(0)));
879 for (VPValue *Op : drop_begin(operands()))
880 Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));
881 return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
882 }
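// Illustrative sketch (not part of the original source): for a single mask
// operand this reduces to
//   %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %frozen.mask)
// i.e. "is any lane of the (frozen) mask true?".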
884 Value *LaneToExtract = State.get(getOperand(0), true);
885 Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
886 Value *Res = nullptr;
887 Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
888
889 for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
890 Value *VectorStart =
891 Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
892 Value *VectorIdx = Idx == 1
893 ? LaneToExtract
894 : Builder.CreateSub(LaneToExtract, VectorStart);
895 Value *Ext = State.VF.isScalar()
896 ? State.get(getOperand(Idx))
897 : Builder.CreateExtractElement(
898 State.get(getOperand(Idx)), VectorIdx);
899 if (Res) {
900 Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
901 Res = Builder.CreateSelect(Cmp, Ext, Res);
902 } else {
903 Res = Ext;
904 }
905 }
906 return Res;
907 }
909 if (getNumOperands() == 1) {
910 Value *Mask = State.get(getOperand(0));
911 return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,
912 true, Name);
913 }
914 // If there are multiple operands, create a chain of selects to pick the
915 // first operand with an active lane and add the number of lanes of the
916 // preceding operands.
917 Value *RuntimeVF =
918 getRuntimeVF(State.Builder, State.Builder.getInt64Ty(), State.VF);
919 unsigned LastOpIdx = getNumOperands() - 1;
920 Value *Res = nullptr;
921 for (int Idx = LastOpIdx; Idx >= 0; --Idx) {
922 Value *TrailingZeros =
923 State.VF.isScalar()
924 ? Builder.CreateZExt(
925 Builder.CreateICmpEQ(State.get(getOperand(Idx)),
926 Builder.getFalse()),
927 Builder.getInt64Ty())
928 : Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(),
929 State.get(getOperand(Idx)),
930 true, Name);
931 Value *Current = Builder.CreateAdd(
932 Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros);
933 if (Res) {
934 Value *Cmp = Builder.CreateICmpNE(TrailingZeros, RuntimeVF);
935 Res = Builder.CreateSelect(Cmp, Current, Res);
936 } else {
937 Res = Current;
938 }
939 }
940
941 return Res;
942 }
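// Illustrative sketch (not part of the original source): for a single mask
// operand the code above emits
//   %lane = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> %mask, i1 true)
// i.e. the index of the first active lane (poison when no lane is set, since
// the zero-is-poison flag is passed as true).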
944 return State.get(getOperand(0), true);
945 default:
946 llvm_unreachable("Unsupported opcode for instruction");
947 }
948}
949
951 unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {
952 Type *ScalarTy = Ctx.Types.inferScalarType(this);
953 Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;
954 switch (Opcode) {
955 case Instruction::FNeg:
956 return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
957 case Instruction::UDiv:
958 case Instruction::SDiv:
959 case Instruction::SRem:
960 case Instruction::URem:
961 case Instruction::Add:
962 case Instruction::FAdd:
963 case Instruction::Sub:
964 case Instruction::FSub:
965 case Instruction::Mul:
966 case Instruction::FMul:
967 case Instruction::FDiv:
968 case Instruction::FRem:
969 case Instruction::Shl:
970 case Instruction::LShr:
971 case Instruction::AShr:
972 case Instruction::And:
973 case Instruction::Or:
974 case Instruction::Xor: {
977
978 if (VF.isVector()) {
979 // Certain instructions can be cheaper to vectorize if they have a
980 // constant second vector operand. One example of this is shifts on x86.
981 VPValue *RHS = getOperand(1);
982 RHSInfo = Ctx.getOperandInfo(RHS);
983
984 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
987 }
988
989 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
991 if (CtxI)
992 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
993 return Ctx.TTI.getArithmeticInstrCost(
994 Opcode, ResultTy, Ctx.CostKind,
995 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
996 RHSInfo, Operands, CtxI, &Ctx.TLI);
997 }
998 case Instruction::Freeze:
999 // This opcode is unknown. Assume that it is the same as 'mul'.
1000 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,
1001 Ctx.CostKind);
1002 case Instruction::ExtractValue:
1003 return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
1004 Ctx.CostKind);
1005 case Instruction::ICmp:
1006 case Instruction::FCmp: {
1007 Type *ScalarOpTy = Ctx.Types.inferScalarType(getOperand(0));
1008 Type *OpTy = VF.isVector() ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
1009 Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1010 return Ctx.TTI.getCmpSelInstrCost(
1011 Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),
1012 Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
1013 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1014 }
1015 }
1016 return std::nullopt;
1017}
1018
1020 VPCostContext &Ctx) const {
1022 if (!getUnderlyingValue() && getOpcode() != Instruction::FMul) {
1023 // TODO: Compute cost for VPInstructions without underlying values once
1024 // the legacy cost model has been retired.
1025 return 0;
1026 }
1027
1028 assert(!doesGeneratePerAllLanes() &&
1029 "Should only generate a vector value or single scalar, not scalars "
1030 "for all lanes.");
1032 getOpcode(),
1034 }
1035
1036 switch (getOpcode()) {
1037 case Instruction::Select: {
1038 // TODO: It may be possible to improve this by analyzing where the
1039 // condition operand comes from.
1041 auto *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1042 auto *VecTy = Ctx.Types.inferScalarType(getOperand(1));
1043 if (!vputils::onlyFirstLaneUsed(this)) {
1044 CondTy = toVectorTy(CondTy, VF);
1045 VecTy = toVectorTy(VecTy, VF);
1046 }
1047 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
1048 Ctx.CostKind);
1049 }
1050 case Instruction::ExtractElement:
1052 if (VF.isScalar()) {
1053 // ExtractLane with VF=1 takes care of handling extracting across multiple
1054 // parts.
1055 return 0;
1056 }
1057
1058 // Add on the cost of extracting the element.
1059 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1060 return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
1061 Ctx.CostKind);
1062 }
1063 case VPInstruction::AnyOf: {
1064 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1066 Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
1067 }
1069 Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0));
1070 if (VF.isScalar())
1071 return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
1074 // Calculate the cost of determining the lane index.
1075 auto *PredTy = toVectorTy(ScalarTy, VF);
1076 IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts,
1078 {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
1079 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1080 }
1082 assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
1084 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
1085 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1086
1088 cast<VectorType>(VectorTy),
1089 cast<VectorType>(VectorTy), Mask,
1090 Ctx.CostKind, VF.getKnownMinValue() - 1);
1091 }
1093 Type *ArgTy = Ctx.Types.inferScalarType(getOperand(0));
1095 IntrinsicCostAttributes Attrs(Intrinsic::get_active_lane_mask, RetTy,
1096 {ArgTy, ArgTy});
1097 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1098 }
1100 Type *Arg0Ty = Ctx.Types.inferScalarType(getOperand(0));
1101 Type *I32Ty = Type::getInt32Ty(Ctx.LLVMCtx);
1102 Type *I1Ty = Type::getInt1Ty(Ctx.LLVMCtx);
1103 IntrinsicCostAttributes Attrs(Intrinsic::experimental_get_vector_length,
1104 I32Ty, {Arg0Ty, I32Ty, I1Ty});
1105 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1106 }
1108 // Add on the cost of extracting the element.
1109 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1110 return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
1111 VecTy, Ctx.CostKind, 0);
1112 }
1114 if (VF == ElementCount::getScalable(1))
1117 default:
1118 // TODO: Compute cost of other VPInstructions once the legacy cost model has
1119 // been retired.
1121 "unexpected VPInstruction witht underlying value");
1122 return 0;
1123 }
1124}
1125
1129 getOpcode() == Instruction::ExtractElement ||
1136}
1137
1139 switch (getOpcode()) {
1140 case Instruction::PHI:
1144 return true;
1145 default:
1146 return isScalarCast();
1147 }
1148}
1149
1150void VPInstruction::execute(VPTransformState &State) {
1151 assert(!State.Lane && "VPInstruction executing a Lane");
1154 "Set flags not supported for the provided opcode");
1155 if (hasFastMathFlags())
1157 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
1160 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
1161 if (GeneratesPerAllLanes) {
1162 for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
1163 Lane != NumLanes; ++Lane) {
1164 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
1165 assert(GeneratedValue && "generatePerLane must produce a value");
1166 State.set(this, GeneratedValue, VPLane(Lane));
1167 }
1168 return;
1169 }
1170
1171 Value *GeneratedValue = generate(State);
1172 if (!hasResult())
1173 return;
1174 assert(GeneratedValue && "generate must produce a value");
1175 assert((((GeneratedValue->getType()->isVectorTy() ||
1176 GeneratedValue->getType()->isStructTy()) ==
1177 !GeneratesPerFirstLaneOnly) ||
1178 State.VF.isScalar()) &&
1179 "scalar value but not only first lane defined");
1180 State.set(this, GeneratedValue,
1181 /*IsScalar*/ GeneratesPerFirstLaneOnly);
1182}
1183
1186 return false;
1187 switch (getOpcode()) {
1188 case Instruction::ExtractElement:
1189 case Instruction::Freeze:
1190 case Instruction::FCmp:
1191 case Instruction::ICmp:
1192 case Instruction::Select:
1193 case Instruction::PHI:
1205 case VPInstruction::Not:
1212 return false;
1213 default:
1214 return true;
1215 }
1216}
1217
1219 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1221 return vputils::onlyFirstLaneUsed(this);
1222
1223 switch (getOpcode()) {
1224 default:
1225 return false;
1226 case Instruction::ExtractElement:
1227 return Op == getOperand(1);
1228 case Instruction::PHI:
1229 return true;
1230 case Instruction::FCmp:
1231 case Instruction::ICmp:
1232 case Instruction::Select:
1233 case Instruction::Or:
1234 case Instruction::Freeze:
1235 case VPInstruction::Not:
1236 // TODO: Cover additional opcodes.
1237 return vputils::onlyFirstLaneUsed(this);
1246 return true;
1248 return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
1250 return Op == getOperand(0);
1253 return Op == getOperand(1);
1255 return Op == getOperand(0);
1256 };
1257 llvm_unreachable("switch should return");
1258}
1259
1261 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1263 return vputils::onlyFirstPartUsed(this);
1264
1265 switch (getOpcode()) {
1266 default:
1267 return false;
1268 case Instruction::FCmp:
1269 case Instruction::ICmp:
1270 case Instruction::Select:
1271 return vputils::onlyFirstPartUsed(this);
1275 return true;
1276 };
1277 llvm_unreachable("switch should return");
1278}
1279
1280#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1282 VPSlotTracker SlotTracker(getParent()->getPlan());
1283 print(dbgs(), "", SlotTracker);
1284}
1285
1287 VPSlotTracker &SlotTracker) const {
1288 O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
1289
1290 if (hasResult()) {
1292 O << " = ";
1293 }
1294
1295 switch (getOpcode()) {
1296 case VPInstruction::Not:
1297 O << "not";
1298 break;
1300 O << "combined load";
1301 break;
1303 O << "combined store";
1304 break;
1306 O << "active lane mask";
1307 break;
1309 O << "EXPLICIT-VECTOR-LENGTH";
1310 break;
1312 O << "first-order splice";
1313 break;
1315 O << "branch-on-cond";
1316 break;
1318 O << "TC > VF ? TC - VF : 0";
1319 break;
1321 O << "VF * Part +";
1322 break;
1324 O << "branch-on-count";
1325 break;
1327 O << "broadcast";
1328 break;
1330 O << "buildstructvector";
1331 break;
1333 O << "buildvector";
1334 break;
1336 O << "extract-lane";
1337 break;
1339 O << "extract-last-element";
1340 break;
1342 O << "extract-penultimate-element";
1343 break;
1345 O << "compute-anyof-result";
1346 break;
1348 O << "compute-find-iv-result";
1349 break;
1351 O << "compute-reduction-result";
1352 break;
1354 O << "logical-and";
1355 break;
1357 O << "ptradd";
1358 break;
1360 O << "wide-ptradd";
1361 break;
1363 O << "any-of";
1364 break;
1366 O << "first-active-lane";
1367 break;
1369 O << "reduction-start-vector";
1370 break;
1372 O << "resume-for-epilogue";
1373 break;
1374 default:
1376 }
1377
1378 printFlags(O);
1380
1381 if (auto DL = getDebugLoc()) {
1382 O << ", !dbg ";
1383 DL.print(O);
1384 }
1385}
1386#endif
1387
1390 if (isScalarCast()) {
1391 Value *Op = State.get(getOperand(0), VPLane(0));
1393 Op, ResultTy);
1394 State.set(this, Cast, VPLane(0));
1395 return;
1396 }
1397 switch (getOpcode()) {
1399 Value *StepVector =
1400 State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
1401 State.set(this, StepVector);
1402 break;
1403 }
1404 case VPInstruction::VScale: {
1405 Value *VScale = State.Builder.CreateVScale(ResultTy);
1406 State.set(this, VScale, true);
1407 break;
1408 }
1409
1410 default:
1411 llvm_unreachable("opcode not implemented yet");
1412 }
1413}
1414
1415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1417 VPSlotTracker &SlotTracker) const {
1418 O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
1420 O << " = ";
1421
1422 switch (getOpcode()) {
1424 O << "wide-iv-step ";
1426 break;
1428 O << "step-vector " << *ResultTy;
1429 break;
1431 O << "vscale " << *ResultTy;
1432 break;
1433 default:
1434 assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
1437 O << " to " << *ResultTy;
1438 }
1439}
1440#endif
1441
1444 PHINode *NewPhi = State.Builder.CreatePHI(
1445 State.TypeAnalysis.inferScalarType(this), 2, getName());
1446 unsigned NumIncoming = getNumIncoming();
1447 if (getParent() != getParent()->getPlan()->getScalarPreheader()) {
1448 // TODO: Fixup all incoming values of header phis once recipes defining them
1449 // are introduced.
1450 NumIncoming = 1;
1451 }
1452 for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
1453 Value *IncV = State.get(getIncomingValue(Idx), VPLane(0));
1454 BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
1455 NewPhi->addIncoming(IncV, PredBB);
1456 }
1457 State.set(this, NewPhi, VPLane(0));
1458}
1459
1460#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1461void VPPhi::print(raw_ostream &O, const Twine &Indent,
1462 VPSlotTracker &SlotTracker) const {
1463 O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
1465 O << " = phi ";
1467}
1468#endif
1469
1470VPIRInstruction *VPIRInstruction::create(Instruction &I) {
1471 if (auto *Phi = dyn_cast<PHINode>(&I))
1472 return new VPIRPhi(*Phi);
1473 return new VPIRInstruction(I);
1474}
1475
1477 assert(!isa<VPIRPhi>(this) && getNumOperands() == 0 &&
1478 "PHINodes must be handled by VPIRPhi");
1479 // Advance the insert point after the wrapped IR instruction. This allows
1480 // interleaving VPIRInstructions and other recipes.
1481 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
1482}
1483
1485 VPCostContext &Ctx) const {
1486 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
1487 // hence it does not contribute to the cost-modeling for the VPlan.
1488 return 0;
1489}
1490
1492 assert(isa<PHINode>(getInstruction()) &&
1493 "can only update exiting operands to phi nodes");
1494 assert(getNumOperands() > 0 && "must have at least one operand");
1495 VPValue *Exiting = getOperand(0);
1496 if (Exiting->isLiveIn())
1497 return;
1498
1499 Exiting = Builder.createNaryOp(VPInstruction::ExtractLastElement, {Exiting});
1500 setOperand(0, Exiting);
1501}
1502
1503#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1505 VPSlotTracker &SlotTracker) const {
1506 O << Indent << "IR " << I;
1507}
1508#endif
1509
1511 PHINode *Phi = &getIRPhi();
1512 for (const auto &[Idx, Op] : enumerate(operands())) {
1513 VPValue *ExitValue = Op;
1514 auto Lane = vputils::isSingleScalar(ExitValue)
1518 auto *PredVPBB = Pred->getExitingBasicBlock();
1519 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
1520 // Set insertion point in PredBB in case an extract needs to be generated.
1521 // TODO: Model extracts explicitly.
1522 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
1523 Value *V = State.get(ExitValue, VPLane(Lane));
1524 // If there is no existing block for PredBB in the phi, add a new incoming
1525 // value. Otherwise update the existing incoming value for PredBB.
1526 if (Phi->getBasicBlockIndex(PredBB) == -1)
1527 Phi->addIncoming(V, PredBB);
1528 else
1529 Phi->setIncomingValueForBlock(PredBB, V);
1530 }
1531
1532 // Advance the insert point after the wrapped IR instruction. This allows
1533 // interleaving VPIRInstructions and other recipes.
1534 State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
1535}
1536
1538 VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
1539 assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
1540 "Number of phi operands must match number of predecessors");
1541 unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
1542 R->removeOperand(Position);
1543}
1544
1545#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1547 VPSlotTracker &SlotTracker) const {
1548 interleaveComma(enumerate(getAsRecipe()->operands()), O,
1549 [this, &O, &SlotTracker](auto Op) {
1550 O << "[ ";
1551 Op.value()->printAsOperand(O, SlotTracker);
1552 O << ", ";
1553 getIncomingBlock(Op.index())->printAsOperand(O);
1554 O << " ]";
1555 });
1556}
1557#endif
1558
1559#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1560void VPIRPhi::print(raw_ostream &O, const Twine &Indent,
1561 VPSlotTracker &SlotTracker) const {
1563
1564 if (getNumOperands() != 0) {
1565 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
1567 [&O, &SlotTracker](auto Op) {
1568 std::get<0>(Op)->printAsOperand(O, SlotTracker);
1569 O << " from ";
1570 std::get<1>(Op)->printAsOperand(O);
1571 });
1572 O << ")";
1573 }
1574}
1575#endif
1576
1578 : VPIRMetadata(I) {
1579 if (!LVer || !isa<LoadInst, StoreInst>(&I))
1580 return;
1581 const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(&I);
1582 if (AliasScopeMD)
1583 Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
1584 if (NoAliasMD)
1585 Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
1586}
1587
1589 for (const auto &[Kind, Node] : Metadata)
1590 I.setMetadata(Kind, Node);
1591}
1592
1594 SmallVector<std::pair<unsigned, MDNode *>> MetadataIntersection;
1595 for (const auto &[KindA, MDA] : Metadata) {
1596 for (const auto &[KindB, MDB] : Other.Metadata) {
1597 if (KindA == KindB && MDA == MDB) {
1598 MetadataIntersection.emplace_back(KindA, MDA);
1599 break;
1600 }
1601 }
1602 }
1603 Metadata = std::move(MetadataIntersection);
1604}
1605
1607 assert(State.VF.isVector() && "not widening");
1608 assert(Variant != nullptr && "Can't create vector function.");
1609
1610 FunctionType *VFTy = Variant->getFunctionType();
1611 // Add return type if intrinsic is overloaded on it.
1613 for (const auto &I : enumerate(args())) {
1614 Value *Arg;
1615 // Some vectorized function variants may also take a scalar argument,
1616 // e.g. linear parameters for pointers. This needs to be the scalar value
1617 // from the start of the respective part when interleaving.
1618 if (!VFTy->getParamType(I.index())->isVectorTy())
1619 Arg = State.get(I.value(), VPLane(0));
1620 else
1621 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1622 Args.push_back(Arg);
1623 }
1624
1625 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1627 if (CI)
1628 CI->getOperandBundlesAsDefs(OpBundles);
1629
1630 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
1631 applyFlags(*V);
1632 applyMetadata(*V);
1633 V->setCallingConv(Variant->getCallingConv());
1634
1635 if (!V->getType()->isVoidTy())
1636 State.set(this, V);
1637}
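// Illustrative sketch (not part of the original source; names are examples
// only): if the scalar loop contained
//   %r = call float @foo(float %x)
// and a 4-lane vector variant of @foo was found, the widened call created
// above would look roughly like
//   %r.vec = call <4 x float> @_ZGVnN4v_foo(<4 x float> %x.vec)
// with scalar (e.g. linear pointer) parameters passed unwidened, per the check
// on the variant's parameter types.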
1638
1640 VPCostContext &Ctx) const {
1641 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
1642 Variant->getFunctionType()->params(),
1643 Ctx.CostKind);
1644}
1645
1646#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1648 VPSlotTracker &SlotTracker) const {
1649 O << Indent << "WIDEN-CALL ";
1650
1651 Function *CalledFn = getCalledScalarFunction();
1652 if (CalledFn->getReturnType()->isVoidTy())
1653 O << "void ";
1654 else {
1656 O << " = ";
1657 }
1658
1659 O << "call";
1660 printFlags(O);
1661 O << " @" << CalledFn->getName() << "(";
1662 interleaveComma(args(), O, [&O, &SlotTracker](VPValue *Op) {
1663 Op->printAsOperand(O, SlotTracker);
1664 });
1665 O << ")";
1666
1667 O << " (using library function";
1668 if (Variant->hasName())
1669 O << ": " << Variant->getName();
1670 O << ")";
1671}
1672#endif
1673
1675 assert(State.VF.isVector() && "not widening");
1676
1677 SmallVector<Type *, 2> TysForDecl;
1678 // Add return type if intrinsic is overloaded on it.
1679 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1680 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1682 for (const auto &I : enumerate(operands())) {
1683 // Some intrinsics have a scalar argument - don't replace it with a
1684 // vector.
1685 Value *Arg;
1686 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
1687 State.TTI))
1688 Arg = State.get(I.value(), VPLane(0));
1689 else
1690 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1691 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
1692 State.TTI))
1693 TysForDecl.push_back(Arg->getType());
1694 Args.push_back(Arg);
1695 }
1696
1697 // Use vector version of the intrinsic.
1698 Module *M = State.Builder.GetInsertBlock()->getModule();
1699 Function *VectorF =
1700 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1701 assert(VectorF &&
1702 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
1703
1704 auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1706 if (CI)
1707 CI->getOperandBundlesAsDefs(OpBundles);
1708
1709 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1710
1711 applyFlags(*V);
1712 applyMetadata(*V);
1713
1714 if (!V->getType()->isVoidTy())
1715 State.set(this, V);
1716}
1717
1718/// Compute the cost for the intrinsic \p ID with \p Operands, produced by \p R.
1721 const VPRecipeWithIRFlags &R,
1722 ElementCount VF,
1723 VPCostContext &Ctx) {
1724 // Some backends analyze intrinsic arguments to determine cost. Use the
1725 // underlying value for the operand if it has one. Otherwise try to use the
1726 // operand of the underlying call instruction, if there is one. Otherwise
1727 // clear Arguments.
1728 // TODO: Rework TTI interface to be independent of concrete IR values.
1730 for (const auto &[Idx, Op] : enumerate(Operands)) {
1731 auto *V = Op->getUnderlyingValue();
1732 if (!V) {
1733 if (auto *UI = dyn_cast_or_null<CallBase>(R.getUnderlyingValue())) {
1734 Arguments.push_back(UI->getArgOperand(Idx));
1735 continue;
1736 }
1737 Arguments.clear();
1738 break;
1739 }
1740 Arguments.push_back(V);
1741 }
1742
1743 Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
1744 Type *RetTy = VF.isVector() ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;
1745 SmallVector<Type *> ParamTys;
1746 for (const VPValue *Op : Operands) {
1747 ParamTys.push_back(VF.isVector()
1749 : Ctx.Types.inferScalarType(Op));
1750 }
1751
1752 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1753 FastMathFlags FMF =
1754 R.hasFastMathFlags() ? R.getFastMathFlags() : FastMathFlags();
1755 IntrinsicCostAttributes CostAttrs(
1756 ID, RetTy, Arguments, ParamTys, FMF,
1757 dyn_cast_or_null<IntrinsicInst>(R.getUnderlyingValue()),
1759 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
1760}
1761
1763 VPCostContext &Ctx) const {
1765 return getCostForIntrinsics(VectorIntrinsicID, ArgOps, *this, VF, Ctx);
1766}
1767
1769 return Intrinsic::getBaseName(VectorIntrinsicID);
1770}
1771
1773 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1774 return all_of(enumerate(operands()), [this, &Op](const auto &X) {
1775 auto [Idx, V] = X;
1777 Idx, nullptr);
1778 });
1779}
1780
1781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1783 VPSlotTracker &SlotTracker) const {
1784 O << Indent << "WIDEN-INTRINSIC ";
1785 if (ResultTy->isVoidTy()) {
1786 O << "void ";
1787 } else {
1789 O << " = ";
1790 }
1791
1792 O << "call";
1793 printFlags(O);
1794 O << getIntrinsicName() << "(";
1795
1797 Op->printAsOperand(O, SlotTracker);
1798 });
1799 O << ")";
1800}
1801#endif
1802
1804 IRBuilderBase &Builder = State.Builder;
1805
1806 Value *Address = State.get(getOperand(0));
1807 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1808 VectorType *VTy = cast<VectorType>(Address->getType());
1809
1810 // The histogram intrinsic requires a mask even if the recipe doesn't;
1811 // if the mask operand was omitted then all lanes should be executed and
1812 // we just need to synthesize an all-true mask.
1813 Value *Mask = nullptr;
1814 if (VPValue *VPMask = getMask())
1815 Mask = State.get(VPMask);
1816 else
1817 Mask =
1818 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1819
1820 // If this is a subtract, we want to invert the increment amount. We may
1821 // add a separate intrinsic in future, but for now we'll try this.
1822 if (Opcode == Instruction::Sub)
1823 IncAmt = Builder.CreateNeg(IncAmt);
1824 else
1825 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1826
1827 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1828 {VTy, IncAmt->getType()},
1829 {Address, IncAmt, Mask});
1830}
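// Illustrative sketch (not part of the original source): for VF = 4 the call
// above produces IR along the lines of
//   call void @llvm.experimental.vector.histogram.add.v4p0.i32(
//       <4 x ptr> %bucket.ptrs, i32 %inc, <4 x i1> %mask)
// which increments each active lane's bucket by %inc, correctly handling lanes
// that point at the same bucket.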
1831
1833 VPCostContext &Ctx) const {
1834 // FIXME: Take the gather and scatter into account as well. For now we're
1835 // generating the same cost as the fallback path, but we'll likely
1836 // need to create a new TTI method for determining the cost, including
1837 // whether we can use base + vec-of-smaller-indices or just
1838 // vec-of-pointers.
1839 assert(VF.isVector() && "Invalid VF for histogram cost");
1840 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1841 VPValue *IncAmt = getOperand(1);
1842 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1843 VectorType *VTy = VectorType::get(IncTy, VF);
1844
1845 // Assume that a non-constant update value (or a constant != 1) requires
1846 // a multiply, and add that into the cost.
1847 InstructionCost MulCost =
1848 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
1849 if (IncAmt->isLiveIn()) {
1850 ConstantInt *CI = dyn_cast<ConstantInt>(IncAmt->getLiveInIRValue());
1851
1852 if (CI && CI->getZExtValue() == 1)
1853 MulCost = TTI::TCC_Free;
1854 }
1855
1856 // Find the cost of the histogram operation itself.
1857 Type *PtrTy = VectorType::get(AddressTy, VF);
1858 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1859 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1861 {PtrTy, IncTy, MaskTy});
1862
1863 // Add the costs together with the add/sub operation.
1864 return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
1865 Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
1866}
1867
1868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1870 VPSlotTracker &SlotTracker) const {
1871 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1873
1874 if (Opcode == Instruction::Sub)
1875 O << ", dec: ";
1876 else {
1877 assert(Opcode == Instruction::Add);
1878 O << ", inc: ";
1879 }
1881
1882 if (VPValue *Mask = getMask()) {
1883 O << ", mask: ";
1884 Mask->printAsOperand(O, SlotTracker);
1885 }
1886}
1887
1889 VPSlotTracker &SlotTracker) const {
1890 O << Indent << "WIDEN-SELECT ";
1892 O << " = select ";
1893 printFlags(O);
1895 O << ", ";
1897 O << ", ";
1899 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1900}
1901#endif
1902
1904 // The condition can be loop invariant but still defined inside the
1905 // loop. This means that we can't just use the original 'cond' value.
1906 // We have to take the 'vectorized' value and pick the first lane.
1907 // Instcombine will make this a no-op.
1908 auto *InvarCond =
1909 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1910
1911 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1912 Value *Op0 = State.get(getOperand(1));
1913 Value *Op1 = State.get(getOperand(2));
1914 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1915 State.set(this, Sel);
1916 if (auto *I = dyn_cast<Instruction>(Sel)) {
1917 if (isa<FPMathOperator>(I))
1918 applyFlags(*I);
1919 applyMetadata(*I);
1920 }
1921}
1922
1924 VPCostContext &Ctx) const {
1925 SelectInst *SI = cast<SelectInst>(getUnderlyingValue());
1926 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1927 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1928 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1929
1930 VPValue *Op0, *Op1;
1931 using namespace llvm::VPlanPatternMatch;
1932 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1933 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1934 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1935 // select x, y, false --> x & y
1936 // select x, true, y --> x | y
1937 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1938 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1939
1941 if (all_of(operands(),
1942 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1943 Operands.append(SI->op_begin(), SI->op_end());
1944 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1945 return Ctx.TTI.getArithmeticInstrCost(
1946 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
1947 Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1948 }
1949
1950 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1951 if (!ScalarCond)
1952 CondTy = VectorType::get(CondTy, VF);
1953
1955 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1956 Pred = Cmp->getPredicate();
1957 return Ctx.TTI.getCmpSelInstrCost(
1958 Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
1959 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
1960}
1961
1962VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
1963 AllowReassoc = FMF.allowReassoc();
1964 NoNaNs = FMF.noNaNs();
1965 NoInfs = FMF.noInfs();
1966 NoSignedZeros = FMF.noSignedZeros();
1967 AllowReciprocal = FMF.allowReciprocal();
1968 AllowContract = FMF.allowContract();
1969 ApproxFunc = FMF.approxFunc();
1970}
1971
1972#if !defined(NDEBUG)
1973bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
1974 switch (OpType) {
1975 case OperationType::OverflowingBinOp:
1976 return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
1977 Opcode == Instruction::Mul ||
1978 Opcode == VPInstruction::CanonicalIVIncrementForPart;
1979 case OperationType::Trunc:
1980 return Opcode == Instruction::Trunc;
1981 case OperationType::DisjointOp:
1982 return Opcode == Instruction::Or;
1983 case OperationType::PossiblyExactOp:
1984 return Opcode == Instruction::AShr;
1985 case OperationType::GEPOp:
1986 return Opcode == Instruction::GetElementPtr ||
1987 Opcode == VPInstruction::PtrAdd ||
1988 Opcode == VPInstruction::WidePtrAdd;
1989 case OperationType::FPMathOp:
1990 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
1991 Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
1992 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
1993 Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
1994 Opcode == VPInstruction::WideIVStep ||
1997 case OperationType::NonNegOp:
1998 return Opcode == Instruction::ZExt;
1999 break;
2000 case OperationType::Cmp:
2001 return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
2002 case OperationType::Other:
2003 return true;
2004 }
2005 llvm_unreachable("Unknown OperationType enum");
2006}
2007#endif
2008
2009#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2011 switch (OpType) {
2012 case OperationType::Cmp:
2014 break;
2015 case OperationType::DisjointOp:
2017 O << " disjoint";
2018 break;
2019 case OperationType::PossiblyExactOp:
2020 if (ExactFlags.IsExact)
2021 O << " exact";
2022 break;
2023 case OperationType::OverflowingBinOp:
2024 if (WrapFlags.HasNUW)
2025 O << " nuw";
2026 if (WrapFlags.HasNSW)
2027 O << " nsw";
2028 break;
2029 case OperationType::Trunc:
2030 if (TruncFlags.HasNUW)
2031 O << " nuw";
2032 if (TruncFlags.HasNSW)
2033 O << " nsw";
2034 break;
2035 case OperationType::FPMathOp:
2037 break;
2038 case OperationType::GEPOp:
2039 if (GEPFlags.isInBounds())
2040 O << " inbounds";
2042 O << " nusw";
2044 O << " nuw";
2045 break;
2046 case OperationType::NonNegOp:
2047 if (NonNegFlags.NonNeg)
2048 O << " nneg";
2049 break;
2050 case OperationType::Other:
2051 break;
2052 }
2053 O << " ";
2054}
2055#endif
2056
2058 auto &Builder = State.Builder;
2059 switch (Opcode) {
2060 case Instruction::Call:
2061 case Instruction::Br:
2062 case Instruction::PHI:
2063 case Instruction::GetElementPtr:
2064 case Instruction::Select:
2065 llvm_unreachable("This instruction is handled by a different recipe.");
2066 case Instruction::UDiv:
2067 case Instruction::SDiv:
2068 case Instruction::SRem:
2069 case Instruction::URem:
2070 case Instruction::Add:
2071 case Instruction::FAdd:
2072 case Instruction::Sub:
2073 case Instruction::FSub:
2074 case Instruction::FNeg:
2075 case Instruction::Mul:
2076 case Instruction::FMul:
2077 case Instruction::FDiv:
2078 case Instruction::FRem:
2079 case Instruction::Shl:
2080 case Instruction::LShr:
2081 case Instruction::AShr:
2082 case Instruction::And:
2083 case Instruction::Or:
2084 case Instruction::Xor: {
2085 // Just widen unops and binops.
2087 for (VPValue *VPOp : operands())
2088 Ops.push_back(State.get(VPOp));
2089
2090 Value *V = Builder.CreateNAryOp(Opcode, Ops);
2091
2092 if (auto *VecOp = dyn_cast<Instruction>(V)) {
2093 applyFlags(*VecOp);
2094 applyMetadata(*VecOp);
2095 }
2096
2097 // Use this vector value for all users of the original instruction.
2098 State.set(this, V);
2099 break;
2100 }
2101 case Instruction::ExtractValue: {
2102 assert(getNumOperands() == 2 && "expected single level extractvalue");
2103 Value *Op = State.get(getOperand(0));
2104 auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
2105 Value *Extract = Builder.CreateExtractValue(Op, CI->getZExtValue());
2106 State.set(this, Extract);
2107 break;
2108 }
2109 case Instruction::Freeze: {
2110 Value *Op = State.get(getOperand(0));
2111 Value *Freeze = Builder.CreateFreeze(Op);
2112 State.set(this, Freeze);
2113 break;
2114 }
2115 case Instruction::ICmp:
2116 case Instruction::FCmp: {
2117 // Widen compares. Generate vector compares.
2118 bool FCmp = Opcode == Instruction::FCmp;
2119 Value *A = State.get(getOperand(0));
2120 Value *B = State.get(getOperand(1));
2121 Value *C = nullptr;
2122 if (FCmp) {
2123 // Propagate fast math flags.
2124 C = Builder.CreateFCmpFMF(
2125 getPredicate(), A, B,
2126 dyn_cast_or_null<Instruction>(getUnderlyingValue()));
2127 } else {
2128 C = Builder.CreateICmp(getPredicate(), A, B);
2129 }
2130 if (auto *I = dyn_cast<Instruction>(C))
2131 applyMetadata(*I);
2132 State.set(this, C);
2133 break;
2134 }
2135 default:
2136 // This instruction is not vectorized by simple widening.
2137 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
2138 << Instruction::getOpcodeName(Opcode));
2139 llvm_unreachable("Unhandled instruction!");
2140 } // end of switch.
2141
2142#if !defined(NDEBUG)
2143 // Verify that VPlan type inference results agree with the type of the
2144 // generated values.
2146 State.get(this)->getType() &&
2147 "inferred type and type from generated instructions do not match");
2148#endif
2149}
2150
2152 VPCostContext &Ctx) const {
2153 switch (Opcode) {
2154 case Instruction::UDiv:
2155 case Instruction::SDiv:
2156 case Instruction::SRem:
2157 case Instruction::URem:
2158 // If the div/rem operation isn't safe to speculate and requires
2159 // predication, then the only way we can even create a VPlan is to insert
2160 // a select on the second input operand to ensure we use the value of 1
2161 // for the inactive lanes. The select will be costed separately.
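 // E.g. (sketch, not necessarily the exact IR produced): a predicated
 // 'udiv %x, %d' is planned as 'udiv %x, (select %mask, %d, 1)' so inactive
 // lanes divide by 1; the select is costed by the recipe that creates it.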
2162 case Instruction::FNeg:
2163 case Instruction::Add:
2164 case Instruction::FAdd:
2165 case Instruction::Sub:
2166 case Instruction::FSub:
2167 case Instruction::Mul:
2168 case Instruction::FMul:
2169 case Instruction::FDiv:
2170 case Instruction::FRem:
2171 case Instruction::Shl:
2172 case Instruction::LShr:
2173 case Instruction::AShr:
2174 case Instruction::And:
2175 case Instruction::Or:
2176 case Instruction::Xor:
2177 case Instruction::Freeze:
2178 case Instruction::ExtractValue:
2179 case Instruction::ICmp:
2180 case Instruction::FCmp:
2181 return *getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
2182 default:
2183 llvm_unreachable("Unsupported opcode for instruction");
2184 }
2185}
2186
2187#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2189 VPSlotTracker &SlotTracker) const {
2190 O << Indent << "WIDEN ";
2192 O << " = " << Instruction::getOpcodeName(Opcode);
2193 printFlags(O);
2195}
2196#endif
2197
2199 auto &Builder = State.Builder;
2200 // Vectorize casts.
2201 assert(State.VF.isVector() && "Not vectorizing?");
2202 Type *DestTy = VectorType::get(getResultType(), State.VF);
2203 VPValue *Op = getOperand(0);
2204 Value *A = State.get(Op);
2205 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
2206 State.set(this, Cast);
2207 if (auto *CastOp = dyn_cast<Instruction>(Cast)) {
2208 applyFlags(*CastOp);
2209 applyMetadata(*CastOp);
2210 }
2211}
2212
2214 VPCostContext &Ctx) const {
2215 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
2216 // the legacy cost model, including truncates/extends when evaluating a
2217 // reduction in a smaller type.
2218 if (!getUnderlyingValue())
2219 return 0;
2220 // Computes the CastContextHint from a recipe that may access memory.
2221 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
2222 if (VF.isScalar())
2224 if (isa<VPInterleaveRecipe>(R))
2226 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
2227 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
2229 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
2230 if (WidenMemoryRecipe == nullptr)
2232 if (!WidenMemoryRecipe->isConsecutive())
2234 if (WidenMemoryRecipe->isReverse())
2236 if (WidenMemoryRecipe->isMasked())
2239 };
2240
2241 VPValue *Operand = getOperand(0);
2243 // For Trunc/FPTrunc, get the context from the only user.
2244 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
2246 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
2247 CCH = ComputeCCH(StoreRecipe);
2248 }
2249 // For Z/Sext, get the context from the operand.
2250 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
2251 Opcode == Instruction::FPExt) {
2252 if (Operand->isLiveIn())
2254 else if (Operand->getDefiningRecipe())
2255 CCH = ComputeCCH(Operand->getDefiningRecipe());
2256 }
2257
2258 auto *SrcTy =
2259 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
2260 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
2261 // Arm TTI will use the underlying instruction to determine the cost.
2262 return Ctx.TTI.getCastInstrCost(
2263 Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
2264 dyn_cast_if_present<Instruction>(getUnderlyingValue()));
2265}
2266
2267#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2269 VPSlotTracker &SlotTracker) const {
2270 O << Indent << "WIDEN-CAST ";
2272 O << " = " << Instruction::getOpcodeName(Opcode);
2273 printFlags(O);
2275 O << " to " << *getResultType();
2276}
2277#endif
2278
2280 VPCostContext &Ctx) const {
2281 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2282}
2283
2284/// A helper function that returns an integer or floating-point constant with
2285/// value C.
2287 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
2288 : ConstantFP::get(Ty, C);
2289}
2290
2291#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2293 VPSlotTracker &SlotTracker) const {
2294 O << Indent;
2296 O << " = WIDEN-INDUCTION ";
2298
2299 if (auto *TI = getTruncInst())
2300 O << " (truncated to " << *TI->getType() << ")";
2301}
2302#endif
2303
2305 // The step may be defined by a recipe in the preheader (e.g. if it requires
2306 // SCEV expansion), but for the canonical induction the step is required to be
2307 // 1, which is represented as live-in.
2309 return false;
2310 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
2311 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
2312 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
2313 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
2314 getScalarType() == CanIV->getScalarType();
2315}
2316
2317#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2319 VPSlotTracker &SlotTracker) const {
2320 O << Indent;
2322 O << " = DERIVED-IV ";
2324 O << " + ";
2326 O << " * ";
2328}
2329#endif
2330
2332 // Fast-math-flags propagate from the original induction instruction.
2334 if (hasFastMathFlags())
2336
2337 /// Compute scalar induction steps. \p ScalarIV is the scalar induction
2338 /// variable on which to base the steps, \p Step is the size of the step.
2339
2340 Value *BaseIV = State.get(getOperand(0), VPLane(0));
2341 Value *Step = State.get(getStepValue(), VPLane(0));
2342 IRBuilderBase &Builder = State.Builder;
2343
2344 // Ensure step has the same type as that of scalar IV.
2345 Type *BaseIVTy = BaseIV->getType()->getScalarType();
2346 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
2347
2348 // We build scalar steps for both integer and floating-point induction
2349 // variables. Here, we determine the kind of arithmetic we will perform.
2352 if (BaseIVTy->isIntegerTy()) {
2353 AddOp = Instruction::Add;
2354 MulOp = Instruction::Mul;
2355 } else {
2356 AddOp = InductionOpcode;
2357 MulOp = Instruction::FMul;
2358 }
2359
2360 // Determine the number of scalars we need to generate for each unroll
2361 // iteration.
2362 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
2363 // Compute the scalar steps and save the results in State.
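 // Worked example (illustrative): for an integer IV with Step = 4, VF = 4
 // (fixed) and unroll part 0, the lanes produced below are BaseIV + 0*4,
 // BaseIV + 1*4, BaseIV + 2*4 and BaseIV + 3*4; later parts start the lane
 // index at Part * VF.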
2364 Type *IntStepTy =
2365 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
2366 Type *VecIVTy = nullptr;
2367 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
2368 if (!FirstLaneOnly && State.VF.isScalable()) {
2369 VecIVTy = VectorType::get(BaseIVTy, State.VF);
2370 UnitStepVec =
2371 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
2372 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
2373 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
2374 }
2375
2376 unsigned StartLane = 0;
2377 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
2378 if (State.Lane) {
2379 StartLane = State.Lane->getKnownLane();
2380 EndLane = StartLane + 1;
2381 }
2382 Value *StartIdx0;
2383 if (getUnrollPart(*this) == 0)
2384 StartIdx0 = ConstantInt::get(IntStepTy, 0);
2385 else {
2386 StartIdx0 = State.get(getOperand(2), true);
2387 if (getUnrollPart(*this) != 1) {
2388 StartIdx0 =
2389 Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
2390 getUnrollPart(*this)));
2391 }
2392 StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
2393 }
2394
2395 if (!FirstLaneOnly && State.VF.isScalable()) {
2396 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
2397 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
2398 if (BaseIVTy->isFloatingPointTy())
2399 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
2400 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
2401 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
2402 State.set(this, Add);
2403 // It's also useful to record the per-lane values for the known minimum
2404 // number of elements, so we do that below. This improves the code quality
2405 // when extracting the first element, for example.
2406 }
2407
2408 if (BaseIVTy->isFloatingPointTy())
2409 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
2410
2411 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
2412 Value *StartIdx = Builder.CreateBinOp(
2413 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
2414 // The step returned by `createStepForVF` is a runtime-evaluated value
2415 // when VF is scalable. Otherwise, it should be folded into a Constant.
2416 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
2417 "Expected StartIdx to be folded to a constant when VF is not "
2418 "scalable");
2419 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
2420 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
2421 State.set(this, Add, VPLane(Lane));
2422 }
2423}
2424
2425#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2427 VPSlotTracker &SlotTracker) const {
2428 O << Indent;
2430 O << " = SCALAR-STEPS ";
2432}
2433#endif
2434
2436 assert(State.VF.isVector() && "not widening");
2437 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
2438 // Construct a vector GEP by widening the operands of the scalar GEP as
2439 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
2440 // results in a vector of pointers when at least one operand of the GEP
2441 // is vector-typed. Thus, to keep the representation compact, we only use
2442 // vector-typed operands for loop-varying values.
2443
2444 if (areAllOperandsInvariant()) {
2445 // If we are vectorizing, but the GEP has only loop-invariant operands,
2446 // the GEP we build (by only using vector-typed operands for
2447 // loop-varying values) would be a scalar pointer. Thus, to ensure we
2448 // produce a vector of pointers, we need to either arbitrarily pick an
2449 // operand to broadcast, or broadcast a clone of the original GEP.
2450 // Here, we broadcast a clone of the original.
2451 //
2452 // TODO: If at some point we decide to scalarize instructions having
2453 // loop-invariant operands, this special case will no longer be
2454 // required. We would add the scalarization decision to
2455 // collectLoopScalars() and teach getVectorValue() to broadcast
2456 // the lane-zero scalar value.
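 // E.g. (sketch): for 'getelementptr i32, ptr %base, i64 %off' with both
 // operands loop-invariant, the scalar GEP built from the lane-0 operands is
 // splatted into a vector of identical pointers rather than widening each
 // operand.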
2458 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
2459 Ops.push_back(State.get(getOperand(I), VPLane(0)));
2460
2461 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
2462 ArrayRef(Ops).drop_front(), "",
2464 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
2465 State.set(this, Splat);
2466 } else {
2467 // If the GEP has at least one loop-varying operand, we are sure to
2468 // produce a vector of pointers unless VF is scalar.
2469 // The pointer operand of the new GEP. If it's loop-invariant, we
2470 // won't broadcast it.
2471 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
2472 : State.get(getOperand(0));
2473
2474 // Collect all the indices for the new GEP. If any index is
2475 // loop-invariant, we won't broadcast it.
2477 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
2478 VPValue *Operand = getOperand(I);
2479 if (isIndexLoopInvariant(I - 1))
2480 Indices.push_back(State.get(Operand, VPLane(0)));
2481 else
2482 Indices.push_back(State.get(Operand));
2483 }
2484
2485 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
2486 // but otherwise it should be a vector.
2487 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
2488 Indices, "", getGEPNoWrapFlags());
2489 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
2490 "NewGEP is not a pointer vector");
2491 State.set(this, NewGEP);
2492 }
2493}
2494
2495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2497 VPSlotTracker &SlotTracker) const {
2498 O << Indent << "WIDEN-GEP ";
2499 O << (isPointerLoopInvariant() ? "Inv" : "Var");
2500 for (size_t I = 0; I < getNumOperands() - 1; ++I)
2501 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
2502
2503 O << " ";
2505 O << " = getelementptr";
2506 printFlags(O);
2508}
2509#endif
2510
2511static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
2512 unsigned CurrentPart, IRBuilderBase &Builder) {
2513 // Use i32 for the gep index type when the value is constant,
2514 // or query DataLayout for a more suitable index type otherwise.
2515 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2516 return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
2517 ? DL.getIndexType(Builder.getPtrTy(0))
2518 : Builder.getInt32Ty();
2519}
2520
2522 auto &Builder = State.Builder;
2523 unsigned CurrentPart = getUnrollPart(*this);
2524 bool IsUnitStride = Stride == 1 || Stride == -1;
2525 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2526 IsUnitStride, CurrentPart, Builder);
2527
2528 // The wide store needs to start at the last vector element.
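 // Worked example (illustrative): with Stride == -1, RunTimeVF == 4 and
 // CurrentPart == 1, NumElt == -4 and LastLane == -3 below, so the result
 // points 7 elements before Ptr and a forward wide access from there covers
 // lanes 4..7 of the reversed access.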
2529 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2530 if (IndexTy != RunTimeVF->getType())
2531 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
2532 // NumElt = Stride * CurrentPart * RunTimeVF
2533 Value *NumElt = Builder.CreateMul(
2534 ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
2535 // LastLane = Stride * (RunTimeVF - 1)
2536 Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
2537 if (Stride != 1)
2538 LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane);
2539 Value *Ptr = State.get(getOperand(0), VPLane(0));
2540 Value *ResultPtr =
2541 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
2542 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
2544
2545 State.set(this, ResultPtr, /*IsScalar*/ true);
2546}
2547
2548#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2550 VPSlotTracker &SlotTracker) const {
2551 O << Indent;
2553 O << " = vector-end-pointer";
2554 printFlags(O);
2556}
2557#endif
2558
2560 auto &Builder = State.Builder;
2561 unsigned CurrentPart = getUnrollPart(*this);
2562 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2563 /*IsUnitStride*/ true, CurrentPart, Builder);
2564 Value *Ptr = State.get(getOperand(0), VPLane(0));
2565
2566 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2567 Value *ResultPtr =
2568 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2569
2570 State.set(this, ResultPtr, /*IsScalar*/ true);
2571}
2572
2573#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2575 VPSlotTracker &SlotTracker) const {
2576 O << Indent;
2578 O << " = vector-pointer ";
2579
2581}
2582#endif
2583
2585 VPCostContext &Ctx) const {
2586 // Handle cases where only the first lane is used in the same way as the
2587 // legacy cost model.
2589 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2590
2591 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2592 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2593 return (getNumIncomingValues() - 1) *
2594 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2596}
2597
2598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2600 VPSlotTracker &SlotTracker) const {
2601 O << Indent << "BLEND ";
2603 O << " =";
2604 if (getNumIncomingValues() == 1) {
2605 // Not a User of any mask: not really blending, this is a
2606 // single-predecessor phi.
2607 O << " ";
2609 } else {
2610 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2611 O << " ";
2613 if (I == 0)
2614 continue;
2615 O << "/";
2617 }
2618 }
2619}
2620#endif
2621
2623 assert(!State.Lane && "Reduction being replicated.");
2624 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2627 "In-loop AnyOf reductions aren't currently supported");
2628 // Propagate the fast-math flags carried by the underlying instruction.
2631 Value *NewVecOp = State.get(getVecOp());
2632 if (VPValue *Cond = getCondOp()) {
2633 Value *NewCond = State.get(Cond, State.VF.isScalar());
2634 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2635 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2636
2637 Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
2638 if (State.VF.isVector())
2639 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2640
2641 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2642 NewVecOp = Select;
2643 }
2644 Value *NewRed;
2645 Value *NextInChain;
2646 if (IsOrdered) {
2647 if (State.VF.isVector())
2648 NewRed =
2649 createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
2650 else
2651 NewRed = State.Builder.CreateBinOp(
2653 PrevInChain, NewVecOp);
2654 PrevInChain = NewRed;
2655 NextInChain = NewRed;
2656 } else {
2657 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2658 NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
2660 NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
2661 else
2662 NextInChain = State.Builder.CreateBinOp(
2664 PrevInChain, NewRed);
2665 }
2666 State.set(this, NextInChain, /*IsScalar*/ true);
2667}
2668
2670 assert(!State.Lane && "Reduction being replicated.");
2671
2672 auto &Builder = State.Builder;
2673 // Propagate the fast-math flags carried by the underlying instruction.
2674 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2676
2678 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2679 Value *VecOp = State.get(getVecOp());
2680 Value *EVL = State.get(getEVL(), VPLane(0));
2681
2682 Value *Mask;
2683 if (VPValue *CondOp = getCondOp())
2684 Mask = State.get(CondOp);
2685 else
2686 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2687
2688 Value *NewRed;
2689 if (isOrdered()) {
2690 NewRed = createOrderedReduction(Builder, Kind, VecOp, Prev, Mask, EVL);
2691 } else {
2692 NewRed = createSimpleReduction(Builder, VecOp, Kind, Mask, EVL);
2694 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2695 else
2696 NewRed = Builder.CreateBinOp(
2698 Prev);
2699 }
2700 State.set(this, NewRed, /*IsScalar*/ true);
2701}
2702
2704 VPCostContext &Ctx) const {
2705 RecurKind RdxKind = getRecurrenceKind();
2706 Type *ElementTy = Ctx.Types.inferScalarType(this);
2707 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2708 unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
2710 std::optional<FastMathFlags> OptionalFMF =
2711 ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
2712
2713 // TODO: Support any-of reductions.
2714 assert(
2716 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2717 "Any-of reduction not implemented in VPlan-based cost model currently.");
2718
2719 // Note that TTI should model the cost of moving the result to a scalar
2720 // register and the BinOp cost in getMinMaxReductionCost().
2723 return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
2724 }
2725
2726 // Note that TTI should model the cost of moving the result to a scalar
2727 // register and the BinOp cost in getArithmeticReductionCost().
2728 return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
2729 Ctx.CostKind);
2730}
2731
2733 ExpressionTypes ExpressionType,
2734 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes)
2735 : VPSingleDefRecipe(VPDef::VPExpressionSC, {}, {}),
2736 ExpressionRecipes(SetVector<VPSingleDefRecipe *>(
2737 ExpressionRecipes.begin(), ExpressionRecipes.end())
2738 .takeVector()),
2739 ExpressionType(ExpressionType) {
2740 assert(!ExpressionRecipes.empty() && "Nothing to combine?");
2741 assert(
2742 none_of(ExpressionRecipes,
2743 [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&
2744 "expression cannot contain recipes with side-effects");
2745
2746 // Maintain a copy of the expression recipes as a set of users.
2747 SmallPtrSet<VPUser *, 4> ExpressionRecipesAsSetOfUsers;
2748 for (auto *R : ExpressionRecipes)
2749 ExpressionRecipesAsSetOfUsers.insert(R);
2750
2751 // Recipes in the expression, except the last one, must only be used by
2752 // (other) recipes inside the expression. If there are other users, external
2753 // to the expression, use a clone of the recipe for external users.
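 // E.g. (sketch): if an extend feeding the bundled reduction is also used by
 // a recipe outside the expression, the extend is cloned; the clone keeps
 // serving the external user while the original becomes internal to the
 // expression.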
2754 for (VPSingleDefRecipe *R : ExpressionRecipes) {
2755 if (R != ExpressionRecipes.back() &&
2756 any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) {
2757 return !ExpressionRecipesAsSetOfUsers.contains(U);
2758 })) {
2759 // There are users outside of the expression. Clone the recipe and use the
2760 // clone for those external users.
2761 VPSingleDefRecipe *CopyForExtUsers = R->clone();
2762 R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
2763 VPUser &U, unsigned) {
2764 return !ExpressionRecipesAsSetOfUsers.contains(&U);
2765 });
2766 CopyForExtUsers->insertBefore(R);
2767 }
2768 if (R->getParent())
2769 R->removeFromParent();
2770 }
2771
2772 // Internalize all external operands to the expression recipes. To do so,
2773 // create new temporary VPValues for all operands defined by a recipe outside
2774 // the expression. The original operands are added as operands of the
2775 // VPExpressionRecipe itself.
2776 for (auto *R : ExpressionRecipes) {
2777 for (const auto &[Idx, Op] : enumerate(R->operands())) {
2778 auto *Def = Op->getDefiningRecipe();
2779 if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
2780 continue;
2781 addOperand(Op);
2782 LiveInPlaceholders.push_back(new VPValue());
2783 R->setOperand(Idx, LiveInPlaceholders.back());
2784 }
2785 }
2786}
2787
2789 for (auto *R : ExpressionRecipes)
2790 R->insertBefore(this);
2791
2792 for (const auto &[Idx, Op] : enumerate(operands()))
2793 LiveInPlaceholders[Idx]->replaceAllUsesWith(Op);
2794
2795 replaceAllUsesWith(ExpressionRecipes.back());
2796 ExpressionRecipes.clear();
2797}
2798
2800 VPCostContext &Ctx) const {
2801 Type *RedTy = Ctx.Types.inferScalarType(this);
2802 auto *SrcVecTy = cast<VectorType>(
2804 assert(RedTy->isIntegerTy() &&
2805 "VPExpressionRecipe only supports integer types currently.");
2806 switch (ExpressionType) {
2807 case ExpressionTypes::ExtendedReduction: {
2808 unsigned Opcode = RecurrenceDescriptor::getOpcode(
2809 cast<VPReductionRecipe>(ExpressionRecipes[1])->getRecurrenceKind());
2810 return Ctx.TTI.getExtendedReductionCost(
2811 Opcode,
2812 cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
2813 Instruction::ZExt,
2814 RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
2815 }
2816 case ExpressionTypes::MulAccReduction:
2817 return Ctx.TTI.getMulAccReductionCost(false, RedTy, SrcVecTy, Ctx.CostKind);
2818
2819 case ExpressionTypes::ExtMulAccReduction:
2820 return Ctx.TTI.getMulAccReductionCost(
2821 cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
2822 Instruction::ZExt,
2823 RedTy, SrcVecTy, Ctx.CostKind);
2824 }
2825 llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
2826}
2827
2829 return any_of(ExpressionRecipes, [](VPSingleDefRecipe *R) {
2830 return R->mayReadFromMemory() || R->mayWriteToMemory();
2831 });
2832}
2833
2835 assert(
2836 none_of(ExpressionRecipes,
2837 [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&
2838 "expression cannot contain recipes with side-effects");
2839 return false;
2840}
2841
2842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2843
2845 VPSlotTracker &SlotTracker) const {
2846 O << Indent << "EXPRESSION ";
2848 O << " = ";
2849 auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
2850 unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
2851
2852 switch (ExpressionType) {
2853 case ExpressionTypes::ExtendedReduction: {
2855 O << " +";
2856 O << " reduce." << Instruction::getOpcodeName(Opcode) << " (";
2858 Red->printFlags(O);
2859
2860 auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2861 O << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
2862 << *Ext0->getResultType();
2863 if (Red->isConditional()) {
2864 O << ", ";
2865 Red->getCondOp()->printAsOperand(O, SlotTracker);
2866 }
2867 O << ")";
2868 break;
2869 }
2870 case ExpressionTypes::MulAccReduction:
2871 case ExpressionTypes::ExtMulAccReduction: {
2873 O << " + ";
2874 O << "reduce."
2876 RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
2877 << " (";
2878 O << "mul";
2879 bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;
2880 auto *Mul = cast<VPWidenRecipe>(IsExtended ? ExpressionRecipes[2]
2881 : ExpressionRecipes[0]);
2882 Mul->printFlags(O);
2883 if (IsExtended)
2884 O << "(";
2886 if (IsExtended) {
2887 auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2888 O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
2889 << *Ext0->getResultType() << "), (";
2890 } else {
2891 O << ", ";
2892 }
2894 if (IsExtended) {
2895 auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
2896 O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
2897 << *Ext1->getResultType() << ")";
2898 }
2899 if (Red->isConditional()) {
2900 O << ", ";
2901 Red->getCondOp()->printAsOperand(O, SlotTracker);
2902 }
2903 O << ")";
2904 break;
2905 }
2906 }
2907}
2908
2910 VPSlotTracker &SlotTracker) const {
2911 O << Indent << "REDUCE ";
2913 O << " = ";
2915 O << " +";
2916 printFlags(O);
2917 O << " reduce."
2920 << " (";
2922 if (isConditional()) {
2923 O << ", ";
2925 }
2926 O << ")";
2927}
2928
2930 VPSlotTracker &SlotTracker) const {
2931 O << Indent << "REDUCE ";
2933 O << " = ";
2935 O << " +";
2936 printFlags(O);
2937 O << " vp.reduce."
2940 << " (";
2942 O << ", ";
2944 if (isConditional()) {
2945 O << ", ";
2947 }
2948 O << ")";
2949}
2950
2951#endif
2952
2953/// A helper function to scalarize a single Instruction in the innermost loop.
2954/// Generates a sequence of scalar instances for lane \p Lane. Uses the VPValue
2955/// operands from \p RepRecipe instead of \p Instr's operands.
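/// E.g. (illustrative): replicating 'store i32 %v, ptr %p' for lane 2 clones
/// the store and rewires its operands to the lane-2 scalar values recorded in
/// VPTransformState, so each lane gets its own scalar copy.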
2956static void scalarizeInstruction(const Instruction *Instr,
2957 VPReplicateRecipe *RepRecipe,
2958 const VPLane &Lane, VPTransformState &State) {
2959 assert((!Instr->getType()->isAggregateType() ||
2960 canVectorizeTy(Instr->getType())) &&
2961 "Expected vectorizable or non-aggregate type.");
2962
2963 // Does this instruction return a value ?
2964 bool IsVoidRetTy = Instr->getType()->isVoidTy();
2965
2966 Instruction *Cloned = Instr->clone();
2967 if (!IsVoidRetTy) {
2968 Cloned->setName(Instr->getName() + ".cloned");
2969 Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);
2970 // The operands of the replicate recipe may have been narrowed, resulting in
2971 // a narrower result type. Update the type of the cloned instruction to the
2972 // correct type.
2973 if (ResultTy != Cloned->getType())
2974 Cloned->mutateType(ResultTy);
2975 }
2976
2977 RepRecipe->applyFlags(*Cloned);
2978 RepRecipe->applyMetadata(*Cloned);
2979
2980 if (RepRecipe->hasPredicate())
2981 cast<CmpInst>(Cloned)->setPredicate(RepRecipe->getPredicate());
2982
2983 if (auto DL = RepRecipe->getDebugLoc())
2984 State.setDebugLocFrom(DL);
2985
2986 // Replace the operands of the cloned instructions with their scalar
2987 // equivalents in the new loop.
2988 for (const auto &I : enumerate(RepRecipe->operands())) {
2989 auto InputLane = Lane;
2990 VPValue *Operand = I.value();
2991 if (vputils::isSingleScalar(Operand))
2992 InputLane = VPLane::getFirstLane();
2993 Cloned->setOperand(I.index(), State.get(Operand, InputLane));
2994 }
2995
2996 // Place the cloned scalar in the new loop.
2997 State.Builder.Insert(Cloned);
2998
2999 State.set(RepRecipe, Cloned, Lane);
3000
3001 // If we just cloned a new assumption, add it to the assumption cache.
3002 if (auto *II = dyn_cast<AssumeInst>(Cloned))
3003 State.AC->registerAssumption(II);
3004
3005 assert(
3006 (RepRecipe->getParent()->getParent() ||
3007 !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
3008 all_of(RepRecipe->operands(),
3009 [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
3010 "Expected a recipe to either be within a region or have all of its "
3011 "operands defined outside the vectorized region.");
3012}
3013
3016
3017 if (!State.Lane) {
3018 assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions "
3019 "must have already been unrolled");
3020 scalarizeInstruction(UI, this, VPLane(0), State);
3021 return;
3022 }
3023
3024 assert((State.VF.isScalar() || !isSingleScalar()) &&
3025 "uniform recipe shouldn't be predicated");
3026 assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
3027 scalarizeInstruction(UI, this, *State.Lane, State);
3028 // Insert scalar instance packing it into a vector.
3029 if (State.VF.isVector() && shouldPack()) {
3030 Value *WideValue =
3031 State.Lane->isFirstLane()
3033 : State.get(this);
3034 State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
3035 *State.Lane));
3036 }
3037}
3038
3040 // Find if the recipe is used by a widened recipe via an intervening
3041 // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
3042 return any_of(users(), [](const VPUser *U) {
3043 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
3044 return !vputils::onlyScalarValuesUsed(PredR);
3045 return false;
3046 });
3047}
3048
3050 VPCostContext &Ctx) const {
3051 Instruction *UI = cast<Instruction>(getUnderlyingValue());
3052 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
3053 // transform; avoid computing its cost multiple times for now.
3054 Ctx.SkipCostComputation.insert(UI);
3055
3056 switch (UI->getOpcode()) {
3057 case Instruction::GetElementPtr:
3058 // We mark this instruction as zero-cost because the cost of GEPs in
3059 // vectorized code depends on whether the corresponding memory instruction
3060 // is scalarized or not. Therefore, we handle GEPs with the memory
3061 // instruction cost.
3062 return 0;
3063 case Instruction::Call: {
3064 auto *CalledFn =
3065 cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
3066
3069 for (const VPValue *ArgOp : ArgOps)
3070 Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
3071
3072 if (CalledFn->isIntrinsic())
3073 // Various pseudo-intrinsics with costs of 0 are scalarized instead of
3074 // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
3075 switch (CalledFn->getIntrinsicID()) {
3076 case Intrinsic::assume:
3077 case Intrinsic::lifetime_end:
3078 case Intrinsic::lifetime_start:
3079 case Intrinsic::sideeffect:
3080 case Intrinsic::pseudoprobe:
3081 case Intrinsic::experimental_noalias_scope_decl: {
3082 assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3083 ElementCount::getFixed(1), Ctx) == 0 &&
3084 "scalarizing intrinsic should be free");
3085 return InstructionCost(0);
3086 }
3087 default:
3088 break;
3089 }
3090
3091 Type *ResultTy = Ctx.Types.inferScalarType(this);
3092 InstructionCost ScalarCallCost =
3093 Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
3094 if (isSingleScalar()) {
3095 if (CalledFn->isIntrinsic())
3096 ScalarCallCost = std::min(
3097 ScalarCallCost,
3098 getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3099 ElementCount::getFixed(1), Ctx));
3100 return ScalarCallCost;
3101 }
3102
3103 if (VF.isScalable())
3105
3106 // Compute the cost of scalarizing the result and operands if needed.
3107 InstructionCost ScalarizationCost = 0;
3108 if (VF.isVector()) {
3109 if (!ResultTy->isVoidTy()) {
3110 for (Type *VectorTy :
3111 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
3112 ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
3113 cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
3114 /*Insert=*/true,
3115 /*Extract=*/false, Ctx.CostKind);
3116 }
3117 }
3118 // Skip operands that do not require extraction/scalarization and do not
3119 // incur any overhead.
3120 SmallPtrSet<const VPValue *, 4> UniqueOperands;
3121 Tys.clear();
3122 for (auto *Op : ArgOps) {
3123 if (Op->isLiveIn() || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
3124 !UniqueOperands.insert(Op).second)
3125 continue;
3127 }
3128 ScalarizationCost +=
3130 }
3131
3132 return ScalarCallCost * VF.getFixedValue() + ScalarizationCost;
3133 }
3134 case Instruction::Add:
3135 case Instruction::Sub:
3136 case Instruction::FAdd:
3137 case Instruction::FSub:
3138 case Instruction::Mul:
3139 case Instruction::FMul:
3140 case Instruction::FDiv:
3141 case Instruction::FRem:
3142 case Instruction::Shl:
3143 case Instruction::LShr:
3144 case Instruction::AShr:
3145 case Instruction::And:
3146 case Instruction::Or:
3147 case Instruction::Xor:
3148 case Instruction::ICmp:
3149 case Instruction::FCmp:
3151 Ctx) *
3152 (isSingleScalar() ? 1 : VF.getFixedValue());
3153 }
3154
3155 return Ctx.getLegacyCost(UI, VF);
3156}
3157
3158#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3160 VPSlotTracker &SlotTracker) const {
3161 O << Indent << (IsSingleScalar ? "CLONE " : "REPLICATE ");
3162
3163 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
3165 O << " = ";
3166 }
3167 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
3168 O << "call";
3169 printFlags(O);
3170 O << "@" << CB->getCalledFunction()->getName() << "(";
3172 O, [&O, &SlotTracker](VPValue *Op) {
3173 Op->printAsOperand(O, SlotTracker);
3174 });
3175 O << ")";
3176 } else {
3178 printFlags(O);
3180 }
3181
3182 if (shouldPack())
3183 O << " (S->V)";
3184}
3185#endif
3186
3188 assert(State.Lane && "Branch on Mask works only on single instance.");
3189
3190 VPValue *BlockInMask = getOperand(0);
3191 Value *ConditionBit = State.get(BlockInMask, *State.Lane);
3192
3193 // Replace the temporary unreachable terminator with a new conditional branch,
3194 // whose two destinations will be set later when they are created.
3195 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
3196 assert(isa<UnreachableInst>(CurrentTerminator) &&
3197 "Expected to replace unreachable terminator with conditional branch.");
3198 auto CondBr =
3199 State.Builder.CreateCondBr(ConditionBit, State.CFG.PrevBB, nullptr);
3200 CondBr->setSuccessor(0, nullptr);
3201 CurrentTerminator->eraseFromParent();
3202}
3203
3205 VPCostContext &Ctx) const {
3206 // The legacy cost model doesn't assign costs to branches for individual
3207 // replicate regions. Match the current behavior in the VPlan cost model for
3208 // now.
3209 return 0;
3210}
3211
3213 assert(State.Lane && "Predicated instruction PHI works per instance.");
3214 Instruction *ScalarPredInst =
3215 cast<Instruction>(State.get(getOperand(0), *State.Lane));
3216 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
3217 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
3218 assert(PredicatingBB && "Predicated block has no single predecessor.");
3219 assert(isa<VPReplicateRecipe>(getOperand(0)) &&
3220 "operand must be VPReplicateRecipe");
3221
3222 // By current pack/unpack logic we need to generate only a single phi node: if
3223 // a vector value for the predicated instruction exists at this point it means
3224 // the instruction has vector users only, and a phi for the vector value is
3225 // needed. In this case the recipe of the predicated instruction is marked to
3226 // also do that packing, thereby "hoisting" the insert-element sequence.
3227 // Otherwise, a phi node for the scalar value is needed.
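 // E.g. (sketch with hypothetical value names) for the vector case below:
 //   %vphi = phi <4 x i32> [ %unmodified.vec, %predicating.bb ],
 //                         [ %vec.with.inserted.elem, %predicated.bb ]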
3228 if (State.hasVectorValue(getOperand(0))) {
3229 Value *VectorValue = State.get(getOperand(0));
3230 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
3231 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
3232 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
3233 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
3234 if (State.hasVectorValue(this))
3235 State.reset(this, VPhi);
3236 else
3237 State.set(this, VPhi);
3238 // NOTE: Currently we need to update the value of the operand, so the next
3239 // predicated iteration inserts its generated value in the correct vector.
3240 State.reset(getOperand(0), VPhi);
3241 } else {
3242 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
3243 return;
3244
3245 Type *PredInstType = State.TypeAnalysis.inferScalarType(getOperand(0));
3246 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
3247 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
3248 PredicatingBB);
3249 Phi->addIncoming(ScalarPredInst, PredicatedBB);
3250 if (State.hasScalarValue(this, *State.Lane))
3251 State.reset(this, Phi, *State.Lane);
3252 else
3253 State.set(this, Phi, *State.Lane);
3254 // NOTE: Currently we need to update the value of the operand, so the next
3255 // predicated iteration inserts its generated value in the correct vector.
3256 State.reset(getOperand(0), Phi, *State.Lane);
3257 }
3258}
3259
3260#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3262 VPSlotTracker &SlotTracker) const {
3263 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
3265 O << " = ";
3267}
3268#endif
3269
3271 VPCostContext &Ctx) const {
3273 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3274 unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
3275 ->getAddressSpace();
3276 unsigned Opcode = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this)
3277 ? Instruction::Load
3278 : Instruction::Store;
3279
3280 if (!Consecutive) {
3281 // TODO: Using the original IR may not be accurate.
3282 // Currently, ARM will use the underlying IR to calculate gather/scatter
3283 // instruction cost.
3284 assert(!Reverse &&
3285 "Non-consecutive memory accesses should not be marked as reverse.");
3286
3288 Type *PtrTy = Ptr->getType();
3289
3290 // If the address value is uniform across all lanes, then the address can be
3291 // calculated with scalar type and broadcast.
3293 PtrTy = toVectorTy(PtrTy, VF);
3294
3295 return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
3296 Ctx.CostKind) +
3297 Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
3298 Ctx.CostKind, &Ingredient);
3299 }
3300
3302 if (IsMasked) {
3303 Cost +=
3304 Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
3305 } else {
3307 isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
3308 : getOperand(1));
3309 Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
3310 OpInfo, &Ingredient);
3311 }
3312 if (!Reverse)
3313 return Cost;
3314
3315 return Cost += Ctx.TTI.getShuffleCost(
3316 TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3317 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3318}
3319
3321 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
3322 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
3323 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3324 bool CreateGather = !isConsecutive();
3325
3326 auto &Builder = State.Builder;
3327 Value *Mask = nullptr;
3328 if (auto *VPMask = getMask()) {
3329 // Mask reversal is only needed when an explicit mask is present: an absent
3330 // (null) mask means all-ones, and the reverse of all-ones is still all-ones.
3331 Mask = State.get(VPMask);
3332 if (isReverse())
3333 Mask = Builder.CreateVectorReverse(Mask, "reverse");
3334 }
3335
3336 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
3337 Value *NewLI;
3338 if (CreateGather) {
3339 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
3340 "wide.masked.gather");
3341 } else if (Mask) {
3342 NewLI =
3343 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
3344 PoisonValue::get(DataTy), "wide.masked.load");
3345 } else {
3346 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
3347 }
3348 applyMetadata(*cast<Instruction>(NewLI));
3349 if (Reverse)
3350 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
3351 State.set(this, NewLI);
3352}
3353
3354#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3356 VPSlotTracker &SlotTracker) const {
3357 O << Indent << "WIDEN ";
3359 O << " = load ";
3361}
3362#endif
3363
3364 /// Use an all-true mask for the reverse rather than the actual mask, as it
3365 /// avoids a dependence without affecting the result.
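/// E.g. (illustrative, value names are hypothetical): for a <vscale x 4 x i32>
/// operand this emits something like
///   %rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
///            <vscale x 4 x i32> %op, <vscale x 4 x i1> splat (i1 true),
///            i32 %evl)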
3367 Value *EVL, const Twine &Name) {
3368 VectorType *ValTy = cast<VectorType>(Operand->getType());
3369 Value *AllTrueMask =
3370 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
3371 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
3372 {Operand, AllTrueMask, EVL}, nullptr, Name);
3373}
3374
3376 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
3377 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
3378 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3379 bool CreateGather = !isConsecutive();
3380
3381 auto &Builder = State.Builder;
3382 CallInst *NewLI;
3383 Value *EVL = State.get(getEVL(), VPLane(0));
3384 Value *Addr = State.get(getAddr(), !CreateGather);
3385 Value *Mask = nullptr;
3386 if (VPValue *VPMask = getMask()) {
3387 Mask = State.get(VPMask);
3388 if (isReverse())
3389 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
3390 } else {
3391 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3392 }
3393
3394 if (CreateGather) {
3395 NewLI =
3396 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
3397 nullptr, "wide.masked.gather");
3398 } else {
3399 NewLI = Builder.CreateIntrinsic(DataTy, Intrinsic::vp_load,
3400 {Addr, Mask, EVL}, nullptr, "vp.op.load");
3401 }
3402 NewLI->addParamAttr(
3403 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
3404 applyMetadata(*NewLI);
3405 Instruction *Res = NewLI;
3406 if (isReverse())
3407 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
3408 State.set(this, Res);
3409}
3410
3412 VPCostContext &Ctx) const {
3413 if (!Consecutive || IsMasked)
3414 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3415
3416 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
3417 // here because the EVL recipes use EVL to replace the tail mask, while the
3418 // legacy model always accounts for the cost of the mask.
3419 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
3420 // no longer need to compare against the legacy cost model.
3422 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3423 unsigned AS = getLoadStoreAddressSpace(&Ingredient);
3425 Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
3426 if (!Reverse)
3427 return Cost;
3428
3429 return Cost + Ctx.TTI.getShuffleCost(
3430 TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3431 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3432}
3433
3434#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3436 VPSlotTracker &SlotTracker) const {
3437 O << Indent << "WIDEN ";
3439 O << " = vp.load ";
3441}
3442#endif
3443
3445 VPValue *StoredVPValue = getStoredValue();
3446 bool CreateScatter = !isConsecutive();
3447 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3448
3449 auto &Builder = State.Builder;
3450
3451 Value *Mask = nullptr;
3452 if (auto *VPMask = getMask()) {
3453 // Mask reversal is only needed when an explicit mask is present: an absent
3454 // (null) mask means all-ones, and the reverse of all-ones is still all-ones.
3455 Mask = State.get(VPMask);
3456 if (isReverse())
3457 Mask = Builder.CreateVectorReverse(Mask, "reverse");
3458 }
3459
3460 Value *StoredVal = State.get(StoredVPValue);
3461 if (isReverse()) {
3462 // If we store to reverse consecutive memory locations, then we need
3463 // to reverse the order of elements in the stored value.
3464 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
3465 // We don't want to update the value in the map as it might be used in
3466 // another expression. So don't call resetVectorValue(StoredVal).
3467 }
3468 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
3469 Instruction *NewSI = nullptr;
3470 if (CreateScatter)
3471 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
3472 else if (Mask)
3473 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
3474 else
3475 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
3476 applyMetadata(*NewSI);
3477}
3478
3479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3481 VPSlotTracker &SlotTracker) const {
3482 O << Indent << "WIDEN store ";
3484}
3485#endif
3486
3488 VPValue *StoredValue = getStoredValue();
3489 bool CreateScatter = !isConsecutive();
3490 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3491
3492 auto &Builder = State.Builder;
3493
3494 CallInst *NewSI = nullptr;
3495 Value *StoredVal = State.get(StoredValue);
3496 Value *EVL = State.get(getEVL(), VPLane(0));
3497 if (isReverse())
3498 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
3499 Value *Mask = nullptr;
3500 if (VPValue *VPMask = getMask()) {
3501 Mask = State.get(VPMask);
3502 if (isReverse())
3503 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
3504 } else {
3505 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3506 }
3507 Value *Addr = State.get(getAddr(), !CreateScatter);
3508 if (CreateScatter) {
3509 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
3510 Intrinsic::vp_scatter,
3511 {StoredVal, Addr, Mask, EVL});
3512 } else {
3513 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
3514 Intrinsic::vp_store,
3515 {StoredVal, Addr, Mask, EVL});
3516 }
3517 NewSI->addParamAttr(
3518 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
3519 applyMetadata(*NewSI);
3520}
3521
3523 VPCostContext &Ctx) const {
3524 if (!Consecutive || IsMasked)
3525 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3526
3527 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost()
3528 // here because the EVL recipes use EVL to replace the tail mask, while the
3529 // legacy model always accounts for the cost of the mask.
3530 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we
3531 // no longer need to compare against the legacy cost model.
3533 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3534 unsigned AS = getLoadStoreAddressSpace(&Ingredient);
3536 Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
3537 if (!Reverse)
3538 return Cost;
3539
3540 return Cost + Ctx.TTI.getShuffleCost(
3541 TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
3542 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3543}
3544
3545#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3547 VPSlotTracker &SlotTracker) const {
3548 O << Indent << "WIDEN vp.store ";
3550}
3551#endif
3552
3554 VectorType *DstVTy, const DataLayout &DL) {
3555 // Verify that V is a vector type with same number of elements as DstVTy.
3556 auto VF = DstVTy->getElementCount();
3557 auto *SrcVecTy = cast<VectorType>(V->getType());
3558 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
3559 Type *SrcElemTy = SrcVecTy->getElementType();
3560 Type *DstElemTy = DstVTy->getElementType();
3561 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
3562 "Vector elements must have same size");
3563
3564 // Do a direct cast if element types are castable.
3565 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
3566 return Builder.CreateBitOrPointerCast(V, DstVTy);
3567 }
3568 // V cannot be directly casted to desired vector type.
3569 // May happen when V is a floating point vector but DstVTy is a vector of
3570 // pointers, or vice versa. Handle this with a two-step cast through an
3571 // intermediate integer type, i.e. Ptr <-> Int <-> Float.
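 // E.g. (sketch, assuming 64-bit pointers): casting <2 x double> to <2 x ptr>
 // goes through <2 x i64>: first a bitcast of the double vector to i64, then
 // an inttoptr of the i64 vector.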
3572 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
3573 "Only one type should be a pointer type");
3574 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
3575 "Only one type should be a floating point type");
3576 Type *IntTy =
3577 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
3578 auto *VecIntTy = VectorType::get(IntTy, VF);
3579 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
3580 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
3581}
3582
3583/// Return a vector containing interleaved elements from multiple
3584/// smaller input vectors.
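/// E.g. (illustrative): interleaving two <4 x i32> vectors A = <a0,a1,a2,a3>
/// and B = <b0,b1,b2,b3> yields <a0,b0,a1,b1,a2,b2,a3,b3>.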
3586 const Twine &Name) {
3587 unsigned Factor = Vals.size();
3588 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
3589
3590 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
3591#ifndef NDEBUG
3592 for (Value *Val : Vals)
3593 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
3594#endif
3595
3596 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
3597 // must use intrinsics to interleave.
3598 if (VecTy->isScalableTy()) {
3599 assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors");
3600 return Builder.CreateVectorInterleave(Vals, Name);
3601 }
3602
3603 // Fixed length. Start by concatenating all vectors into a wide vector.
3604 Value *WideVec = concatenateVectors(Builder, Vals);
3605
3606 // Interleave the elements into the wide vector.
3607 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
3608 return Builder.CreateShuffleVector(
3609 WideVec, createInterleaveMask(NumElts, Factor), Name);
3610}
3611
3612// Try to vectorize the interleave group that \p Instr belongs to.
3613//
3614// E.g. Translate following interleaved load group (factor = 3):
3615// for (i = 0; i < N; i+=3) {
3616// R = Pic[i]; // Member of index 0
3617// G = Pic[i+1]; // Member of index 1
3618// B = Pic[i+2]; // Member of index 2
3619// ... // do something to R, G, B
3620// }
3621// To:
3622// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
3623// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
3624// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
3625// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
3626//
3627// Or translate following interleaved store group (factor = 3):
3628// for (i = 0; i < N; i+=3) {
3629// ... do something to R, G, B
3630// Pic[i] = R; // Member of index 0
3631// Pic[i+1] = G; // Member of index 1
3632// Pic[i+2] = B; // Member of index 2
3633// }
3634// To:
3635// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
3636// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
3637// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
3638// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
3639// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
3640void VPInterleaveRecipe::execute(VPTransformState &State) {
3641 assert(!State.Lane && "Interleave group being replicated.");
3642 assert((!NeedsMaskForGaps || !State.VF.isScalable()) &&
3643 "Masking gaps for scalable vectors is not yet supported.");
3644 const InterleaveGroup<Instruction> *Group = IG;
3645 Instruction *Instr = Group->getInsertPos();
3646
3647 // Prepare for the vector type of the interleaved load/store.
3648 Type *ScalarTy = getLoadStoreType(Instr);
3649 unsigned InterleaveFactor = Group->getFactor();
3650 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
3651
3652 VPValue *BlockInMask = getMask();
3653 VPValue *Addr = getAddr();
3654 Value *ResAddr = State.get(Addr, VPLane(0));
3655
3656 auto CreateGroupMask = [&BlockInMask, &State,
3657 &InterleaveFactor](Value *MaskForGaps) -> Value * {
3658 if (State.VF.isScalable()) {
3659 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
3660 assert(InterleaveFactor <= 8 &&
3661 "Unsupported deinterleave factor for scalable vectors");
3662 auto *ResBlockInMask = State.get(BlockInMask);
3663 SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
3664 return interleaveVectors(State.Builder, Ops, "interleaved.mask");
3665 }
3666
3667 if (!BlockInMask)
3668 return MaskForGaps;
3669
3670 Value *ResBlockInMask = State.get(BlockInMask);
3671 Value *ShuffledMask = State.Builder.CreateShuffleVector(
3672 ResBlockInMask,
3673 createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
3674 "interleaved.mask");
3675 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
3676 ShuffledMask, MaskForGaps)
3677 : ShuffledMask;
3678 };
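 // For illustration, with factor 3 and a fixed VF of 4 the lambda above widens
 // a block mask <m0, m1, m2, m3> into
 //   <m0, m0, m0, m1, m1, m1, m2, m2, m2, m3, m3, m3>
 // so every member of the group inherits its lane's predicate, optionally
 // ANDed with the gap mask.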
3679
3680 const DataLayout &DL = Instr->getDataLayout();
3681 // Vectorize the interleaved load group.
3682 if (isa<LoadInst>(Instr)) {
3683 Value *MaskForGaps = nullptr;
3684 if (NeedsMaskForGaps) {
3685 MaskForGaps =
3686 createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
3687 assert(MaskForGaps && "Mask for Gaps is required but it is null");
3688 }
3689
3690 Instruction *NewLoad;
3691 if (BlockInMask || MaskForGaps) {
3692 Value *GroupMask = CreateGroupMask(MaskForGaps);
3693 Value *PoisonVec = PoisonValue::get(VecTy);
3694 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
3695 Group->getAlign(), GroupMask,
3696 PoisonVec, "wide.masked.vec");
3697 } else
3698 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
3699 Group->getAlign(), "wide.vec");
3700 applyMetadata(*NewLoad);
3701 // TODO: Also manage existing metadata using VPIRMetadata.
3702 Group->addMetadata(NewLoad);
3703
3704 ArrayRef<VPValue *> VPDefs = definedValues();
3705 if (VecTy->isScalableTy()) {
3706 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3707 // so must use intrinsics to deinterleave.
3708 assert(InterleaveFactor <= 8 &&
3709 "Unsupported deinterleave factor for scalable vectors");
3710 NewLoad = State.Builder.CreateIntrinsic(
3711 Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
3712 NewLoad->getType(), NewLoad,
3713 /*FMFSource=*/nullptr, "strided.vec");
3714 }
3715
3716 auto CreateStridedVector = [&InterleaveFactor, &State,
3717 &NewLoad](unsigned Index) -> Value * {
3718 assert(Index < InterleaveFactor && "Illegal group index");
3719 if (State.VF.isScalable())
3720 return State.Builder.CreateExtractValue(NewLoad, Index);
3721
3722 // For fixed length VF, use shuffle to extract the sub-vectors from the
3723 // wide load.
3724 auto StrideMask =
3725 createStrideMask(Index, InterleaveFactor, State.VF.getFixedValue());
3726 return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
3727 "strided.vec");
3728 };
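 // For illustration, with factor 3 and a fixed VF of 4, member index 1 is
 // extracted from the wide load using the stride mask <1, 4, 7, 10>, matching
 // the %G.vec shuffle in the example at the top of this function.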
3729
3730 for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
3731 Instruction *Member = Group->getMember(I);
3732
3733 // Skip the gaps in the group.
3734 if (!Member)
3735 continue;
3736
3737 Value *StridedVec = CreateStridedVector(I);
3738
3739 // If this member has a different type, cast the result to that type.
3740 if (Member->getType() != ScalarTy) {
3741 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3742 StridedVec =
3743 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3744 }
3745
3746 if (Group->isReverse())
3747 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3748
3749 State.set(VPDefs[J], StridedVec);
3750 ++J;
3751 }
3752 return;
3753 }
3754
3755 // The sub-vector type for the current instruction.
3756 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3757
3758 // Vectorize the interleaved store group.
3759 Value *MaskForGaps =
3760 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3761 assert(((MaskForGaps != nullptr) == NeedsMaskForGaps) &&
3762 "Mismatch between NeedsMaskForGaps and MaskForGaps");
3763 ArrayRef<VPValue *> StoredValues = getStoredValues();
3764 // Collect the stored vector from each member.
3765 SmallVector<Value *, 4> StoredVecs;
3766 unsigned StoredIdx = 0;
3767 for (unsigned i = 0; i < InterleaveFactor; i++) {
3768 assert((Group->getMember(i) || MaskForGaps) &&
3769 "Fail to get a member from an interleaved store group");
3770 Instruction *Member = Group->getMember(i);
3771
3772 // Skip the gaps in the group.
3773 if (!Member) {
3774 Value *Undef = PoisonValue::get(SubVT);
3775 StoredVecs.push_back(Undef);
3776 continue;
3777 }
3778
3779 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3780 ++StoredIdx;
3781
3782 if (Group->isReverse())
3783 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3784
3785 // If this member has a different type, cast it to the unified type.
3786
3787 if (StoredVec->getType() != SubVT)
3788 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3789
3790 StoredVecs.push_back(StoredVec);
3791 }
3792
3793 // Interleave all the smaller vectors into one wider vector.
3794 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3795 Instruction *NewStoreInstr;
3796 if (BlockInMask || MaskForGaps) {
3797 Value *GroupMask = CreateGroupMask(MaskForGaps);
3798 NewStoreInstr = State.Builder.CreateMaskedStore(
3799 IVec, ResAddr, Group->getAlign(), GroupMask);
3800 } else
3801 NewStoreInstr =
3802 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3803
3804 applyMetadata(*NewStoreInstr);
3805 // TODO: Also manage existing metadata using VPIRMetadata.
3806 Group->addMetadata(NewStoreInstr);
3807}
3808
3809#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3810void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
3811 VPSlotTracker &SlotTracker) const {
3812 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3813 IG->getInsertPos()->printAsOperand(O, false);
3814 O << ", ";
3815 getAddr()->printAsOperand(O, SlotTracker);
3816 VPValue *Mask = getMask();
3817 if (Mask) {
3818 O << ", ";
3819 Mask->printAsOperand(O, SlotTracker);
3820 }
3821
3822 unsigned OpIdx = 0;
3823 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3824 if (!IG->getMember(i))
3825 continue;
3826 if (getNumStoreOperands() > 0) {
3827 O << "\n" << Indent << " store ";
3828 getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
3829 O << " to index " << i;
3830 } else {
3831 O << "\n" << Indent << " ";
3832 getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
3833 O << " = load from index " << i;
3834 }
3835 ++OpIdx;
3836 }
3837}
3838#endif
3839
3840InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
3841 VPCostContext &Ctx) const {
3842 Instruction *InsertPos = getInsertPos();
3843 // Find the VPValue index of the interleave group. We need to skip gaps.
3844 unsigned InsertPosIdx = 0;
3845 for (unsigned Idx = 0; IG->getFactor(); ++Idx)
3846 if (auto *Member = IG->getMember(Idx)) {
3847 if (Member == InsertPos)
3848 break;
3849 InsertPosIdx++;
3850 }
3851 Type *ValTy = Ctx.Types.inferScalarType(
3852 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
3853 : getStoredValues()[InsertPosIdx]);
3854 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
3855 unsigned AS = getLoadStoreAddressSpace(InsertPos);
3856
3857 unsigned InterleaveFactor = IG->getFactor();
3858 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
3859
3860 // Holds the indices of existing members in the interleaved group.
3861 SmallVector<unsigned, 4> Indices;
3862 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
3863 if (IG->getMember(IF))
3864 Indices.push_back(IF);
3865
3866 // Calculate the cost of the whole interleaved group.
3867 InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
3868 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
3869 IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
3870
3871 if (!IG->isReverse())
3872 return Cost;
3873
3874 return Cost + IG->getNumMembers() *
3875 Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
3876 VectorTy, VectorTy, {}, Ctx.CostKind,
3877 0);
3878}
3879
3880#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3881void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
3882 VPSlotTracker &SlotTracker) const {
3883 O << Indent << "EMIT ";
3885 O << " = CANONICAL-INDUCTION ";
3887}
3888#endif
3889
3891 return IsScalarAfterVectorization &&
3892 (!IsScalable || vputils::onlyFirstLaneUsed(this));
3893}
3894
3895#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3896void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
3897 VPSlotTracker &SlotTracker) const {
3898 assert((getNumOperands() == 3 || getNumOperands() == 5) &&
3899 "unexpected number of operands");
3900 O << Indent << "EMIT ";
3902 O << " = WIDEN-POINTER-INDUCTION ";
3904 O << ", ";
3906 O << ", ";
3908 if (getNumOperands() == 5) {
3909 O << ", ";
3911 O << ", ";
3913 }
3914}
3915
3916void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
3917 VPSlotTracker &SlotTracker) const {
3918 O << Indent << "EMIT ";
3920 O << " = EXPAND SCEV " << *Expr;
3921}
3922#endif
3923
3924void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
3925 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
3926 Type *STy = CanonicalIV->getType();
3927 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
3928 ElementCount VF = State.VF;
3929 Value *VStart = VF.isScalar()
3930 ? CanonicalIV
3931 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
3932 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
3933 if (VF.isVector()) {
3934 VStep = Builder.CreateVectorSplat(VF, VStep);
3935 VStep =
3936 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
3937 }
3938 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
3939 State.set(this, CanonicalVectorIV);
3940}
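// For illustration, with a fixed VF of 4, unroll part 0 and canonical IV %iv,
// the code above produces
//   %vec.iv = splat(%iv) + (splat(0) + <0, 1, 2, 3>) = <%iv, %iv+1, %iv+2, %iv+3>
// and for unroll part 1 the scalar step becomes 4, yielding <%iv+4, ..., %iv+7>.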
3941
3942#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3943void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
3944 VPSlotTracker &SlotTracker) const {
3945 O << Indent << "EMIT ";
3947 O << " = WIDEN-CANONICAL-INDUCTION ";
3949}
3950#endif
3951
3952void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
3953 auto &Builder = State.Builder;
3954 // Create a vector from the initial value.
3955 auto *VectorInit = getStartValue()->getLiveInIRValue();
3956
3957 Type *VecTy = State.VF.isScalar()
3958 ? VectorInit->getType()
3959 : VectorType::get(VectorInit->getType(), State.VF);
3960
3961 BasicBlock *VectorPH =
3962 State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
3963 if (State.VF.isVector()) {
3964 auto *IdxTy = Builder.getInt32Ty();
3965 auto *One = ConstantInt::get(IdxTy, 1);
3966 IRBuilder<>::InsertPointGuard Guard(Builder);
3967 Builder.SetInsertPoint(VectorPH->getTerminator());
3968 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
3969 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3970 VectorInit = Builder.CreateInsertElement(
3971 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
3972 }
3973
3974 // Create a phi node for the new recurrence.
3975 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
3976 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
3977 Phi->addIncoming(VectorInit, VectorPH);
3978 State.set(this, Phi);
3979}
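// For illustration, with a fixed VF of 4 and initial value %init, the preheader
// receives
//   %vector.recur.init = insertelement <4 x T> poison, T %init, i32 3
// i.e. the scalar start value is placed in the last lane, and the header phi
// then carries the recurrence vector from iteration to iteration.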
3980
3981InstructionCost
3982VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
3983 VPCostContext &Ctx) const {
3984 if (VF.isScalar())
3985 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
3986
3987 return 0;
3988}
3989
3990#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3991void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
3992 VPSlotTracker &SlotTracker) const {
3993 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
3995 O << " = phi ";
3997}
3998#endif
3999
4000void VPReductionPHIRecipe::execute(VPTransformState &State) {
4001 // Reductions do not have to start at zero. They can start with
4002 // any loop invariant values.
4003 VPValue *StartVPV = getStartValue();
4004
4005 // In order to support recurrences we need to be able to vectorize Phi nodes.
4006 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
4007 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
4008 // this value when we vectorize all of the instructions that use the PHI.
4009 BasicBlock *VectorPH =
4010 State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
4011 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
4012 Value *StartV = State.get(StartVPV, ScalarPHI);
4013 Type *VecTy = StartV->getType();
4014
4015 BasicBlock *HeaderBB = State.CFG.PrevBB;
4016 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
4017 "recipe must be in the vector loop header");
4018 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
4019 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
4020 State.set(this, Phi, IsInLoop);
4021
4022 Phi->addIncoming(StartV, VectorPH);
4023}
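// For illustration, with a fixed VF of 4 an out-of-loop reduction gets a vector
// phi %vec.phi of type <4 x i32> whose preheader incoming value is the start
// vector, while an in-loop or scalar-VF reduction keeps a scalar phi; the
// backedge incoming value is added later, once the reduction itself has been
// vectorized.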
4024
4025#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4026void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
4027 VPSlotTracker &SlotTracker) const {
4028 O << Indent << "WIDEN-REDUCTION-PHI ";
4029
4031 O << " = phi ";
4033 if (VFScaleFactor != 1)
4034 O << " (VF scaled by 1/" << VFScaleFactor << ")";
4035}
4036#endif
4037
4038void VPWidenPHIRecipe::execute(VPTransformState &State) {
4039 Value *Op0 = State.get(getOperand(0));
4040 Type *VecTy = Op0->getType();
4041 Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
4042 State.set(this, VecPhi);
4043}
4044
4045#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4046void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
4047 VPSlotTracker &SlotTracker) const {
4048 O << Indent << "WIDEN-PHI ";
4049
4051 O << " = phi ";
4053}
4054#endif
4055
4056// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
4057// remove VPActiveLaneMaskPHIRecipe.
4058void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
4059 BasicBlock *VectorPH =
4060 State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
4061 Value *StartMask = State.get(getOperand(0));
4062 PHINode *Phi =
4063 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
4064 Phi->addIncoming(StartMask, VectorPH);
4065 State.set(this, Phi);
4066}
4067
4068#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4069void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
4070 VPSlotTracker &SlotTracker) const {
4071 O << Indent << "ACTIVE-LANE-MASK-PHI ";
4072
4074 O << " = phi ";
4076}
4077#endif
4078
4079#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4080void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
4081 VPSlotTracker &SlotTracker) const {
4082 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
4083
4085 O << " = phi ";
4087}
4088#endif
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:404
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
loop Loop Strength Reduction
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition: Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file contains the declarations of different VPlan-related auxiliary helpers.
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static InstructionCost getCostForIntrinsics(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost for the intrinsic ID with Operands, produced by R.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride, unsigned CurrentPart, IRBuilderBase &Builder)
static void scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State)
A helper function to scalarize a single Instruction in the innermost loop.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static BranchInst * createCondBranch(Value *Cond, VPBasicBlock *VPBB, VPTransformState &State)
Create a conditional branch using Cond branching to the successors of VPBB.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
static const uint32_t IV[8]
Definition: blake3_impl.h:83
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:393
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:337
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:437
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
Definition: BasicBlock.cpp:252
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:233
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:248
Conditional or Unconditional Branch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1506
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:703
@ ICMP_EQ
equal
Definition: InstrTypes.h:699
static LLVM_ABI StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition: Constants.h:87
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:131
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:163
This is an important base class in LLVM.
Definition: Constant.h:43
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:124
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:327
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:315
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:312
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:323
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:22
void setAllowContract(bool B=true)
Definition: FMF.h:90
bool noSignedZeros() const
Definition: FMF.h:67
bool noInfs() const
Definition: FMF.h:66
void setAllowReciprocal(bool B=true)
Definition: FMF.h:87
bool allowReciprocal() const
Definition: FMF.h:68
LLVM_ABI void print(raw_ostream &O) const
Print fast-math flags to O.
Definition: Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition: FMF.h:84
bool allowReassoc() const
Flag queries.
Definition: FMF.h:64
bool approxFunc() const
Definition: FMF.h:70
void setNoNaNs(bool B=true)
Definition: FMF.h:78
void setAllowReassoc(bool B=true)
Flag setters.
Definition: FMF.h:75
bool noNaNs() const
Definition: FMF.h:65
void setApproxFunc(bool B=true)
Definition: FMF.h:93
void setNoInfs(bool B=true)
Definition: FMF.h:81
bool allowContract() const
Definition: FMF.h:69
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:132
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:209
bool willReturn() const
Determine if the function will return.
Definition: Function.h:661
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:270
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:594
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:214
bool hasNoUnsignedSignedWrap() const
bool hasNoUnsignedWrap() const
bool isInBounds() const
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:114
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:497
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2571
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2625
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2155
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2559
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1864
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2100
LLVM_ABI Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
Definition: IRBuilder.cpp:1087
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1115
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2618
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:502
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:488
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1005
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2637
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:562
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:2036
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition: IRBuilder.h:2238
Value * CreateVScale(Type *Ty, const Twine &Name="")
Create a call to llvm.vscale.<Ty>().
Definition: IRBuilder.h:958
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:201
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:345
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:567
LLVM_ABI Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition: IRBuilder.cpp:1071
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2333
Value * CreateFCmpFMF(CmpInst::Predicate P, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2457
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1923
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:527
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1781
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:378
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:834
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:522
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2286
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2463
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2494
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1805
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2329
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:172
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition: IRBuilder.h:1134
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1420
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1197
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Definition: IRBuilder.cpp:922
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2082
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2593
LLVMContext & getContext() const
Definition: IRBuilder.h:203
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:508
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1403
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:507
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2508
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:605
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1708
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1725
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2341
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1883
LLVM_ABI Value * CreateVectorInterleave(ArrayRef< Value * > Ops, const Twine &Name="")
Definition: IRBuilder.cpp:1135
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2439
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition: IRBuilder.h:1573
LLVM_ABI Value * CreateStepVector(Type *DstType, const Twine &Name="")
Creates a vector of type DstType with the linear sequence <0, 1, ...>
Definition: IRBuilder.cpp:137
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2115
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1437
LLVM_ABI CallInst * CreateMaskedScatter(Value *Val, Value *Ptrs, Align Alignment, Value *Mask=nullptr)
Create a call to Masked Scatter intrinsic.
Definition: IRBuilder.cpp:569
LLVM_ABI CallInst * CreateMaskedGather(Type *Ty, Value *Ptrs, Align Alignment, Value *Mask=nullptr, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Gather intrinsic.
Definition: IRBuilder.cpp:538
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2780
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
bool isCast() const
Definition: Instruction.h:321
bool isBinaryOp() const
Definition: Instruction.h:317
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const char * getOpcodeName() const
Definition: Instruction.h:314
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:312
bool isUnaryOp() const
Definition: Instruction.h:316
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:319
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:524
uint32_t getFactor() const
Definition: VectorUtils.h:540
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:594
bool isReverse() const
Definition: VectorUtils.h:539
InstTy * getInsertPos() const
Definition: VectorUtils.h:610
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
Definition: VectorUtils.h:541
BlockT * getHeader() const
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Root of the metadata hierarchy.
Definition: Metadata.h:63
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
static bool isSignedRecurrenceKind(RecurKind Kind)
Returns true if recurrece kind is a signed redux kind.
unsigned getOpcode() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
const SDValue & getOperand(unsigned Num) const
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition: SetVector.h:59
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition: SetVector.h:93
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:757
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:401
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:476
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:938
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
LLVM_ABI InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const
Estimate the overhead of scalarizing an instruction.
LLVM_ABI InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo Op1Info={OK_AnyValue, OP_None}, OperandValueInfo Op2Info={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
LLVM_ABI InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask={}, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, int Index=0, VectorType *SubTp=nullptr, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
LLVM_ABI InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
LLVM_ABI InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of vector reduction intrinsics.
LLVM_ABI InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of a reduc...
LLVM_ABI InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
Calculate the cost of an extended reduction pattern, similar to getArithmeticReductionCost of an Add ...
LLVM_ABI InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF=FastMathFlags(), TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const
LLVM_ABI InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const
LLVM_ABI InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, PartialReductionExtendKind OpAExtend, PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const
LLVM_ABI InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
LLVM_ABI InstructionCost getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) const
@ TCC_Free
Expected to fold away in lowering.
LLVM_ABI InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
LLVM_ABI InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
Estimate the overhead of scalarizing operands with the given types.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
LLVM_ABI InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency) const
LLVM_ABI InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:267
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:261
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
value_op_iterator value_op_end()
Definition: User.h:313
void setOperand(unsigned i, Value *Val)
Definition: User.h:237
Value * getOperand(unsigned i) const
Definition: User.h:232
value_op_iterator value_op_begin()
Definition: User.h:310
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3639
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3692
iterator end()
Definition: VPlan.h:3676
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3705
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2403
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2408
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2398
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:81
VPRegionBlock * getParent()
Definition: VPlan.h:173
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:180
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:204
VPlan * getPlan()
Definition: VPlan.cpp:155
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:353
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:198
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:300
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:116
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition: VPlanValue.h:421
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition: VPlanValue.h:416
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition: VPlanValue.h:394
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition: VPlanValue.h:406
unsigned getVPDefID() const
Definition: VPlanValue.h:426
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3516
VPValue * getStartValue() const
Definition: VPlan.h:3515
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1988
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1694
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Class to record and manage LLVM IR flags.
Definition: VPlan.h:596
FastMathFlagsTy FMFs
Definition: VPlan.h:660
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
WrapFlagsTy WrapFlags
Definition: VPlan.h:654
void printFlags(raw_ostream &O) const
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:658
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:812
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition: VPlan.h:655
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:794
ExactFlagsTy ExactFlags
Definition: VPlan.h:657
bool hasNoSignedWrap() const
Definition: VPlan.h:836
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:806
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition: VPlan.h:809
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:656
bool hasNoUnsignedWrap() const
Definition: VPlan.h:825
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:659
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition: VPlan.h:757
A recipe to wrap on original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1329
Instruction & getInstruction() const
Definition: VPlan.h:1360
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipes first operand to the last lane of the operand using Builder.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition: VPlan.h:926
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetada object with MD, keeping only metadata nodes that are common to both.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition: VPlan.h:1040
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition: VPlan.h:996
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition: VPlan.h:1030
@ ExtractPenultimateElement
Definition: VPlan.h:1006
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition: VPlan.h:1043
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:973
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition: VPlan.h:1034
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition: VPlan.h:993
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition: VPlan.h:990
@ VScale
Returns the value for vscale.
Definition: VPlan.h:1045
@ CanonicalIVIncrementForPart
Definition: VPlan.h:983
@ ComputeReductionResult
Definition: VPlan.h:998
@ CalculateTripCountMinusVF
Definition: VPlan.h:981
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition: VPlan.h:1085
bool hasResult() const
Definition: VPlan.h:1124
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1164
unsigned getOpcode() const
Definition: VPlan.h:1104
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2487
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2493
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2500
Instruction * getInsertPos() const
Definition: VPlan.h:2535
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2524
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlanHelpers.h:125
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlanHelpers.h:166
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlanHelpers.h:152
static VPLane getFirstLane()
Definition: VPlanHelpers.h:150
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2671
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition: VPlan.h:1250
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition: VPlan.h:3783
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition: VPlan.h:1275
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition: VPlan.h:1242
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:391
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool isPhi() const
Returns true for PHI-like recipes.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition: VPlan.h:412
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:479
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
bool isScalarCast() const
Return true if the recipe is a scalar cast.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2716
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2613
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2617
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2619
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition: VPlan.h:2609
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2611
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2615
void execute(VPTransformState &State) override
Generate the reduction in the loop.
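A rough sketch of how these accessors drive the final reduction, using the LoopUtils helpers indexed further below. Builder, NewVecOp and PrevInChain are hypothetical names; this is not the actual body of VPReductionRecipe::execute:
// Illustrative only.
RecurKind Kind = getRecurrenceKind();
Value *Reduced =
    isOrdered() ? createOrderedReduction(Builder, Kind, NewVecOp, PrevInChain)
                : createSimpleReduction(Builder, NewVecOp, Kind);
// In the unordered case the reduced value is subsequently combined with the
// chain operand (getChainOp()) to carry the accumulator across iterations.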
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original scalar type, one per lane, instead of producing a single copy of widened type for all lanes.
Definition: VPlan.h:2731
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
bool isSingleScalar() const
Definition: VPlan.h:2776
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition: VPlan.h:2805
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:3581
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by its users.
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define a single result VPValue.
Definition: VPlan.h:518
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:582
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
This class can be used to assign names to VPValues.
Definition: VPlanHelpers.h:382
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Definition: VPlanAnalysis.h:67
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:914
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's users to their defs.
Definition: VPlanValue.h:197
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1446
operand_range operands()
Definition: VPlanValue.h:265
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:241
unsigned getNumOperands() const
Definition: VPlanValue.h:235
operand_iterator op_begin()
Definition: VPlanValue.h:261
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:236
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlanValue.h:280
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:230
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition: VPlan.cpp:1400
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe, i.e. it is a live-in.
Definition: VPlan.cpp:125
friend class VPExpressionRecipe
Definition: VPlanValue.h:53
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1442
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:140
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:174
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:85
void replaceAllUsesWith(VPValue *New)
Definition: VPlan.cpp:1403
user_iterator user_begin()
Definition: VPlanValue.h:130
unsigned getNumUsers() const
Definition: VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:169
user_range users()
Definition: VPlanValue.h:134
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition: VPlanValue.h:65
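VPUser and VPValue together form VPlan's def-use graph. A small illustrative traversal, assuming R is a hypothetical recipe and OldV/NewV hypothetical VPValue pointers:
// Walk the operands of a recipe and find those produced by other recipes.
for (VPValue *Op : R.operands())
  if (VPRecipeBase *DefR = Op->getDefiningRecipe())
    (void)DefR; // Op is defined by a recipe rather than being a live-in.

// Rewire all users of a value, e.g. after materializing a replacement.
if (OldV->getNumUsers() > 0)
  OldV->replaceAllUsesWith(NewV);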
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
operand_range args()
Definition: VPlan.h:1651
Function * getCalledScalarFunction() const
Definition: VPlan.h:1647
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1520
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2041
TruncInst * getTruncInst()
Returns the first defined value as a TruncInst if it is one, or nullptr otherwise.
Definition: VPlan.h:2152
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2161
bool isCanonical() const
Returns true if the induction is canonical, i.e. it starts at 0 and is incremented by one for each original loop iteration.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1585
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1588
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:3019
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:3016
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:3056
Instruction & Ingredient
Definition: VPlan.h:3010
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:3013
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3070
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:3063
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:3060
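Illustrative use of these memory-recipe accessors, assuming MemR is a hypothetical reference to a VPWidenMemoryRecipe:
VPValue *Addr = MemR.getAddr();   // pointer operand of the access
VPValue *Mask = MemR.getMask();   // nullptr when the access is unmasked
bool NeedsReverse = MemR.isConsecutive() && MemR.isReverse();
(void)Addr; (void)Mask; (void)NeedsReverse;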
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the recipe.
Definition: VPlan.h:1424
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State.VF elements.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getOpcode() const
Definition: VPlan.h:1457
unsigned getUF() const
Definition: VPlan.h:4159
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1037
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1098
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:838
bool hasName() const
Definition: Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:322
Base class of all SIMD vector types.
Definition: DerivedTypes.h:430
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
Definition: DerivedTypes.h:695
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
Definition: DerivedTypes.h:463
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:203
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:172
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal arithmetic types.
Definition: TypeSize.h:255
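A small, self-contained illustration of the ElementCount queries listed above (values are examples only):
#include "llvm/Support/TypeSize.h"
using namespace llvm;

void elementCountExample() {
  ElementCount EC = ElementCount::getScalable(4); // <vscale x 4 x ...>
  bool Scalable = EC.isScalable();                // true
  uint64_t MinLanes = EC.getKnownMinValue();      // 4, scaled by vscale at runtime
  ElementCount Half = EC.divideCoefficientBy(2);  // <vscale x 2 x ...>
  (void)Scalable; (void)MinLanes; (void)Half;
}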
const ParentTy * getParent() const
Definition: ilist_node.h:34
self_iterator getIterator()
Definition: ilist_node.h:134
iterator erase(iterator where)
Definition: ilist.h:204
pointer remove(iterator &IT)
Definition: ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:126
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:751
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor)
Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
LLVM_ABI StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition: Intrinsics.cpp:44
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or because only its first lane is used.
Definition: VPlanUtils.h:44
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlanUtils.cpp:22
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlanUtils.cpp:17
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
Definition: VPlanUtils.cpp:27
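An illustrative check combining the vputils predicates above, assuming Def is a hypothetical const VPValue pointer:
// Sketch only: decide whether a definition can be kept scalar.
bool CanStayScalar = vputils::isSingleScalar(Def) ||
                     vputils::onlyFirstLaneUsed(Def) ||
                     vputils::onlyScalarValuesUsed(Def);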
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:338
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition: LoopUtils.cpp:1313
@ Offset
Definition: DWP.cpp:477
LLVM_ABI Value * createFindLastIVReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::FindLastIV.
Definition: LoopUtils.cpp:1247
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1744
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of a load or store instruction.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition: LoopUtils.cpp:1023
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2491
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2250
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of a load or store instruction.
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:1116
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
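These mask helpers produce plain index vectors; for example, with VF = 4 (illustrative values only):
SmallVector<int, 16> Strided = createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
// Strided    == {0, 2, 4, 6}
SmallVector<int, 16> Replicated = createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/4);
// Replicated == {0, 0, 1, 1, 2, 2, 3, 3}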
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1300
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
cl::opt< unsigned > ForceTargetInstructionCost
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:345
@ Other
Any other memory.
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elements are valid vector element types.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
Definition: IVDescriptors.h:34
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are of integer type.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector.reduce.* intrinsic used to generate it.
Definition: LoopUtils.cpp:1305
DWARFExpression::Operation Op
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
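Illustrative use of the VF helpers, assuming Builder is a hypothetical IRBuilderBase and VF an ElementCount: both fold to constants for fixed vectorization factors and emit vscale-based expressions for scalable ones.
Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt64Ty(), VF);
Value *Step      = createStepForVF(Builder, Builder.getInt64Ty(), VF, /*Step=*/2);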
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1916
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
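Illustrative use of the load/store helpers indexed here and earlier in this list, assuming I is a hypothetical load or store instruction:
Type *AccessTy   = getLoadStoreType(&I);            // type being loaded or stored
const Value *Ptr = getLoadStorePointerOperand(&I);  // pointer operand
Align Alignment  = getLoadStoreAlignment(&I);       // access alignment
unsigned AS      = getLoadStoreAddressSpace(&I);    // pointer address space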
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
Definition: LoopUtils.cpp:1366
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
InstructionCost Cost
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, Value *InitVal, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of kind RecurKind::AnyOf.
Definition: LoopUtils.cpp:1217
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdIdx, or on the return type if OpdIdx is -1.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Struct to hold various analysis needed for cost computations.
Definition: VPlanHelpers.h:344
LLVMContext & LLVMCtx
Definition: VPlanHelpers.h:348
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1630
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g. because it has already been pre-computed.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of all recipes migrates to VPlan.
TargetTransformInfo::TargetCostKind CostKind
Definition: VPlanHelpers.h:351
VPTypeAnalysis Types
Definition: VPlanHelpers.h:347
const TargetLibraryInfo & TLI
Definition: VPlanHelpers.h:346
const TargetTransformInfo & TTI
Definition: VPlanHelpers.h:345
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlanHelpers.h:350
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use of cast/dyn_cast/isa and execute() specialized for PHI nodes.
Definition: VPlan.h:1397
PHINode & getIRPhi()
Definition: VPlan.h:1405
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition: VPlan.h:865
std::optional< InstructionCost > getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlanHelpers.h:303
SmallDenseMap< const VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlanHelpers.h:311
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
Definition: VPlanHelpers.h:205
void reset(const VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlanHelpers.h:261
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlanHelpers.h:337
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the generated scalar.
Definition: VPlan.cpp:283
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlanHelpers.h:219
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlanHelpers.h:328
bool hasScalarValue(const VPValue *Def, VPLane Lane)
Definition: VPlanHelpers.h:240
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlanHelpers.h:211
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlanHelpers.h:250
bool hasVectorValue(const VPValue *Def)
Definition: VPlanHelpers.h:236
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlanHelpers.h:214
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition: VPlan.cpp:393
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
Definition: VPlanHelpers.h:325
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:371
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlanHelpers.h:334
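An illustrative pattern from a recipe's execute() built on these members; SomeDef and Widened are hypothetical names, and the calls are shown as if made from within a recipe:
State.setDebugLocFrom(getDebugLoc());                  // propagate the source location
Value *VecOp  = State.get(getOperand(0));              // widened operand value
Value *Scalar = State.get(getOperand(1), /*IsScalar=*/true);
// ... generate IR via State.Builder using VecOp and Scalar ...
State.set(SomeDef, Widened);                           // record the produced value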
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3143
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition: VPlan.h:1740
VPValue * getCond() const
Definition: VPlan.h:1736
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3224
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3227
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3188
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.