VPlanRecipes.cpp
1//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains implementations for different VPlan recipes.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanHelpers.h"
18#include "VPlanPatternMatch.h"
19#include "VPlanUtils.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Twine.h"
26#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/Type.h"
32#include "llvm/IR/Value.h"
35#include "llvm/Support/Debug.h"
40#include <cassert>
41
42using namespace llvm;
43
45
46#define LV_NAME "loop-vectorize"
47#define DEBUG_TYPE LV_NAME
48
49bool VPRecipeBase::mayWriteToMemory() const {
50 switch (getVPDefID()) {
51 case VPExpressionSC:
52 return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();
53 case VPInstructionSC:
54 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
55 case VPInterleaveEVLSC:
56 case VPInterleaveSC:
57 return cast<VPInterleaveBase>(this)->getNumStoreOperands() > 0;
58 case VPWidenStoreEVLSC:
59 case VPWidenStoreSC:
60 return true;
61 case VPReplicateSC:
62 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
63 ->mayWriteToMemory();
64 case VPWidenCallSC:
65 return !cast<VPWidenCallRecipe>(this)
66 ->getCalledScalarFunction()
67 ->onlyReadsMemory();
68 case VPWidenIntrinsicSC:
69 return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
70 case VPCanonicalIVPHISC:
71 case VPBranchOnMaskSC:
72 case VPFirstOrderRecurrencePHISC:
73 case VPReductionPHISC:
74 case VPScalarIVStepsSC:
75 case VPPredInstPHISC:
76 return false;
77 case VPBlendSC:
78 case VPReductionEVLSC:
79 case VPReductionSC:
80 case VPVectorPointerSC:
81 case VPWidenCanonicalIVSC:
82 case VPWidenCastSC:
83 case VPWidenGEPSC:
84 case VPWidenIntOrFpInductionSC:
85 case VPWidenLoadEVLSC:
86 case VPWidenLoadSC:
87 case VPWidenPHISC:
88 case VPWidenSC:
89 case VPWidenSelectSC: {
90 const Instruction *I =
91 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
92 (void)I;
93 assert((!I || !I->mayWriteToMemory()) &&
94 "underlying instruction may write to memory");
95 return false;
96 }
97 default:
98 return true;
99 }
100}
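// Editorial note (not part of the original source): for VPReplicateSC the
// answer above comes from the underlying scalar Instruction, so a replicated
// non-volatile load reports false here while a replicated store reports true.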
101
102bool VPRecipeBase::mayReadFromMemory() const {
103 switch (getVPDefID()) {
104 case VPExpressionSC:
105 return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();
106 case VPInstructionSC:
107 return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
108 case VPWidenLoadEVLSC:
109 case VPWidenLoadSC:
110 return true;
111 case VPReplicateSC:
112 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
113 ->mayReadFromMemory();
114 case VPWidenCallSC:
115 return !cast<VPWidenCallRecipe>(this)
116 ->getCalledScalarFunction()
117 ->onlyWritesMemory();
118 case VPWidenIntrinsicSC:
119 return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
120 case VPBranchOnMaskSC:
121 case VPFirstOrderRecurrencePHISC:
122 case VPPredInstPHISC:
123 case VPScalarIVStepsSC:
124 case VPWidenStoreEVLSC:
125 case VPWidenStoreSC:
126 return false;
127 case VPBlendSC:
128 case VPReductionEVLSC:
129 case VPReductionSC:
130 case VPVectorPointerSC:
131 case VPWidenCanonicalIVSC:
132 case VPWidenCastSC:
133 case VPWidenGEPSC:
134 case VPWidenIntOrFpInductionSC:
135 case VPWidenPHISC:
136 case VPWidenSC:
137 case VPWidenSelectSC: {
138 const Instruction *I =
139 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
140 (void)I;
141 assert((!I || !I->mayReadFromMemory()) &&
142 "underlying instruction may read from memory");
143 return false;
144 }
145 default:
146 // FIXME: Return false if the recipe represents an interleaved store.
147 return true;
148 }
149}
150
151bool VPRecipeBase::mayHaveSideEffects() const {
152 switch (getVPDefID()) {
153 case VPExpressionSC:
154 return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();
155 case VPDerivedIVSC:
156 case VPFirstOrderRecurrencePHISC:
157 case VPPredInstPHISC:
158 case VPVectorEndPointerSC:
159 return false;
160 case VPInstructionSC:
161 return mayWriteToMemory();
162 case VPWidenCallSC: {
163 Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
164 return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
165 }
166 case VPWidenIntrinsicSC:
167 return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
168 case VPBlendSC:
169 case VPReductionEVLSC:
170 case VPReductionSC:
171 case VPScalarIVStepsSC:
172 case VPVectorPointerSC:
173 case VPWidenCanonicalIVSC:
174 case VPWidenCastSC:
175 case VPWidenGEPSC:
176 case VPWidenIntOrFpInductionSC:
177 case VPWidenPHISC:
178 case VPWidenPointerInductionSC:
179 case VPWidenSC:
180 case VPWidenSelectSC: {
181 const Instruction *I =
182 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
183 (void)I;
184 assert((!I || !I->mayHaveSideEffects()) &&
185 "underlying instruction has side-effects");
186 return false;
187 }
188 case VPInterleaveEVLSC:
189 case VPInterleaveSC:
190 return mayWriteToMemory();
191 case VPWidenLoadEVLSC:
192 case VPWidenLoadSC:
193 case VPWidenStoreEVLSC:
194 case VPWidenStoreSC:
195 assert(
196 cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
197 mayWriteToMemory() &&
198 "mayHaveSideEffects result for ingredient differs from this "
199 "implementation");
200 return mayWriteToMemory();
201 case VPReplicateSC: {
202 auto *R = cast<VPReplicateRecipe>(this);
203 return R->getUnderlyingInstr()->mayHaveSideEffects();
204 }
205 default:
206 return true;
207 }
208}
209
210void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
211 assert(!Parent && "Recipe already in some VPBasicBlock");
212 assert(InsertPos->getParent() &&
213 "Insertion position not in any VPBasicBlock");
214 InsertPos->getParent()->insert(this, InsertPos->getIterator());
215}
216
217void VPRecipeBase::insertBefore(VPBasicBlock &BB,
218 iplist<VPRecipeBase>::iterator I) {
219 assert(!Parent && "Recipe already in some VPBasicBlock");
220 assert(I == BB.end() || I->getParent() == &BB);
221 BB.insert(this, I);
222}
223
224void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
225 assert(!Parent && "Recipe already in some VPBasicBlock");
226 assert(InsertPos->getParent() &&
227 "Insertion position not in any VPBasicBlock");
228 InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
229}
230
231void VPRecipeBase::removeFromParent() {
232 assert(getParent() && "Recipe not in any VPBasicBlock");
233 getParent()->getRecipeList().remove(getIterator());
234 Parent = nullptr;
235}
236
237iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
238 assert(getParent() && "Recipe not in any VPBasicBlock");
239 return getParent()->getRecipeList().erase(getIterator());
240}
241
242void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
243 removeFromParent();
244 insertAfter(InsertPos);
245}
246
252
253InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
254 // Get the underlying instruction for the recipe, if there is one. It is used
255 // to
256 // * decide if cost computation should be skipped for this recipe,
257 // * apply forced target instruction cost.
258 Instruction *UI = nullptr;
259 if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
260 UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
261 else if (auto *IG = dyn_cast<VPInterleaveBase>(this))
262 UI = IG->getInsertPos();
263 else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
264 UI = &WidenMem->getIngredient();
265
266 InstructionCost RecipeCost;
267 if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
268 RecipeCost = 0;
269 } else {
270 RecipeCost = computeCost(VF, Ctx);
271 if (UI && ForceTargetInstructionCost.getNumOccurrences() > 0 &&
272 RecipeCost.isValid())
273 RecipeCost = InstructionCost(ForceTargetInstructionCost);
274 }
275
276 LLVM_DEBUG({
277 dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
278 dump();
279 });
280 return RecipeCost;
281}
282
283InstructionCost VPRecipeBase::computeCost(ElementCount VF,
284 VPCostContext &Ctx) const {
285 llvm_unreachable("subclasses should implement computeCost");
286}
287
289 return (getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC) ||
291}
292
293bool VPRecipeBase::isScalarCast() const {
294 auto *VPI = dyn_cast<VPInstruction>(this);
295 return VPI && Instruction::isCast(VPI->getOpcode());
296}
297
298InstructionCost
299VPPartialReductionRecipe::computeCost(ElementCount VF,
300 VPCostContext &Ctx) const {
301 std::optional<unsigned> Opcode;
302 VPValue *Op = getOperand(0);
303 VPRecipeBase *OpR = Op->getDefiningRecipe();
304
305 // If the partial reduction is predicated, a select will be operand 0
306 using namespace llvm::VPlanPatternMatch;
308 OpR = Op->getDefiningRecipe();
309 }
310
311 Type *InputTypeA = nullptr, *InputTypeB = nullptr;
312 TTI::PartialReductionExtendKind ExtAType = TTI::PR_None,
313 ExtBType = TTI::PR_None;
314
315 auto GetExtendKind = [](VPRecipeBase *R) {
316 if (!R)
317 return TTI::PR_None;
318 auto *WidenCastR = dyn_cast<VPWidenCastRecipe>(R);
319 if (!WidenCastR)
320 return TTI::PR_None;
321 if (WidenCastR->getOpcode() == Instruction::CastOps::ZExt)
322 return TTI::PR_ZeroExtend;
323 if (WidenCastR->getOpcode() == Instruction::CastOps::SExt)
324 return TTI::PR_SignExtend;
325 return TTI::PR_None;
326 };
327
328 // Pick out opcode, type/ext information and use sub side effects from a widen
329 // recipe.
330 auto HandleWiden = [&](VPWidenRecipe *Widen) {
332 Widen = dyn_cast<VPWidenRecipe>(Op->getDefiningRecipe());
333 }
334 Opcode = Widen->getOpcode();
335 VPRecipeBase *ExtAR = Widen->getOperand(0)->getDefiningRecipe();
336 VPRecipeBase *ExtBR = Widen->getOperand(1)->getDefiningRecipe();
337 InputTypeA = Ctx.Types.inferScalarType(ExtAR ? ExtAR->getOperand(0)
338 : Widen->getOperand(0));
339 InputTypeB = Ctx.Types.inferScalarType(ExtBR ? ExtBR->getOperand(0)
340 : Widen->getOperand(1));
341 ExtAType = GetExtendKind(ExtAR);
342 ExtBType = GetExtendKind(ExtBR);
343 };
344
345 if (isa<VPWidenCastRecipe>(OpR)) {
346 InputTypeA = Ctx.Types.inferScalarType(OpR->getOperand(0));
347 ExtAType = GetExtendKind(OpR);
348 } else if (isa<VPReductionPHIRecipe>(OpR)) {
349 auto RedPhiOp1R = getOperand(1)->getDefiningRecipe();
350 if (isa<VPWidenCastRecipe>(RedPhiOp1R)) {
351 InputTypeA = Ctx.Types.inferScalarType(RedPhiOp1R->getOperand(0));
352 ExtAType = GetExtendKind(RedPhiOp1R);
353 } else if (auto Widen = dyn_cast<VPWidenRecipe>(RedPhiOp1R))
354 HandleWiden(Widen);
355 } else if (auto Widen = dyn_cast<VPWidenRecipe>(OpR)) {
356 HandleWiden(Widen);
357 } else if (auto Reduction = dyn_cast<VPPartialReductionRecipe>(OpR)) {
358 return Reduction->computeCost(VF, Ctx);
359 }
360 auto *PhiType = Ctx.Types.inferScalarType(getOperand(1));
361 return Ctx.TTI.getPartialReductionCost(getOpcode(), InputTypeA, InputTypeB,
362 PhiType, VF, ExtAType, ExtBType,
363 Opcode, Ctx.CostKind);
364}
365
366void VPPartialReductionRecipe::execute(VPTransformState &State) {
367 auto &Builder = State.Builder;
368
369 assert(getOpcode() == Instruction::Add &&
370 "Unhandled partial reduction opcode");
371
372 Value *BinOpVal = State.get(getOperand(1));
373 Value *PhiVal = State.get(getOperand(0));
374 assert(PhiVal && BinOpVal && "Phi and Mul must be set");
375
376 Type *RetTy = PhiVal->getType();
377
378 CallInst *V = Builder.CreateIntrinsic(
379 RetTy, Intrinsic::experimental_vector_partial_reduce_add,
380 {PhiVal, BinOpVal}, nullptr, "partial.reduce");
381
382 State.set(this, V);
383}
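// Illustrative example (editorial, not from this file): with a <16 x i32>
// bin-op feeding a 4x-scaled <4 x i32> reduction phi, the call created above
// looks roughly like
//   %partial.reduce = call <4 x i32>
//     @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(
//       <4 x i32> %phi, <16 x i32> %binop)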
384
385#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
387 VPSlotTracker &SlotTracker) const {
388 O << Indent << "PARTIAL-REDUCE ";
390 O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
392}
393#endif
394
396 assert(OpType == Other.OpType && "OpType must match");
397 switch (OpType) {
398 case OperationType::OverflowingBinOp:
399 WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;
400 WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;
401 break;
402 case OperationType::Trunc:
403 TruncFlags.HasNUW &= Other.TruncFlags.HasNUW;
404 TruncFlags.HasNSW &= Other.TruncFlags.HasNSW;
405 break;
406 case OperationType::DisjointOp:
407 DisjointFlags.IsDisjoint &= Other.DisjointFlags.IsDisjoint;
408 break;
409 case OperationType::PossiblyExactOp:
410 ExactFlags.IsExact &= Other.ExactFlags.IsExact;
411 break;
412 case OperationType::GEPOp:
413 GEPFlags &= Other.GEPFlags;
414 break;
415 case OperationType::FPMathOp:
416 FMFs.NoNaNs &= Other.FMFs.NoNaNs;
417 FMFs.NoInfs &= Other.FMFs.NoInfs;
418 break;
419 case OperationType::NonNegOp:
420 NonNegFlags.NonNeg &= Other.NonNegFlags.NonNeg;
421 break;
422 case OperationType::Cmp:
423 assert(CmpPredicate == Other.CmpPredicate && "Cannot drop CmpPredicate");
424 break;
425 case OperationType::Other:
426 assert(AllFlags == Other.AllFlags && "Cannot drop other flags");
427 break;
428 }
429}
430
431FastMathFlags VPIRFlags::getFastMathFlags() const {
432 assert(OpType == OperationType::FPMathOp &&
433 "recipe doesn't have fast math flags");
434 FastMathFlags Res;
435 Res.setAllowReassoc(FMFs.AllowReassoc);
436 Res.setNoNaNs(FMFs.NoNaNs);
437 Res.setNoInfs(FMFs.NoInfs);
438 Res.setNoSignedZeros(FMFs.NoSignedZeros);
439 Res.setAllowReciprocal(FMFs.AllowReciprocal);
440 Res.setAllowContract(FMFs.AllowContract);
441 Res.setApproxFunc(FMFs.ApproxFunc);
442 return Res;
443}
444
445#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
447#endif
448
449template <unsigned PartOpIdx>
450VPValue *
452 if (U.getNumOperands() == PartOpIdx + 1)
453 return U.getOperand(PartOpIdx);
454 return nullptr;
455}
456
457template <unsigned PartOpIdx>
459 if (auto *UnrollPartOp = getUnrollPartOperand(U))
460 return cast<ConstantInt>(UnrollPartOp->getLiveInIRValue())->getZExtValue();
461 return 0;
462}
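// Editorial note (not part of the original source): an unrolled recipe copy
// carries its part number as an extra trailing live-in constant operand;
// getUnrollPart above reads that constant, and a recipe without the extra
// operand is treated as part 0.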
463
464namespace llvm {
465template class VPUnrollPartAccessor<1>;
466template class VPUnrollPartAccessor<2>;
467template class VPUnrollPartAccessor<3>;
468}
469
470VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
471 const VPIRFlags &Flags, DebugLoc DL,
472 const Twine &Name)
473 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
474 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {
475 assert(flagsValidForOpcode(getOpcode()) &&
476 "Set flags not supported for the provided opcode");
477 assert((getNumOperandsForOpcode(Opcode) == -1u ||
478 getNumOperandsForOpcode(Opcode) == getNumOperands()) &&
479 "number of operands does not match opcode");
480}
481
482#ifndef NDEBUG
483unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
484 if (Instruction::isUnaryOp(Opcode) || Instruction::isCast(Opcode))
485 return 1;
486
487 if (Instruction::isBinaryOp(Opcode))
488 return 2;
489
490 switch (Opcode) {
493 return 0;
494 case Instruction::Alloca:
495 case Instruction::ExtractValue:
496 case Instruction::Freeze:
497 case Instruction::Load:
509 return 1;
510 case Instruction::ICmp:
511 case Instruction::FCmp:
512 case Instruction::Store:
520 return 2;
521 case Instruction::Select:
525 return 3;
527 return 4;
528 case Instruction::Call:
529 case Instruction::GetElementPtr:
530 case Instruction::PHI:
531 case Instruction::Switch:
532 // Cannot determine the number of operands from the opcode.
533 return -1u;
534 }
535 llvm_unreachable("all cases should be handled above");
536}
537#endif
538
539bool VPInstruction::doesGeneratePerAllLanes() const {
540 return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
541}
542
543bool VPInstruction::canGenerateScalarForFirstLane() const {
545 return true;
547 return true;
548 switch (Opcode) {
549 case Instruction::Freeze:
550 case Instruction::ICmp:
551 case Instruction::PHI:
552 case Instruction::Select:
561 return true;
562 default:
563 return false;
564 }
565}
566
567Value *VPInstruction::generatePerLane(VPTransformState &State,
568 const VPLane &Lane) {
569 IRBuilderBase &Builder = State.Builder;
570
571 assert(getOpcode() == VPInstruction::PtrAdd &&
572 "only PtrAdd opcodes are supported for now");
573 return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
574 State.get(getOperand(1), Lane), Name);
575}
576
577/// Create a conditional branch using \p Cond branching to the successors of \p
578/// VPBB. Note that the first successor is always forward (i.e. not created yet)
579/// while the second successor may already have been created (if it is a header
580/// block and VPBB is a latch).
581static BranchInst *createCondBranch(Value *Cond, VPBasicBlock *VPBB,
582 VPTransformState &State) {
583 // Replace the temporary unreachable terminator with a new conditional
584 // branch, hooking it up to backward destination (header) for latch blocks
585 // now, and to forward destination(s) later when they are created.
586 // Second successor may be backwards - iff it is already in VPBB2IRBB.
587 VPBasicBlock *SecondVPSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
588 BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);
589 BasicBlock *IRBB = State.CFG.VPBB2IRBB[VPBB];
590 BranchInst *CondBr = State.Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);
591 // First successor is always forward, reset it to nullptr
592 CondBr->setSuccessor(0, nullptr);
593 IRBB->getTerminator()->eraseFromParent();
594 return CondBr;
595}
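// Editorial note (not part of the original source): for a latch block the
// backward successor (the loop header) already has an IR block and is wired up
// immediately through the VPBB2IRBB lookup, while the forward successor does
// not exist yet, so its slot is cleared and filled in once the corresponding
// IR basic block is created.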
596
597Value *VPInstruction::generate(VPTransformState &State) {
598 IRBuilderBase &Builder = State.Builder;
599
600 if (Instruction::isBinaryOp(getOpcode())) {
601 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
602 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
603 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
604 auto *Res =
605 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
606 if (auto *I = dyn_cast<Instruction>(Res))
607 applyFlags(*I);
608 return Res;
609 }
610
611 switch (getOpcode()) {
612 case VPInstruction::Not: {
613 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
614 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
615 return Builder.CreateNot(A, Name);
616 }
617 case Instruction::ExtractElement: {
618 assert(State.VF.isVector() && "Only extract elements from vectors");
619 if (getOperand(1)->isLiveIn()) {
620 unsigned IdxToExtract =
621 cast<ConstantInt>(getOperand(1)->getLiveInIRValue())->getZExtValue();
622 return State.get(getOperand(0), VPLane(IdxToExtract));
623 }
624 Value *Vec = State.get(getOperand(0));
625 Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);
626 return Builder.CreateExtractElement(Vec, Idx, Name);
627 }
628 case Instruction::Freeze: {
630 return Builder.CreateFreeze(Op, Name);
631 }
632 case Instruction::FCmp:
633 case Instruction::ICmp: {
634 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
635 Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
636 Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
637 return Builder.CreateCmp(getPredicate(), A, B, Name);
638 }
639 case Instruction::PHI: {
640 llvm_unreachable("should be handled by VPPhi::execute");
641 }
642 case Instruction::Select: {
643 bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
644 Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
645 Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
646 Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
647 return Builder.CreateSelect(Cond, Op1, Op2, Name);
648 }
649 case VPInstruction::ActiveLaneMask: {
650 // Get first lane of vector induction variable.
651 Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
652 // Get the original loop tripcount.
653 Value *ScalarTC = State.get(getOperand(1), VPLane(0));
654
655 // If this part of the active lane mask is scalar, generate the CMP directly
656 // to avoid unnecessary extracts.
657 if (State.VF.isScalar())
658 return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
659 Name);
660
661 auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
662 auto PredTy = VectorType::get(
663 Int1Ty, State.VF * cast<ConstantInt>(getOperand(2)->getLiveInIRValue())
664 ->getZExtValue());
665 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
666 {PredTy, ScalarTC->getType()},
667 {VIVElem0, ScalarTC}, nullptr, Name);
668 }
669 case VPInstruction::FirstOrderRecurrenceSplice: {
670 // Generate code to combine the previous and current values in vector v3.
671 //
672 // vector.ph:
673 // v_init = vector(..., ..., ..., a[-1])
674 // br vector.body
675 //
676 // vector.body
677 // i = phi [0, vector.ph], [i+4, vector.body]
678 // v1 = phi [v_init, vector.ph], [v2, vector.body]
679 // v2 = a[i, i+1, i+2, i+3];
680 // v3 = vector(v1(3), v2(0, 1, 2))
681
682 auto *V1 = State.get(getOperand(0));
683 if (!V1->getType()->isVectorTy())
684 return V1;
685 Value *V2 = State.get(getOperand(1));
686 return Builder.CreateVectorSplice(V1, V2, -1, Name);
687 }
689 unsigned UF = getParent()->getPlan()->getUF();
690 Value *ScalarTC = State.get(getOperand(0), VPLane(0));
691 Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
692 Value *Sub = Builder.CreateSub(ScalarTC, Step);
693 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
694 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
695 return Builder.CreateSelect(Cmp, Sub, Zero);
696 }
697 case VPInstruction::ExplicitVectorLength: {
698 // TODO: Restructure this code with an explicit remainder loop, vsetvli can
699 // be outside of the main loop.
700 Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
701 // Compute EVL
702 assert(AVL->getType()->isIntegerTy() &&
703 "Requested vector length should be an integer.");
704
705 assert(State.VF.isScalable() && "Expected scalable vector factor.");
706 Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
707
708 Value *EVL = State.Builder.CreateIntrinsic(
709 State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
710 {AVL, VFArg, State.Builder.getTrue()});
711 return EVL;
712 }
713 case VPInstruction::CanonicalIVIncrementForPart: {
714 unsigned Part = getUnrollPart(*this);
715 auto *IV = State.get(getOperand(0), VPLane(0));
716 assert(Part != 0 && "Must have a positive part");
717 // The canonical IV is incremented by the vectorization factor (num of
718 // SIMD elements) times the unroll part.
719 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
720 return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
722 }
723 case VPInstruction::BranchOnCond: {
724 Value *Cond = State.get(getOperand(0), VPLane(0));
725 auto *Br = createCondBranch(Cond, getParent(), State);
726 applyMetadata(*Br);
727 return Br;
728 }
729 case VPInstruction::BranchOnCount: {
730 // First create the compare.
731 Value *IV = State.get(getOperand(0), /*IsScalar*/ true);
732 Value *TC = State.get(getOperand(1), /*IsScalar*/ true);
733 Value *Cond = Builder.CreateICmpEQ(IV, TC);
734 return createCondBranch(Cond, getParent(), State);
735 }
736 case VPInstruction::Broadcast: {
737 return Builder.CreateVectorSplat(
738 State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
739 }
740 case VPInstruction::BuildStructVector: {
741 // For struct types, we need to build a new 'wide' struct type, where each
742 // element is widened, i.e., we create a struct of vectors.
743 auto *StructTy =
745 Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));
746 for (const auto &[LaneIndex, Op] : enumerate(operands())) {
747 for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
748 FieldIndex++) {
749 Value *ScalarValue =
750 Builder.CreateExtractValue(State.get(Op, true), FieldIndex);
751 Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);
752 VectorValue =
753 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneIndex);
754 Res = Builder.CreateInsertValue(Res, VectorValue, FieldIndex);
755 }
756 }
757 return Res;
758 }
759 case VPInstruction::BuildVector: {
760 auto *ScalarTy = State.TypeAnalysis.inferScalarType(getOperand(0));
761 auto NumOfElements = ElementCount::getFixed(getNumOperands());
762 Value *Res = PoisonValue::get(toVectorizedTy(ScalarTy, NumOfElements));
763 for (const auto &[Idx, Op] : enumerate(operands()))
764 Res = State.Builder.CreateInsertElement(Res, State.get(Op, true),
765 State.Builder.getInt32(Idx));
766 return Res;
767 }
768 case VPInstruction::ReductionStartVector: {
769 if (State.VF.isScalar())
770 return State.get(getOperand(0), true);
771 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
773 // If this start vector is scaled then it should produce a vector with fewer
774 // elements than the VF.
775 ElementCount VF = State.VF.divideCoefficientBy(
776 cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
777 auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
778 Constant *Zero = Builder.getInt32(0);
779 return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
780 Zero);
781 }
782 case VPInstruction::ComputeAnyOfResult: {
783 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
784 // and will be removed by breaking up the recipe further.
785 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
786 auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
787 Value *ReducedPartRdx = State.get(getOperand(2));
788 for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
789 ReducedPartRdx = Builder.CreateBinOp(
792 State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx");
793 return createAnyOfReduction(Builder, ReducedPartRdx,
794 State.get(getOperand(1), VPLane(0)), OrigPhi);
795 }
796 case VPInstruction::ComputeFindIVResult: {
797 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
798 // and will be removed by breaking up the recipe further.
799 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
800 // Get its reduction variable descriptor.
801 RecurKind RK = PhiR->getRecurrenceKind();
803 "Unexpected reduction kind");
804 assert(!PhiR->isInLoop() &&
805 "In-loop FindLastIV reduction is not supported yet");
806
807 // The recipe's operands are the reduction phi, the start value, the
808 // sentinel value, followed by one operand for each part of the reduction.
809 unsigned UF = getNumOperands() - 3;
810 Value *ReducedPartRdx = State.get(getOperand(3));
811 RecurKind MinMaxKind;
814 MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
815 else
816 MinMaxKind = IsSigned ? RecurKind::SMin : RecurKind::UMin;
817 for (unsigned Part = 1; Part < UF; ++Part)
818 ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
819 State.get(getOperand(3 + Part)));
820
821 Value *Start = State.get(getOperand(1), true);
823 return createFindLastIVReduction(Builder, ReducedPartRdx, RK, Start,
824 Sentinel);
825 }
826 case VPInstruction::ComputeReductionResult: {
827 // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
828 // and will be removed by breaking up the recipe further.
829 auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
830 // Get its reduction variable descriptor.
831
832 RecurKind RK = PhiR->getRecurrenceKind();
834 "should be handled by ComputeFindIVResult");
835
836 // The recipe's operands are the reduction phi, followed by one operand for
837 // each part of the reduction.
838 unsigned UF = getNumOperands() - 1;
839 VectorParts RdxParts(UF);
840 for (unsigned Part = 0; Part < UF; ++Part)
841 RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
842
843 IRBuilderBase::FastMathFlagGuard FMFG(Builder);
844 if (hasFastMathFlags())
846
847 // Reduce all of the unrolled parts into a single vector.
848 Value *ReducedPartRdx = RdxParts[0];
849 if (PhiR->isOrdered()) {
850 ReducedPartRdx = RdxParts[UF - 1];
851 } else {
852 // Floating-point operations should have some FMF to enable the reduction.
853 for (unsigned Part = 1; Part < UF; ++Part) {
854 Value *RdxPart = RdxParts[Part];
856 ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
857 else {
859 // For sub-recurrences, each UF's reduction variable is already
860 // negative, we need to do: reduce.add(-acc_uf0 + -acc_uf1)
861 if (RK == RecurKind::Sub)
862 Opcode = Instruction::Add;
863 else
864 Opcode =
866 ReducedPartRdx =
867 Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");
868 }
869 }
870 }
871
872 // Create the reduction after the loop. Note that inloop reductions create
873 // the target reduction in the loop using a Reduction recipe.
874 if (State.VF.isVector() && !PhiR->isInLoop()) {
875 // TODO: Support in-order reductions based on the recurrence descriptor.
876 // All ops in the reduction inherit fast-math-flags from the recurrence
877 // descriptor.
878 ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
879 }
880
881 return ReducedPartRdx;
882 }
883 case VPInstruction::ExtractLastElement:
884 case VPInstruction::ExtractPenultimateElement: {
885 unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
886 Value *Res;
887 if (State.VF.isVector()) {
888 assert(Offset <= State.VF.getKnownMinValue() &&
889 "invalid offset to extract from");
890 // Extract lane VF - Offset from the operand.
891 Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
892 } else {
893 assert(Offset <= 1 && "invalid offset to extract from");
894 Res = State.get(getOperand(0));
895 }
897 Res->setName(Name);
898 return Res;
899 }
900 case VPInstruction::LogicalAnd: {
901 Value *A = State.get(getOperand(0));
902 Value *B = State.get(getOperand(1));
903 return Builder.CreateLogicalAnd(A, B, Name);
904 }
905 case VPInstruction::PtrAdd: {
906 assert(vputils::onlyFirstLaneUsed(this) &&
907 "can only generate first lane for PtrAdd");
908 Value *Ptr = State.get(getOperand(0), VPLane(0));
909 Value *Addend = State.get(getOperand(1), VPLane(0));
910 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
911 }
912 case VPInstruction::WidePtrAdd: {
913 Value *Ptr =
915 Value *Addend = State.get(getOperand(1));
916 return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
917 }
918 case VPInstruction::AnyOf: {
919 Value *Res = Builder.CreateFreeze(State.get(getOperand(0)));
920 for (VPValue *Op : drop_begin(operands()))
921 Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));
922 return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
923 }
924 case VPInstruction::ExtractLane: {
925 Value *LaneToExtract = State.get(getOperand(0), true);
926 Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
927 Value *Res = nullptr;
928 Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
929
930 for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
931 Value *VectorStart =
932 Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
933 Value *VectorIdx = Idx == 1
934 ? LaneToExtract
935 : Builder.CreateSub(LaneToExtract, VectorStart);
936 Value *Ext = State.VF.isScalar()
937 ? State.get(getOperand(Idx))
938 : Builder.CreateExtractElement(
939 State.get(getOperand(Idx)), VectorIdx);
940 if (Res) {
941 Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
942 Res = Builder.CreateSelect(Cmp, Ext, Res);
943 } else {
944 Res = Ext;
945 }
946 }
947 return Res;
948 }
949 case VPInstruction::FirstActiveLane: {
950 if (getNumOperands() == 1) {
951 Value *Mask = State.get(getOperand(0));
952 return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,
953 true, Name);
954 }
955 // If there are multiple operands, create a chain of selects to pick the
956 // first operand with an active lane and add the number of lanes of the
957 // preceding operands.
958 Value *RuntimeVF =
959 getRuntimeVF(State.Builder, State.Builder.getInt64Ty(), State.VF);
960 unsigned LastOpIdx = getNumOperands() - 1;
961 Value *Res = nullptr;
962 for (int Idx = LastOpIdx; Idx >= 0; --Idx) {
963 Value *TrailingZeros =
964 State.VF.isScalar()
965 ? Builder.CreateZExt(
966 Builder.CreateICmpEQ(State.get(getOperand(Idx)),
967 Builder.getFalse()),
968 Builder.getInt64Ty())
969 : Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(),
970 State.get(getOperand(Idx)),
971 true, Name);
972 Value *Current = Builder.CreateAdd(
973 Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros);
974 if (Res) {
975 Value *Cmp = Builder.CreateICmpNE(TrailingZeros, RuntimeVF);
976 Res = Builder.CreateSelect(Cmp, Current, Res);
977 } else {
978 Res = Current;
979 }
980 }
981
982 return Res;
983 }
984 case VPInstruction::ResumeForEpilogue:
985 return State.get(getOperand(0), true);
986 default:
987 llvm_unreachable("Unsupported opcode for instruction");
988 }
989}
990
992 unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {
993 Type *ScalarTy = Ctx.Types.inferScalarType(this);
994 Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;
995 switch (Opcode) {
996 case Instruction::FNeg:
997 return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
998 case Instruction::UDiv:
999 case Instruction::SDiv:
1000 case Instruction::SRem:
1001 case Instruction::URem:
1002 case Instruction::Add:
1003 case Instruction::FAdd:
1004 case Instruction::Sub:
1005 case Instruction::FSub:
1006 case Instruction::Mul:
1007 case Instruction::FMul:
1008 case Instruction::FDiv:
1009 case Instruction::FRem:
1010 case Instruction::Shl:
1011 case Instruction::LShr:
1012 case Instruction::AShr:
1013 case Instruction::And:
1014 case Instruction::Or:
1015 case Instruction::Xor: {
1018
1019 if (VF.isVector()) {
1020 // Certain instructions can be cheaper to vectorize if they have a
1021 // constant second vector operand. One example of this are shifts on x86.
1022 VPValue *RHS = getOperand(1);
1023 RHSInfo = Ctx.getOperandInfo(RHS);
1024
1025 if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
1028 }
1029
1032 if (CtxI)
1033 Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
1034 return Ctx.TTI.getArithmeticInstrCost(
1035 Opcode, ResultTy, Ctx.CostKind,
1036 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1037 RHSInfo, Operands, CtxI, &Ctx.TLI);
1038 }
1039 case Instruction::Freeze:
1040 // This opcode is unknown. Assume that it is the same as 'mul'.
1041 return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, ResultTy,
1042 Ctx.CostKind);
1043 case Instruction::ExtractValue:
1044 return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
1045 Ctx.CostKind);
1046 case Instruction::ICmp:
1047 case Instruction::FCmp: {
1048 Type *ScalarOpTy = Ctx.Types.inferScalarType(getOperand(0));
1049 Type *OpTy = VF.isVector() ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
1051 return Ctx.TTI.getCmpSelInstrCost(
1052 Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),
1053 Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
1054 {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1055 }
1056 }
1057 return std::nullopt;
1058}
1059
1061 VPCostContext &Ctx) const {
1063 if (!getUnderlyingValue() && getOpcode() != Instruction::FMul) {
1064 // TODO: Compute cost for VPInstructions without underlying values once
1065 // the legacy cost model has been retired.
1066 return 0;
1067 }
1068
1069 assert(!doesGeneratePerAllLanes() &&
1070 "Should only generate a vector value or single scalar, not scalars "
1071 "for all lanes.");
1073 getOpcode(),
1075 }
1076
1077 switch (getOpcode()) {
1078 case Instruction::Select: {
1079 // TODO: It may be possible to improve this by analyzing where the
1080 // condition operand comes from.
1082 auto *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1083 auto *VecTy = Ctx.Types.inferScalarType(getOperand(1));
1084 if (!vputils::onlyFirstLaneUsed(this)) {
1085 CondTy = toVectorTy(CondTy, VF);
1086 VecTy = toVectorTy(VecTy, VF);
1087 }
1088 return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
1089 Ctx.CostKind);
1090 }
1091 case Instruction::ExtractElement:
1093 if (VF.isScalar()) {
1094 // ExtractLane with VF=1 takes care of handling extracting across multiple
1095 // parts.
1096 return 0;
1097 }
1098
1099 // Add on the cost of extracting the element.
1100 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1101 return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
1102 Ctx.CostKind);
1103 }
1104 case VPInstruction::AnyOf: {
1105 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1106 return Ctx.TTI.getArithmeticReductionCost(
1107 Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
1108 }
1110 Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0));
1111 if (VF.isScalar())
1112 return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
1114 CmpInst::ICMP_EQ, Ctx.CostKind);
1115 // Calculate the cost of determining the lane index.
1116 auto *PredTy = toVectorTy(ScalarTy, VF);
1117 IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts,
1118 Type::getInt64Ty(Ctx.LLVMCtx),
1119 {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
1120 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1121 }
1123 assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
1125 std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
1126 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1127
1128 return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
1129 cast<VectorType>(VectorTy),
1130 cast<VectorType>(VectorTy), Mask,
1131 Ctx.CostKind, VF.getKnownMinValue() - 1);
1132 }
1134 Type *ArgTy = Ctx.Types.inferScalarType(getOperand(0));
1135 unsigned Multiplier =
1136 cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue();
1137 Type *RetTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF * Multiplier);
1138 IntrinsicCostAttributes Attrs(Intrinsic::get_active_lane_mask, RetTy,
1139 {ArgTy, ArgTy});
1140 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1141 }
1143 Type *Arg0Ty = Ctx.Types.inferScalarType(getOperand(0));
1144 Type *I32Ty = Type::getInt32Ty(Ctx.LLVMCtx);
1145 Type *I1Ty = Type::getInt1Ty(Ctx.LLVMCtx);
1146 IntrinsicCostAttributes Attrs(Intrinsic::experimental_get_vector_length,
1147 I32Ty, {Arg0Ty, I32Ty, I1Ty});
1148 return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
1149 }
1151 // Add on the cost of extracting the element.
1152 auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
1153 return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
1154 VecTy, Ctx.CostKind, 0);
1155 }
1157 if (VF == ElementCount::getScalable(1))
1160 default:
1161 // TODO: Compute cost of other VPInstructions once the legacy cost model
1162 // has been retired.
1163 assert(getUnderlyingValue() &&
1164 "unexpected VPInstruction without underlying value");
1165 return 0;
1166 }
1167}
1168
1180
1182 switch (getOpcode()) {
1183 case Instruction::PHI:
1187 return true;
1188 default:
1189 return isScalarCast();
1190 }
1191}
1192
1193void VPInstruction::execute(VPTransformState &State) {
1194 assert(!State.Lane && "VPInstruction executing a Lane");
1195 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
1196 assert(flagsValidForOpcode(getOpcode()) &&
1197 "Set flags not supported for the provided opcode");
1198 if (hasFastMathFlags())
1199 State.Builder.setFastMathFlags(getFastMathFlags());
1200 bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
1203 bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
1204 if (GeneratesPerAllLanes) {
1205 for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
1206 Lane != NumLanes; ++Lane) {
1207 Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
1208 assert(GeneratedValue && "generatePerLane must produce a value");
1209 State.set(this, GeneratedValue, VPLane(Lane));
1210 }
1211 return;
1212 }
1213
1214 Value *GeneratedValue = generate(State);
1215 if (!hasResult())
1216 return;
1217 assert(GeneratedValue && "generate must produce a value");
1218 assert((((GeneratedValue->getType()->isVectorTy() ||
1219 GeneratedValue->getType()->isStructTy()) ==
1220 !GeneratesPerFirstLaneOnly) ||
1221 State.VF.isScalar()) &&
1222 "scalar value but not only first lane defined");
1223 State.set(this, GeneratedValue,
1224 /*IsScalar*/ GeneratesPerFirstLaneOnly);
1225}
1226
1229 return false;
1230 switch (getOpcode()) {
1231 case Instruction::ExtractElement:
1232 case Instruction::Freeze:
1233 case Instruction::FCmp:
1234 case Instruction::ICmp:
1235 case Instruction::Select:
1236 case Instruction::PHI:
1248 case VPInstruction::Not:
1255 return false;
1256 default:
1257 return true;
1258 }
1259}
1260
1262 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1264 return vputils::onlyFirstLaneUsed(this);
1265
1266 switch (getOpcode()) {
1267 default:
1268 return false;
1269 case Instruction::ExtractElement:
1270 return Op == getOperand(1);
1271 case Instruction::PHI:
1272 return true;
1273 case Instruction::FCmp:
1274 case Instruction::ICmp:
1275 case Instruction::Select:
1276 case Instruction::Or:
1277 case Instruction::Freeze:
1278 case VPInstruction::Not:
1279 // TODO: Cover additional opcodes.
1280 return vputils::onlyFirstLaneUsed(this);
1289 return true;
1291 return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
1293 return Op == getOperand(0);
1296 return Op == getOperand(1);
1298 return Op == getOperand(0);
1299 };
1300 llvm_unreachable("switch should return");
1301}
1302
1304 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1306 return vputils::onlyFirstPartUsed(this);
1307
1308 switch (getOpcode()) {
1309 default:
1310 return false;
1311 case Instruction::FCmp:
1312 case Instruction::ICmp:
1313 case Instruction::Select:
1314 return vputils::onlyFirstPartUsed(this);
1318 return true;
1319 };
1320 llvm_unreachable("switch should return");
1321}
1322
1323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1325 VPSlotTracker SlotTracker(getParent()->getPlan());
1326 print(dbgs(), "", SlotTracker);
1327}
1328
1330 VPSlotTracker &SlotTracker) const {
1331 O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
1332
1333 if (hasResult()) {
1335 O << " = ";
1336 }
1337
1338 switch (getOpcode()) {
1339 case VPInstruction::Not:
1340 O << "not";
1341 break;
1343 O << "combined load";
1344 break;
1346 O << "combined store";
1347 break;
1349 O << "active lane mask";
1350 break;
1352 O << "EXPLICIT-VECTOR-LENGTH";
1353 break;
1355 O << "first-order splice";
1356 break;
1358 O << "branch-on-cond";
1359 break;
1361 O << "TC > VF ? TC - VF : 0";
1362 break;
1364 O << "VF * Part +";
1365 break;
1367 O << "branch-on-count";
1368 break;
1370 O << "broadcast";
1371 break;
1373 O << "buildstructvector";
1374 break;
1376 O << "buildvector";
1377 break;
1379 O << "extract-lane";
1380 break;
1382 O << "extract-last-element";
1383 break;
1385 O << "extract-penultimate-element";
1386 break;
1388 O << "compute-anyof-result";
1389 break;
1391 O << "compute-find-iv-result";
1392 break;
1394 O << "compute-reduction-result";
1395 break;
1397 O << "logical-and";
1398 break;
1400 O << "ptradd";
1401 break;
1403 O << "wide-ptradd";
1404 break;
1406 O << "any-of";
1407 break;
1409 O << "first-active-lane";
1410 break;
1412 O << "reduction-start-vector";
1413 break;
1415 O << "resume-for-epilogue";
1416 break;
1417 default:
1419 }
1420
1421 printFlags(O);
1423
1424 if (auto DL = getDebugLoc()) {
1425 O << ", !dbg ";
1426 DL.print(O);
1427 }
1428}
1429#endif
1430
1431void VPInstructionWithType::execute(VPTransformState &State) {
1432 State.setDebugLocFrom(getDebugLoc());
1433 if (isScalarCast()) {
1434 Value *Op = State.get(getOperand(0), VPLane(0));
1435 Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()),
1436 Op, ResultTy);
1437 State.set(this, Cast, VPLane(0));
1438 return;
1439 }
1440 switch (getOpcode()) {
1441 case VPInstruction::StepVector: {
1442 Value *StepVector =
1443 State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
1444 State.set(this, StepVector);
1445 break;
1446 }
1447 case VPInstruction::VScale: {
1448 Value *VScale = State.Builder.CreateVScale(ResultTy);
1449 State.set(this, VScale, true);
1450 break;
1451 }
1452
1453 default:
1454 llvm_unreachable("opcode not implemented yet");
1455 }
1456}
1457
1458#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1460 VPSlotTracker &SlotTracker) const {
1461 O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
1463 O << " = ";
1464
1465 switch (getOpcode()) {
1467 O << "wide-iv-step ";
1469 break;
1471 O << "step-vector " << *ResultTy;
1472 break;
1474 O << "vscale " << *ResultTy;
1475 break;
1476 default:
1477 assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
1480 O << " to " << *ResultTy;
1481 }
1482}
1483#endif
1484
1485void VPPhi::execute(VPTransformState &State) {
1486 State.setDebugLocFrom(getDebugLoc());
1487 PHINode *NewPhi = State.Builder.CreatePHI(
1488 State.TypeAnalysis.inferScalarType(this), 2, getName());
1489 unsigned NumIncoming = getNumIncoming();
1490 if (getParent() != getParent()->getPlan()->getScalarPreheader()) {
1491 // TODO: Fixup all incoming values of header phis once recipes defining them
1492 // are introduced.
1493 NumIncoming = 1;
1494 }
1495 for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
1496 Value *IncV = State.get(getIncomingValue(Idx), VPLane(0));
1497 BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
1498 NewPhi->addIncoming(IncV, PredBB);
1499 }
1500 State.set(this, NewPhi, VPLane(0));
1501}
1502
1503#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1504void VPPhi::print(raw_ostream &O, const Twine &Indent,
1505 VPSlotTracker &SlotTracker) const {
1506 O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
1508 O << " = phi ";
1510}
1511#endif
1512
1513VPIRInstruction *VPIRInstruction ::create(Instruction &I) {
1514 if (auto *Phi = dyn_cast<PHINode>(&I))
1515 return new VPIRPhi(*Phi);
1516 return new VPIRInstruction(I);
1517}
1518
1519void VPIRInstruction::execute(VPTransformState &State) {
1520 assert(!isa<VPIRPhi>(this) && getNumOperands() == 0 &&
1521 "PHINodes must be handled by VPIRPhi");
1522 // Advance the insert point after the wrapped IR instruction. This allows
1523 // interleaving VPIRInstructions and other recipes.
1524 State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
1525}
1526
1527InstructionCost VPIRInstruction::computeCost(ElementCount VF,
1528 VPCostContext &Ctx) const {
1529 // The recipe wraps an existing IR instruction on the border of VPlan's scope,
1530 // hence it does not contribute to the cost-modeling for the VPlan.
1531 return 0;
1532}
1533
1536 "can only update exiting operands to phi nodes");
1537 assert(getNumOperands() > 0 && "must have at least one operand");
1538 VPValue *Exiting = getOperand(0);
1539 if (Exiting->isLiveIn())
1540 return;
1541
1542 Exiting = Builder.createNaryOp(VPInstruction::ExtractLastElement, {Exiting});
1543 setOperand(0, Exiting);
1544}
1545
1546#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1548 VPSlotTracker &SlotTracker) const {
1549 O << Indent << "IR " << I;
1550}
1551#endif
1552
1553void VPIRPhi::execute(VPTransformState &State) {
1554 PHINode *Phi = &getIRPhi();
1555 for (const auto &[Idx, Op] : enumerate(operands())) {
1556 VPValue *ExitValue = Op;
1557 auto Lane = vputils::isSingleScalar(ExitValue)
1559 : VPLane::getLastLaneForVF(State.VF);
1560 VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
1561 auto *PredVPBB = Pred->getExitingBasicBlock();
1562 BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
1563 // Set insertion point in PredBB in case an extract needs to be generated.
1564 // TODO: Model extracts explicitly.
1565 State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
1566 Value *V = State.get(ExitValue, VPLane(Lane));
1567 // If there is no existing block for PredBB in the phi, add a new incoming
1568 // value. Otherwise update the existing incoming value for PredBB.
1569 if (Phi->getBasicBlockIndex(PredBB) == -1)
1570 Phi->addIncoming(V, PredBB);
1571 else
1572 Phi->setIncomingValueForBlock(PredBB, V);
1573 }
1574
1575 // Advance the insert point after the wrapped IR instruction. This allows
1576 // interleaving VPIRInstructions and other recipes.
1577 State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
1578}
1579
1581 VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
1582 assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
1583 "Number of phi operands must match number of predecessors");
1584 unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
1585 R->removeOperand(Position);
1586}
1587
1588#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1590 VPSlotTracker &SlotTracker) const {
1591 interleaveComma(enumerate(getAsRecipe()->operands()), O,
1592 [this, &O, &SlotTracker](auto Op) {
1593 O << "[ ";
1594 Op.value()->printAsOperand(O, SlotTracker);
1595 O << ", ";
1596 getIncomingBlock(Op.index())->printAsOperand(O);
1597 O << " ]";
1598 });
1599}
1600#endif
1601
1602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1603void VPIRPhi::print(raw_ostream &O, const Twine &Indent,
1604 VPSlotTracker &SlotTracker) const {
1606
1607 if (getNumOperands() != 0) {
1608 O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
1610 [&O, &SlotTracker](auto Op) {
1611 std::get<0>(Op)->printAsOperand(O, SlotTracker);
1612 O << " from ";
1613 std::get<1>(Op)->printAsOperand(O);
1614 });
1615 O << ")";
1616 }
1617}
1618#endif
1619
1621 : VPIRMetadata(I) {
1622 if (!LVer || !isa<LoadInst, StoreInst>(&I))
1623 return;
1624 const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(&I);
1625 if (AliasScopeMD)
1626 Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
1627 if (NoAliasMD)
1628 Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
1629}
1630
1631void VPIRMetadata::applyMetadata(Instruction &I) const {
1632 for (const auto &[Kind, Node] : Metadata)
1633 I.setMetadata(Kind, Node);
1634}
1635
1636void VPIRMetadata::intersect(const VPIRMetadata &Other) {
1637 SmallVector<std::pair<unsigned, MDNode *>> MetadataIntersection;
1638 for (const auto &[KindA, MDA] : Metadata) {
1639 for (const auto &[KindB, MDB] : Other.Metadata) {
1640 if (KindA == KindB && MDA == MDB) {
1641 MetadataIntersection.emplace_back(KindA, MDA);
1642 break;
1643 }
1644 }
1645 }
1646 Metadata = std::move(MetadataIntersection);
1647}
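// Illustrative example (editorial, not from this file): if this recipe carries
// {(!alias.scope, A), (!noalias, N)} and Other carries {(!alias.scope, A),
// (!noalias, M)}, only the matching (!alias.scope, A) entry survives the
// intersection above.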
1648
1649void VPWidenCallRecipe::execute(VPTransformState &State) {
1650 assert(State.VF.isVector() && "not widening");
1651 assert(Variant != nullptr && "Can't create vector function.");
1652
1653 FunctionType *VFTy = Variant->getFunctionType();
1654 // Add return type if intrinsic is overloaded on it.
1656 for (const auto &I : enumerate(args())) {
1657 Value *Arg;
1658 // Some vectorized function variants may also take a scalar argument,
1659 // e.g. linear parameters for pointers. This needs to be the scalar value
1660 // from the start of the respective part when interleaving.
1661 if (!VFTy->getParamType(I.index())->isVectorTy())
1662 Arg = State.get(I.value(), VPLane(0));
1663 else
1664 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1665 Args.push_back(Arg);
1666 }
1667
1670 if (CI)
1671 CI->getOperandBundlesAsDefs(OpBundles);
1672
1673 CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
1674 applyFlags(*V);
1675 applyMetadata(*V);
1676 V->setCallingConv(Variant->getCallingConv());
1677
1678 if (!V->getType()->isVoidTy())
1679 State.set(this, V);
1680}
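// Illustrative example (editorial, not from this file): a scalar call
// "float @foo(float)" whose pre-selected variant is, say,
// "<4 x float> @foo_vec(<4 x float>)" is emitted as a single call to the
// variant; parameters the variant takes as scalars (e.g. linear pointer
// arguments) are fed from lane 0 of their operands.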
1681
1682InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
1683 VPCostContext &Ctx) const {
1684 return Ctx.TTI.getCallInstrCost(nullptr, Variant->getReturnType(),
1685 Variant->getFunctionType()->params(),
1686 Ctx.CostKind);
1687}
1688
1689#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1691 VPSlotTracker &SlotTracker) const {
1692 O << Indent << "WIDEN-CALL ";
1693
1694 Function *CalledFn = getCalledScalarFunction();
1695 if (CalledFn->getReturnType()->isVoidTy())
1696 O << "void ";
1697 else {
1699 O << " = ";
1700 }
1701
1702 O << "call";
1703 printFlags(O);
1704 O << " @" << CalledFn->getName() << "(";
1705 interleaveComma(args(), O, [&O, &SlotTracker](VPValue *Op) {
1706 Op->printAsOperand(O, SlotTracker);
1707 });
1708 O << ")";
1709
1710 O << " (using library function";
1711 if (Variant->hasName())
1712 O << ": " << Variant->getName();
1713 O << ")";
1714}
1715#endif
1716
1717void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
1718 assert(State.VF.isVector() && "not widening");
1719
1720 SmallVector<Type *, 2> TysForDecl;
1721 // Add return type if intrinsic is overloaded on it.
1722 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
1723 TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
1725 for (const auto &I : enumerate(operands())) {
1726 // Some intrinsics have a scalar argument - don't replace it with a
1727 // vector.
1728 Value *Arg;
1729 if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
1730 State.TTI))
1731 Arg = State.get(I.value(), VPLane(0));
1732 else
1733 Arg = State.get(I.value(), onlyFirstLaneUsed(I.value()));
1734 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
1735 State.TTI))
1736 TysForDecl.push_back(Arg->getType());
1737 Args.push_back(Arg);
1738 }
1739
1740 // Use vector version of the intrinsic.
1741 Module *M = State.Builder.GetInsertBlock()->getModule();
1742 Function *VectorF =
1743 Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1744 assert(VectorF &&
1745 "Can't retrieve vector intrinsic or vector-predication intrinsics.");
1746
1749 if (CI)
1750 CI->getOperandBundlesAsDefs(OpBundles);
1751
1752 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1753
1754 applyFlags(*V);
1755 applyMetadata(*V);
1756
1757 if (!V->getType()->isVoidTy())
1758 State.set(this, V);
1759}
1760
1761/// Compute the cost for the intrinsic \p ID with \p Operands, produced by \p R.
1764 const VPRecipeWithIRFlags &R,
1765 ElementCount VF,
1766 VPCostContext &Ctx) {
1767 // Some backends analyze intrinsic arguments to determine cost. Use the
1768 // underlying value for the operand if it has one. Otherwise try to use the
1769 // operand of the underlying call instruction, if there is one. Otherwise
1770 // clear Arguments.
1771 // TODO: Rework TTI interface to be independent of concrete IR values.
1773 for (const auto &[Idx, Op] : enumerate(Operands)) {
1774 auto *V = Op->getUnderlyingValue();
1775 if (!V) {
1776 if (auto *UI = dyn_cast_or_null<CallBase>(R.getUnderlyingValue())) {
1777 Arguments.push_back(UI->getArgOperand(Idx));
1778 continue;
1779 }
1780 Arguments.clear();
1781 break;
1782 }
1783 Arguments.push_back(V);
1784 }
1785
1786 Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
1787 Type *RetTy = VF.isVector() ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;
1788 SmallVector<Type *> ParamTys;
1789 for (const VPValue *Op : Operands) {
1790 ParamTys.push_back(VF.isVector()
1791 ? toVectorTy(Ctx.Types.inferScalarType(Op), VF)
1792 : Ctx.Types.inferScalarType(Op));
1793 }
1794
1795 // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1796 FastMathFlags FMF =
1797 R.hasFastMathFlags() ? R.getFastMathFlags() : FastMathFlags();
1798 IntrinsicCostAttributes CostAttrs(
1799 ID, RetTy, Arguments, ParamTys, FMF,
1800 dyn_cast_or_null<IntrinsicInst>(R.getUnderlyingValue()),
1801 InstructionCost::getInvalid(), &Ctx.TLI);
1802 return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
1803}
1804
1805InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
1806 VPCostContext &Ctx) const {
1808 return getCostForIntrinsics(VectorIntrinsicID, ArgOps, *this, VF, Ctx);
1809}
1810
1812 return Intrinsic::getBaseName(VectorIntrinsicID);
1813}
1814
1816 assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1817 return all_of(enumerate(operands()), [this, &Op](const auto &X) {
1818 auto [Idx, V] = X;
1820 Idx, nullptr);
1821 });
1822}
1823
1824#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1826 VPSlotTracker &SlotTracker) const {
1827 O << Indent << "WIDEN-INTRINSIC ";
1828 if (ResultTy->isVoidTy()) {
1829 O << "void ";
1830 } else {
1832 O << " = ";
1833 }
1834
1835 O << "call";
1836 printFlags(O);
1837 O << getIntrinsicName() << "(";
1838
1840 Op->printAsOperand(O, SlotTracker);
1841 });
1842 O << ")";
1843}
1844#endif
1845
1846void VPHistogramRecipe::execute(VPTransformState &State) {
1847 IRBuilderBase &Builder = State.Builder;
1848
1849 Value *Address = State.get(getOperand(0));
1850 Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
1851 VectorType *VTy = cast<VectorType>(Address->getType());
1852
1853 // The histogram intrinsic requires a mask even if the recipe doesn't;
1854 // if the mask operand was omitted then all lanes should be executed and
1855 // we just need to synthesize an all-true mask.
1856 Value *Mask = nullptr;
1857 if (VPValue *VPMask = getMask())
1858 Mask = State.get(VPMask);
1859 else
1860 Mask =
1861 Builder.CreateVectorSplat(VTy->getElementCount(), Builder.getInt1(1));
1862
1863 // If this is a subtract, we want to invert the increment amount. We may
1864 // add a separate intrinsic in future, but for now we'll try this.
1865 if (Opcode == Instruction::Sub)
1866 IncAmt = Builder.CreateNeg(IncAmt);
1867 else
1868 assert(Opcode == Instruction::Add && "only add or sub supported for now");
1869
1870 State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
1871 {VTy, IncAmt->getType()},
1872 {Address, IncAmt, Mask});
1873}
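// Illustrative example (editorial, not from this file): for VF=4 the emitted
// call looks roughly like
//   call void @llvm.experimental.vector.histogram.add.v4p0.i32(
//     <4 x ptr> %buckets, i32 %inc, <4 x i1> %mask)
// with %inc negated first when the recipe's opcode is a subtract.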
1874
1875InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
1876 VPCostContext &Ctx) const {
1877 // FIXME: Take the gather and scatter into account as well. For now we're
1878 // generating the same cost as the fallback path, but we'll likely
1879 // need to create a new TTI method for determining the cost, including
1880 // whether we can use base + vec-of-smaller-indices or just
1881 // vec-of-pointers.
1882 assert(VF.isVector() && "Invalid VF for histogram cost");
1883 Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1884 VPValue *IncAmt = getOperand(1);
1885 Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
1886 VectorType *VTy = VectorType::get(IncTy, VF);
1887
1888 // Assume that a non-constant update value (or a constant != 1) requires
1889 // a multiply, and add that into the cost.
1890 InstructionCost MulCost =
1891 Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy, Ctx.CostKind);
1892 if (IncAmt->isLiveIn()) {
1894
1895 if (CI && CI->getZExtValue() == 1)
1896 MulCost = TTI::TCC_Free;
1897 }
1898
1899 // Find the cost of the histogram operation itself.
1900 Type *PtrTy = VectorType::get(AddressTy, VF);
1901 Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
1902 IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
1903 Type::getVoidTy(Ctx.LLVMCtx),
1904 {PtrTy, IncTy, MaskTy});
1905
1906 // Add the costs together with the add/sub operation.
1907 return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind) + MulCost +
1908 Ctx.TTI.getArithmeticInstrCost(Opcode, VTy, Ctx.CostKind);
1909}
1910
1911#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1913 VPSlotTracker &SlotTracker) const {
1914 O << Indent << "WIDEN-HISTOGRAM buckets: ";
1916
1917 if (Opcode == Instruction::Sub)
1918 O << ", dec: ";
1919 else {
1920 assert(Opcode == Instruction::Add);
1921 O << ", inc: ";
1922 }
1924
1925 if (VPValue *Mask = getMask()) {
1926 O << ", mask: ";
1927 Mask->printAsOperand(O, SlotTracker);
1928 }
1929}
1930
1932 VPSlotTracker &SlotTracker) const {
1933 O << Indent << "WIDEN-SELECT ";
1935 O << " = select ";
1936 printFlags(O);
1938 O << ", ";
1940 O << ", ";
1942 O << (isInvariantCond() ? " (condition is loop invariant)" : "");
1943}
1944#endif
1945
1946void VPWidenSelectRecipe::execute(VPTransformState &State) {
1947 // The condition can be loop invariant but still defined inside the
1948 // loop. This means that we can't just use the original 'cond' value.
1949 // We have to take the 'vectorized' value and pick the first lane.
1950 // Instcombine will make this a no-op.
1951 auto *InvarCond =
1952 isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
1953
1954 Value *Cond = InvarCond ? InvarCond : State.get(getCond());
1955 Value *Op0 = State.get(getOperand(1));
1956 Value *Op1 = State.get(getOperand(2));
1957 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
1958 State.set(this, Sel);
1959 if (auto *I = dyn_cast<Instruction>(Sel)) {
1961 applyFlags(*I);
1962 applyMetadata(*I);
1963 }
1964}
1965
1967 VPCostContext &Ctx) const {
1969 bool ScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
1970 Type *ScalarTy = Ctx.Types.inferScalarType(this);
1971 Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
1972
1973 VPValue *Op0, *Op1;
1974 using namespace llvm::VPlanPatternMatch;
1975 if (!ScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
1976 (match(this, m_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1))) ||
1977 match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1))))) {
1978 // select x, y, false --> x & y
1979 // select x, true, y --> x | y
1980 const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
1981 const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);
1982
1984 if (all_of(operands(),
1985 [](VPValue *Op) { return Op->getUnderlyingValue(); }))
1986 Operands.append(SI->op_begin(), SI->op_end());
1987 bool IsLogicalOr = match(this, m_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
1988 return Ctx.TTI.getArithmeticInstrCost(
1989 IsLogicalOr ? Instruction::Or : Instruction::And, VectorTy,
1990 Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
1991 }
1992
1993 Type *CondTy = Ctx.Types.inferScalarType(getOperand(0));
1994 if (!ScalarCond)
1995 CondTy = VectorType::get(CondTy, VF);
1996
1998 if (auto *Cmp = dyn_cast<CmpInst>(SI->getCondition()))
1999 Pred = Cmp->getPredicate();
2000 return Ctx.TTI.getCmpSelInstrCost(
2001 Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
2002 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
2003}
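// As an illustrative example of the i1 special case above (value names are
// made up), a recipe matching
//   %r = select <4 x i1> %x, <4 x i1> %y, <4 x i1> zeroinitializer
// is costed as a vector 'and', and
//   %r = select <4 x i1> %x, <4 x i1> splat (i1 true), <4 x i1> %y
// as a vector 'or'; all other selects fall through to getCmpSelInstrCost().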
2004
2005VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
2006 AllowReassoc = FMF.allowReassoc();
2007 NoNaNs = FMF.noNaNs();
2008 NoInfs = FMF.noInfs();
2009 NoSignedZeros = FMF.noSignedZeros();
2010 AllowReciprocal = FMF.allowReciprocal();
2011 AllowContract = FMF.allowContract();
2012 ApproxFunc = FMF.approxFunc();
2013}
2014
2015#if !defined(NDEBUG)
2016bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
2017 switch (OpType) {
2018 case OperationType::OverflowingBinOp:
2019 return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
2020 Opcode == Instruction::Mul ||
2021 Opcode == VPInstruction::CanonicalIVIncrementForPart;
2022 case OperationType::Trunc:
2023 return Opcode == Instruction::Trunc;
2024 case OperationType::DisjointOp:
2025 return Opcode == Instruction::Or;
2026 case OperationType::PossiblyExactOp:
2027 return Opcode == Instruction::AShr;
2028 case OperationType::GEPOp:
2029 return Opcode == Instruction::GetElementPtr ||
2030 Opcode == VPInstruction::PtrAdd ||
2031 Opcode == VPInstruction::WidePtrAdd;
2032 case OperationType::FPMathOp:
2033 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
2034 Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
2035 Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
2036 Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
2037 Opcode == VPInstruction::WideIVStep ||
2040 case OperationType::NonNegOp:
2041 return Opcode == Instruction::ZExt;
2042 break;
2043 case OperationType::Cmp:
2044 return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
2045 case OperationType::Other:
2046 return true;
2047 }
2048 llvm_unreachable("Unknown OperationType enum");
2049}
2050#endif
2051
2052#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2054 switch (OpType) {
2055 case OperationType::Cmp:
2057 break;
2058 case OperationType::DisjointOp:
2059 if (DisjointFlags.IsDisjoint)
2060 O << " disjoint";
2061 break;
2062 case OperationType::PossiblyExactOp:
2063 if (ExactFlags.IsExact)
2064 O << " exact";
2065 break;
2066 case OperationType::OverflowingBinOp:
2067 if (WrapFlags.HasNUW)
2068 O << " nuw";
2069 if (WrapFlags.HasNSW)
2070 O << " nsw";
2071 break;
2072 case OperationType::Trunc:
2073 if (TruncFlags.HasNUW)
2074 O << " nuw";
2075 if (TruncFlags.HasNSW)
2076 O << " nsw";
2077 break;
2078 case OperationType::FPMathOp:
2080 break;
2081 case OperationType::GEPOp:
2082 if (GEPFlags.isInBounds())
2083 O << " inbounds";
2084 else if (GEPFlags.hasNoUnsignedSignedWrap())
2085 O << " nusw";
2086 if (GEPFlags.hasNoUnsignedWrap())
2087 O << " nuw";
2088 break;
2089 case OperationType::NonNegOp:
2090 if (NonNegFlags.NonNeg)
2091 O << " nneg";
2092 break;
2093 case OperationType::Other:
2094 break;
2095 }
2096 O << " ";
2097}
2098#endif
2099
2101 auto &Builder = State.Builder;
2102 switch (Opcode) {
2103 case Instruction::Call:
2104 case Instruction::Br:
2105 case Instruction::PHI:
2106 case Instruction::GetElementPtr:
2107 case Instruction::Select:
2108 llvm_unreachable("This instruction is handled by a different recipe.");
2109 case Instruction::UDiv:
2110 case Instruction::SDiv:
2111 case Instruction::SRem:
2112 case Instruction::URem:
2113 case Instruction::Add:
2114 case Instruction::FAdd:
2115 case Instruction::Sub:
2116 case Instruction::FSub:
2117 case Instruction::FNeg:
2118 case Instruction::Mul:
2119 case Instruction::FMul:
2120 case Instruction::FDiv:
2121 case Instruction::FRem:
2122 case Instruction::Shl:
2123 case Instruction::LShr:
2124 case Instruction::AShr:
2125 case Instruction::And:
2126 case Instruction::Or:
2127 case Instruction::Xor: {
2128 // Just widen unops and binops.
2130 for (VPValue *VPOp : operands())
2131 Ops.push_back(State.get(VPOp));
2132
2133 Value *V = Builder.CreateNAryOp(Opcode, Ops);
2134
2135 if (auto *VecOp = dyn_cast<Instruction>(V)) {
2136 applyFlags(*VecOp);
2137 applyMetadata(*VecOp);
2138 }
2139
2140 // Use this vector value for all users of the original instruction.
2141 State.set(this, V);
2142 break;
2143 }
2144 case Instruction::ExtractValue: {
2145 assert(getNumOperands() == 2 && "expected single level extractvalue");
2146 Value *Op = State.get(getOperand(0));
2148 Value *Extract = Builder.CreateExtractValue(Op, CI->getZExtValue());
2149 State.set(this, Extract);
2150 break;
2151 }
2152 case Instruction::Freeze: {
2153 Value *Op = State.get(getOperand(0));
2154 Value *Freeze = Builder.CreateFreeze(Op);
2155 State.set(this, Freeze);
2156 break;
2157 }
2158 case Instruction::ICmp:
2159 case Instruction::FCmp: {
2160 // Widen compares. Generate vector compares.
2161 bool FCmp = Opcode == Instruction::FCmp;
2162 Value *A = State.get(getOperand(0));
2163 Value *B = State.get(getOperand(1));
2164 Value *C = nullptr;
2165 if (FCmp) {
2166 // Propagate fast math flags.
2167 C = Builder.CreateFCmpFMF(
2168 getPredicate(), A, B,
2170 } else {
2171 C = Builder.CreateICmp(getPredicate(), A, B);
2172 }
2173 if (auto *I = dyn_cast<Instruction>(C))
2174 applyMetadata(*I);
2175 State.set(this, C);
2176 break;
2177 }
2178 default:
2179 // This instruction is not vectorized by simple widening.
2180 LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
2181 << Instruction::getOpcodeName(Opcode));
2182 llvm_unreachable("Unhandled instruction!");
2183 } // end of switch.
2184
2185#if !defined(NDEBUG)
2186 // Verify that VPlan type inference results agree with the type of the
2187 // generated values.
2188 assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
2189 State.get(this)->getType() &&
2190 "inferred type and type from generated instructions do not match");
2191#endif
2192}
2193
2195 VPCostContext &Ctx) const {
2196 switch (Opcode) {
2197 case Instruction::UDiv:
2198 case Instruction::SDiv:
2199 case Instruction::SRem:
2200 case Instruction::URem:
2201 // If the div/rem operation isn't safe to speculate and requires
2202 // predication, then the only way we can even create a vplan is to insert
2203 // a select on the second input operand to ensure we use the value of 1
2204 // for the inactive lanes. The select will be costed separately.
2205 case Instruction::FNeg:
2206 case Instruction::Add:
2207 case Instruction::FAdd:
2208 case Instruction::Sub:
2209 case Instruction::FSub:
2210 case Instruction::Mul:
2211 case Instruction::FMul:
2212 case Instruction::FDiv:
2213 case Instruction::FRem:
2214 case Instruction::Shl:
2215 case Instruction::LShr:
2216 case Instruction::AShr:
2217 case Instruction::And:
2218 case Instruction::Or:
2219 case Instruction::Xor:
2220 case Instruction::Freeze:
2221 case Instruction::ExtractValue:
2222 case Instruction::ICmp:
2223 case Instruction::FCmp:
2224 return *getCostForRecipeWithOpcode(getOpcode(), VF, Ctx);
2225 default:
2226 llvm_unreachable("Unsupported opcode for instruction");
2227 }
2228}
2229
2230#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2232 VPSlotTracker &SlotTracker) const {
2233 O << Indent << "WIDEN ";
2235 O << " = " << Instruction::getOpcodeName(Opcode);
2236 printFlags(O);
2238}
2239#endif
2240
2242 auto &Builder = State.Builder;
2243 /// Vectorize casts.
2244 assert(State.VF.isVector() && "Not vectorizing?");
2245 Type *DestTy = VectorType::get(getResultType(), State.VF);
2246 VPValue *Op = getOperand(0);
2247 Value *A = State.get(Op);
2248 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
2249 State.set(this, Cast);
2250 if (auto *CastOp = dyn_cast<Instruction>(Cast)) {
2251 applyFlags(*CastOp);
2252 applyMetadata(*CastOp);
2253 }
2254}
2255
2257 VPCostContext &Ctx) const {
2258 // TODO: In some cases, VPWidenCastRecipes are created but not considered in
2259 // the legacy cost model, including truncates/extends when evaluating a
2260 // reduction in a smaller type.
2261 if (!getUnderlyingValue())
2262 return 0;
2264 // Computes the CastContextHint from a recipe that may access memory.
2264 auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
2265 if (VF.isScalar())
2267 if (isa<VPInterleaveBase>(R))
2269 if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R))
2270 return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
2272 const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
2273 if (WidenMemoryRecipe == nullptr)
2275 if (!WidenMemoryRecipe->isConsecutive())
2277 if (WidenMemoryRecipe->isReverse())
2279 if (WidenMemoryRecipe->isMasked())
2282 };
2283
2284 VPValue *Operand = getOperand(0);
2286 // For Trunc/FPTrunc, get the context from the only user.
2287 if ((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
2289 if (auto *StoreRecipe = dyn_cast<VPRecipeBase>(*user_begin()))
2290 CCH = ComputeCCH(StoreRecipe);
2291 }
2292 // For Z/Sext, get the context from the operand.
2293 else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
2294 Opcode == Instruction::FPExt) {
2295 if (Operand->isLiveIn())
2297 else if (Operand->getDefiningRecipe())
2298 CCH = ComputeCCH(Operand->getDefiningRecipe());
2299 }
2300
2301 auto *SrcTy =
2302 cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(Operand), VF));
2303 auto *DestTy = cast<VectorType>(toVectorTy(getResultType(), VF));
2304 // Arm TTI will use the underlying instruction to determine the cost.
2305 return Ctx.TTI.getCastInstrCost(
2306 Opcode, DestTy, SrcTy, CCH, Ctx.CostKind,
2308}
2309
2310#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2312 VPSlotTracker &SlotTracker) const {
2313 O << Indent << "WIDEN-CAST ";
2315 O << " = " << Instruction::getOpcodeName(Opcode);
2316 printFlags(O);
2318 O << " to " << *getResultType();
2319}
2320#endif
2321
2323 VPCostContext &Ctx) const {
2324 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2325}
2326
2327/// A helper function that returns an integer or floating-point constant with
2328/// value C.
2330 return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
2331 : ConstantFP::get(Ty, C);
2332}
2333
2334#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2336 VPSlotTracker &SlotTracker) const {
2337 O << Indent;
2339 O << " = WIDEN-INDUCTION ";
2341
2342 if (auto *TI = getTruncInst())
2343 O << " (truncated to " << *TI->getType() << ")";
2344}
2345#endif
2346
2348 // The step may be defined by a recipe in the preheader (e.g. if it requires
2349 // SCEV expansion), but for the canonical induction the step is required to be
2350 // 1, which is represented as a live-in.
2352 return false;
2355 auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
2356 return StartC && StartC->isZero() && StepC && StepC->isOne() &&
2357 getScalarType() == CanIV->getScalarType();
2358}
2359
2360#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2362 VPSlotTracker &SlotTracker) const {
2363 O << Indent;
2365 O << " = DERIVED-IV ";
2366 getStartValue()->printAsOperand(O, SlotTracker);
2367 O << " + ";
2368 getOperand(1)->printAsOperand(O, SlotTracker);
2369 O << " * ";
2370 getStepValue()->printAsOperand(O, SlotTracker);
2371}
2372#endif
2373
2375 // Fast-math-flags propagate from the original induction instruction.
2376 IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
2377 if (hasFastMathFlags())
2378 State.Builder.setFastMathFlags(getFastMathFlags());
2379
2380 // Compute scalar induction steps. BaseIV is the scalar induction variable on
2381 // which to base the steps, and Step is the size of each step.
2382
2383 Value *BaseIV = State.get(getOperand(0), VPLane(0));
2384 Value *Step = State.get(getStepValue(), VPLane(0));
2385 IRBuilderBase &Builder = State.Builder;
2386
2387 // Ensure step has the same type as that of scalar IV.
2388 Type *BaseIVTy = BaseIV->getType()->getScalarType();
2389 assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
2390
2391 // We build scalar steps for both integer and floating-point induction
2392 // variables. Here, we determine the kind of arithmetic we will perform.
2395 if (BaseIVTy->isIntegerTy()) {
2396 AddOp = Instruction::Add;
2397 MulOp = Instruction::Mul;
2398 } else {
2399 AddOp = InductionOpcode;
2400 MulOp = Instruction::FMul;
2401 }
2402
2403 // Determine the number of scalars we need to generate for each unroll
2404 // iteration.
2405 bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
2406 // Compute the scalar steps and save the results in State.
2407 Type *IntStepTy =
2408 IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
2409 Type *VecIVTy = nullptr;
2410 Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
2411 if (!FirstLaneOnly && State.VF.isScalable()) {
2412 VecIVTy = VectorType::get(BaseIVTy, State.VF);
2413 UnitStepVec =
2414 Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
2415 SplatStep = Builder.CreateVectorSplat(State.VF, Step);
2416 SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
2417 }
2418
2419 unsigned StartLane = 0;
2420 unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
2421 if (State.Lane) {
2422 StartLane = State.Lane->getKnownLane();
2423 EndLane = StartLane + 1;
2424 }
2425 Value *StartIdx0;
2426 if (getUnrollPart(*this) == 0)
2427 StartIdx0 = ConstantInt::get(IntStepTy, 0);
2428 else {
2429 StartIdx0 = State.get(getOperand(2), true);
2430 if (getUnrollPart(*this) != 1) {
2431 StartIdx0 =
2432 Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
2433 getUnrollPart(*this)));
2434 }
2435 StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
2436 }
2437
2438 if (!FirstLaneOnly && State.VF.isScalable()) {
2439 auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
2440 auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
2441 if (BaseIVTy->isFloatingPointTy())
2442 InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
2443 auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
2444 auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
2445 State.set(this, Add);
2446 // It's also useful to record the per-lane values for the known minimum
2447 // number of elements, so we do that below. This improves code quality when
2448 // extracting the first element, for example.
2449 }
2450
2451 if (BaseIVTy->isFloatingPointTy())
2452 StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
2453
2454 for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
2455 Value *StartIdx = Builder.CreateBinOp(
2456 AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
2457 // The step returned by `createStepForVF` is a runtime-evaluated value
2458 // when VF is scalable. Otherwise, it should be folded into a Constant.
2459 assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
2460 "Expected StartIdx to be folded to a constant when VF is not "
2461 "scalable");
2462 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
2463 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
2464 State.set(this, Add, VPLane(Lane));
2465 }
2466}
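// To summarize the per-lane computation above: for lane L, the emitted scalar
// value is roughly BaseIV + (StartIdx0 + L) * Step, where StartIdx0 is 0 for
// unroll part 0 and the scaled part offset otherwise, with FAdd/FMul used
// instead of Add/Mul for floating-point inductions.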
2467
2468#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2470 VPSlotTracker &SlotTracker) const {
2471 O << Indent;
2473 O << " = SCALAR-STEPS ";
2475}
2476#endif
2477
2479 assert(State.VF.isVector() && "not widening");
2481 // Construct a vector GEP by widening the operands of the scalar GEP as
2482 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
2483 // results in a vector of pointers when at least one operand of the GEP
2484 // is vector-typed. Thus, to keep the representation compact, we only use
2485 // vector-typed operands for loop-varying values.
2486
2487 if (areAllOperandsInvariant()) {
2488 // If we are vectorizing, but the GEP has only loop-invariant operands,
2489 // the GEP we build (by only using vector-typed operands for
2490 // loop-varying values) would be a scalar pointer. Thus, to ensure we
2491 // produce a vector of pointers, we need to either arbitrarily pick an
2492 // operand to broadcast, or broadcast a clone of the original GEP.
2493 // Here, we broadcast a clone of the original.
2494 //
2495 // TODO: If at some point we decide to scalarize instructions having
2496 // loop-invariant operands, this special case will no longer be
2497 // required. We would add the scalarization decision to
2498 // collectLoopScalars() and teach getVectorValue() to broadcast
2499 // the lane-zero scalar value.
2501 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
2502 Ops.push_back(State.get(getOperand(I), VPLane(0)));
2503
2504 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
2505 ArrayRef(Ops).drop_front(), "",
2507 Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
2508 State.set(this, Splat);
2509 } else {
2510 // If the GEP has at least one loop-varying operand, we are sure to
2511 // produce a vector of pointers unless VF is scalar.
2512 // The pointer operand of the new GEP. If it's loop-invariant, we
2513 // won't broadcast it.
2514 auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
2515 : State.get(getOperand(0));
2516
2517 // Collect all the indices for the new GEP. If any index is
2518 // loop-invariant, we won't broadcast it.
2520 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
2521 VPValue *Operand = getOperand(I);
2522 if (isIndexLoopInvariant(I - 1))
2523 Indices.push_back(State.get(Operand, VPLane(0)));
2524 else
2525 Indices.push_back(State.get(Operand));
2526 }
2527
2528 // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
2529 // but it should be a vector, otherwise.
2530 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
2531 Indices, "", getGEPNoWrapFlags());
2532 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
2533 "NewGEP is not a pointer vector");
2534 State.set(this, NewGEP);
2535 }
2536}
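// As a small example of the invariant case above: at VF = 4, a GEP whose
// operands are all loop-invariant is emitted once as a scalar getelementptr
// and then broadcast to a <4 x ptr> value via CreateVectorSplat
// (insertelement + shufflevector), whereas a loop-varying index produces a
// getelementptr with a vector index and thus a vector-of-pointers result
// directly.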
2537
2538#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2540 VPSlotTracker &SlotTracker) const {
2541 O << Indent << "WIDEN-GEP ";
2542 O << (isPointerLoopInvariant() ? "Inv" : "Var");
2543 for (size_t I = 0; I < getNumOperands() - 1; ++I)
2544 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
2545
2546 O << " ";
2548 O << " = getelementptr";
2549 printFlags(O);
2551}
2552#endif
2553
2554static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
2555 unsigned CurrentPart, IRBuilderBase &Builder) {
2556 // Use i32 for the gep index type when the value is constant,
2557 // or query DataLayout for a more suitable index type otherwise.
2558 const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2559 return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
2560 ? DL.getIndexType(Builder.getPtrTy(0))
2561 : Builder.getInt32Ty();
2562}
2563
2565 auto &Builder = State.Builder;
2566 unsigned CurrentPart = getUnrollPart(*this);
2567 bool IsUnitStride = Stride == 1 || Stride == -1;
2568 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2569 IsUnitStride, CurrentPart, Builder);
2570
2571 // The wide store needs to start at the last vector element.
2572 Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
2573 if (IndexTy != RunTimeVF->getType())
2574 RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
2575 // NumElt = Stride * CurrentPart * RunTimeVF
2576 Value *NumElt = Builder.CreateMul(
2577 ConstantInt::get(IndexTy, Stride * (int64_t)CurrentPart), RunTimeVF);
2578 // LastLane = Stride * (RunTimeVF - 1)
2579 Value *LastLane = Builder.CreateSub(RunTimeVF, ConstantInt::get(IndexTy, 1));
2580 if (Stride != 1)
2581 LastLane = Builder.CreateMul(ConstantInt::get(IndexTy, Stride), LastLane);
2582 Value *Ptr = State.get(getOperand(0), VPLane(0));
2583 Value *ResultPtr =
2584 Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", getGEPNoWrapFlags());
2585 ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "",
2587
2588 State.set(this, ResultPtr, /*IsScalar*/ true);
2589}
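// Worked example of the pointer arithmetic above, assuming a reverse access
// with Stride == -1, a fixed VF of 4 and unroll part 1:
//   NumElt   = -1 * 1 * 4  = -4
//   LastLane = -1 * (4 - 1) = -3
// so the result points 7 elements before the scalar pointer, i.e. at the
// lowest-addressed element accessed by this part's reversed vector access.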
2590
2591#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2593 VPSlotTracker &SlotTracker) const {
2594 O << Indent;
2596 O << " = vector-end-pointer";
2597 printFlags(O);
2599}
2600#endif
2601
2603 auto &Builder = State.Builder;
2604 unsigned CurrentPart = getUnrollPart(*this);
2605 Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2606 /*IsUnitStride*/ true, CurrentPart, Builder);
2607 Value *Ptr = State.get(getOperand(0), VPLane(0));
2608
2609 Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2610 Value *ResultPtr =
2611 Builder.CreateGEP(IndexedTy, Ptr, Increment, "", getGEPNoWrapFlags());
2612
2613 State.set(this, ResultPtr, /*IsScalar*/ true);
2614}
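// For this consecutive (non-reverse) case, the result is simply the scalar
// pointer advanced by CurrentPart * VF elements; e.g. with a fixed VF of 4,
// unroll part 2 produces a GEP with a constant offset of 8.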
2615
2616#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2618 VPSlotTracker &SlotTracker) const {
2619 O << Indent;
2621 O << " = vector-pointer ";
2622
2624}
2625#endif
2626
2628 VPCostContext &Ctx) const {
2629 // Handle cases where only the first lane is used in the same way as the
2630 // legacy cost model does.
2632 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2633
2634 Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2635 Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2636 return (getNumIncomingValues() - 1) *
2637 Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2638 CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
2639}
2640
2641#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2643 VPSlotTracker &SlotTracker) const {
2644 O << Indent << "BLEND ";
2646 O << " =";
2647 if (getNumIncomingValues() == 1) {
2648 // Not a User of any mask: not really blending, this is a
2649 // single-predecessor phi.
2650 O << " ";
2651 getIncomingValue(0)->printAsOperand(O, SlotTracker);
2652 } else {
2653 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2654 O << " ";
2655 getIncomingValue(I)->printAsOperand(O, SlotTracker);
2656 if (I == 0)
2657 continue;
2658 O << "/";
2659 getMask(I)->printAsOperand(O, SlotTracker);
2660 }
2661 }
2662}
2663#endif
2664
2666 assert(!State.Lane && "Reduction being replicated.");
2667 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2670 "In-loop AnyOf reductions aren't currently supported");
2671 // Propagate the fast-math flags carried by the underlying instruction.
2672 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
2673 State.Builder.setFastMathFlags(getFastMathFlags());
2674 Value *NewVecOp = State.get(getVecOp());
2675 if (VPValue *Cond = getCondOp()) {
2676 Value *NewCond = State.get(Cond, State.VF.isScalar());
2677 VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
2678 Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
2679
2680 Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
2681 if (State.VF.isVector())
2682 Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
2683
2684 Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
2685 NewVecOp = Select;
2686 }
2687 Value *NewRed;
2688 Value *NextInChain;
2689 if (IsOrdered) {
2690 if (State.VF.isVector())
2691 NewRed =
2692 createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
2693 else
2694 NewRed = State.Builder.CreateBinOp(
2696 PrevInChain, NewVecOp);
2697 PrevInChain = NewRed;
2698 NextInChain = NewRed;
2699 } else {
2700 PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
2701 NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
2703 NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
2704 else
2705 NextInChain = State.Builder.CreateBinOp(
2707 PrevInChain, NewRed);
2708 }
2709 State.set(this, NextInChain, /*IsScalar*/ true);
2710}
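// Note on the conditional case above: inactive lanes are replaced with the
// identity of the recurrence before reducing, so e.g. a masked in-loop integer
// add reduction at VF = 4 emits roughly (illustrative value names):
//   %sel = select <4 x i1> %mask, <4 x i32> %vec, <4 x i32> zeroinitializer
//   %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
// followed by combining %red with the chain value (an 'add' in this example).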
2711
2713 assert(!State.Lane && "Reduction being replicated.");
2714
2715 auto &Builder = State.Builder;
2716 // Propagate the fast-math flags carried by the underlying instruction.
2717 IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
2718 Builder.setFastMathFlags(getFastMathFlags());
2719
2721 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
2722 Value *VecOp = State.get(getVecOp());
2723 Value *EVL = State.get(getEVL(), VPLane(0));
2724
2725 Value *Mask;
2726 if (VPValue *CondOp = getCondOp())
2727 Mask = State.get(CondOp);
2728 else
2729 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
2730
2731 Value *NewRed;
2732 if (isOrdered()) {
2733 NewRed = createOrderedReduction(Builder, Kind, VecOp, Prev, Mask, EVL);
2734 } else {
2735 NewRed = createSimpleReduction(Builder, VecOp, Kind, Mask, EVL);
2737 NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
2738 else
2739 NewRed = Builder.CreateBinOp(
2741 Prev);
2742 }
2743 State.set(this, NewRed, /*IsScalar*/ true);
2744}
2745
2747 VPCostContext &Ctx) const {
2748 RecurKind RdxKind = getRecurrenceKind();
2749 Type *ElementTy = Ctx.Types.inferScalarType(this);
2750 auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
2751 unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
2753 std::optional<FastMathFlags> OptionalFMF =
2754 ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
2755
2756 // TODO: Support any-of reductions.
2757 assert(
2759 ForceTargetInstructionCost.getNumOccurrences() > 0) &&
2760 "Any-of reduction not implemented in VPlan-based cost model currently.");
2761
2762 // Note that TTI should model the cost of moving the result to a scalar
2763 // register and the BinOp cost in getMinMaxReductionCost().
2766 return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
2767 }
2768
2769 // Note that TTI should model the cost of moving the result to a scalar
2770 // register and the BinOp cost in getArithmeticReductionCost().
2771 return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
2772 Ctx.CostKind);
2773}
2774
2776 ExpressionTypes ExpressionType,
2777 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes)
2778 : VPSingleDefRecipe(VPDef::VPExpressionSC, {}, {}),
2779 ExpressionRecipes(SetVector<VPSingleDefRecipe *>(
2780 ExpressionRecipes.begin(), ExpressionRecipes.end())
2781 .takeVector()),
2782 ExpressionType(ExpressionType) {
2783 assert(!ExpressionRecipes.empty() && "Nothing to combine?");
2784 assert(
2785 none_of(ExpressionRecipes,
2786 [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&
2787 "expression cannot contain recipes with side-effects");
2788
2789 // Maintain a copy of the expression recipes as a set of users.
2790 SmallPtrSet<VPUser *, 4> ExpressionRecipesAsSetOfUsers;
2791 for (auto *R : ExpressionRecipes)
2792 ExpressionRecipesAsSetOfUsers.insert(R);
2793
2794 // Recipes in the expression, except the last one, must only be used by
2795 // (other) recipes inside the expression. If there are other users, external
2796 // to the expression, use a clone of the recipe for external users.
2797 for (VPSingleDefRecipe *R : ExpressionRecipes) {
2798 if (R != ExpressionRecipes.back() &&
2799 any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) {
2800 return !ExpressionRecipesAsSetOfUsers.contains(U);
2801 })) {
2802 // There are users outside of the expression. Clone the recipe and use the
2803 // clone for those external users.
2804 VPSingleDefRecipe *CopyForExtUsers = R->clone();
2805 R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
2806 VPUser &U, unsigned) {
2807 return !ExpressionRecipesAsSetOfUsers.contains(&U);
2808 });
2809 CopyForExtUsers->insertBefore(R);
2810 }
2811 if (R->getParent())
2812 R->removeFromParent();
2813 }
2814
2815 // Internalize all external operands to the expression recipes. To do so,
2816 // create new temporary VPValues for all operands defined by a recipe outside
2817 // the expression. The original operands are added as operands of the
2818 // VPExpressionRecipe itself.
2819 for (auto *R : ExpressionRecipes) {
2820 for (const auto &[Idx, Op] : enumerate(R->operands())) {
2821 auto *Def = Op->getDefiningRecipe();
2822 if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
2823 continue;
2824 addOperand(Op);
2825 LiveInPlaceholders.push_back(new VPValue());
2826 R->setOperand(Idx, LiveInPlaceholders.back());
2827 }
2828 }
2829}
2830
2832 for (auto *R : ExpressionRecipes)
2833 R->insertBefore(this);
2834
2835 for (const auto &[Idx, Op] : enumerate(operands()))
2836 LiveInPlaceholders[Idx]->replaceAllUsesWith(Op);
2837
2838 replaceAllUsesWith(ExpressionRecipes.back());
2839 ExpressionRecipes.clear();
2840}
2841
2843 VPCostContext &Ctx) const {
2844 Type *RedTy = Ctx.Types.inferScalarType(this);
2845 auto *SrcVecTy = cast<VectorType>(
2846 toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
2847 assert(RedTy->isIntegerTy() &&
2848 "VPExpressionRecipe only supports integer types currently.");
2849 unsigned Opcode = RecurrenceDescriptor::getOpcode(
2850 cast<VPReductionRecipe>(ExpressionRecipes.back())->getRecurrenceKind());
2851 switch (ExpressionType) {
2852 case ExpressionTypes::ExtendedReduction: {
2853 return Ctx.TTI.getExtendedReductionCost(
2854 Opcode,
2855 cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
2856 Instruction::ZExt,
2857 RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
2858 }
2859 case ExpressionTypes::MulAccReduction:
2860 return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
2861 Ctx.CostKind);
2862
2863 case ExpressionTypes::ExtMulAccReduction:
2864 return Ctx.TTI.getMulAccReductionCost(
2865 cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
2866 Instruction::ZExt,
2867 Opcode, RedTy, SrcVecTy, Ctx.CostKind);
2868 }
2869 llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
2870}
2871
2873 return any_of(ExpressionRecipes, [](VPSingleDefRecipe *R) {
2874 return R->mayReadFromMemory() || R->mayWriteToMemory();
2875 });
2876}
2877
2879 assert(
2880 none_of(ExpressionRecipes,
2881 [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&
2882 "expression cannot contain recipes with side-effects");
2883 return false;
2884}
2885
2886#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2887
2889 VPSlotTracker &SlotTracker) const {
2890 O << Indent << "EXPRESSION ";
2892 O << " = ";
2893 auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
2894 unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
2895
2896 switch (ExpressionType) {
2897 case ExpressionTypes::ExtendedReduction: {
2899 O << " +";
2900 O << " reduce." << Instruction::getOpcodeName(Opcode) << " (";
2902 Red->printFlags(O);
2903
2904 auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2905 O << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
2906 << *Ext0->getResultType();
2907 if (Red->isConditional()) {
2908 O << ", ";
2909 Red->getCondOp()->printAsOperand(O, SlotTracker);
2910 }
2911 O << ")";
2912 break;
2913 }
2914 case ExpressionTypes::MulAccReduction:
2915 case ExpressionTypes::ExtMulAccReduction: {
2917 O << " + ";
2918 O << "reduce."
2920 RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
2921 << " (";
2922 O << "mul";
2923 bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;
2924 auto *Mul = cast<VPWidenRecipe>(IsExtended ? ExpressionRecipes[2]
2925 : ExpressionRecipes[0]);
2926 Mul->printFlags(O);
2927 if (IsExtended)
2928 O << "(";
2930 if (IsExtended) {
2931 auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
2932 O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
2933 << *Ext0->getResultType() << "), (";
2934 } else {
2935 O << ", ";
2936 }
2938 if (IsExtended) {
2939 auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
2940 O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
2941 << *Ext1->getResultType() << ")";
2942 }
2943 if (Red->isConditional()) {
2944 O << ", ";
2945 Red->getCondOp()->printAsOperand(O, SlotTracker);
2946 }
2947 O << ")";
2948 break;
2949 }
2950 }
2951}
2952
2954 VPSlotTracker &SlotTracker) const {
2955 O << Indent << "REDUCE ";
2957 O << " = ";
2959 O << " +";
2960 printFlags(O);
2961 O << " reduce."
2964 << " (";
2966 if (isConditional()) {
2967 O << ", ";
2969 }
2970 O << ")";
2971}
2972
2974 VPSlotTracker &SlotTracker) const {
2975 O << Indent << "REDUCE ";
2977 O << " = ";
2979 O << " +";
2980 printFlags(O);
2981 O << " vp.reduce."
2984 << " (";
2986 O << ", ";
2988 if (isConditional()) {
2989 O << ", ";
2991 }
2992 O << ")";
2993}
2994
2995#endif
2996
2997/// A helper function to scalarize a single Instruction in the innermost loop.
2998/// Generates a sequence of scalar instances for lane \p Lane. Uses the VPValue
2999/// operands from \p RepRecipe instead of \p Instr's operands.
3000static void scalarizeInstruction(const Instruction *Instr,
3001 VPReplicateRecipe *RepRecipe,
3002 const VPLane &Lane, VPTransformState &State) {
3003 assert((!Instr->getType()->isAggregateType() ||
3004 canVectorizeTy(Instr->getType())) &&
3005 "Expected vectorizable or non-aggregate type.");
3006
3007 // Does this instruction return a value?
3008 bool IsVoidRetTy = Instr->getType()->isVoidTy();
3009
3010 Instruction *Cloned = Instr->clone();
3011 if (!IsVoidRetTy) {
3012 Cloned->setName(Instr->getName() + ".cloned");
3013 Type *ResultTy = State.TypeAnalysis.inferScalarType(RepRecipe);
3014 // The operands of the replicate recipe may have been narrowed, resulting in
3015 // a narrower result type. Update the type of the cloned instruction to the
3016 // correct type.
3017 if (ResultTy != Cloned->getType())
3018 Cloned->mutateType(ResultTy);
3019 }
3020
3021 RepRecipe->applyFlags(*Cloned);
3022 RepRecipe->applyMetadata(*Cloned);
3023
3024 if (RepRecipe->hasPredicate())
3025 cast<CmpInst>(Cloned)->setPredicate(RepRecipe->getPredicate());
3026
3027 if (auto DL = RepRecipe->getDebugLoc())
3028 State.setDebugLocFrom(DL);
3029
3030 // Replace the operands of the cloned instructions with their scalar
3031 // equivalents in the new loop.
3032 for (const auto &I : enumerate(RepRecipe->operands())) {
3033 auto InputLane = Lane;
3034 VPValue *Operand = I.value();
3035 if (vputils::isSingleScalar(Operand))
3036 InputLane = VPLane::getFirstLane();
3037 Cloned->setOperand(I.index(), State.get(Operand, InputLane));
3038 }
3039
3040 // Place the cloned scalar in the new loop.
3041 State.Builder.Insert(Cloned);
3042
3043 State.set(RepRecipe, Cloned, Lane);
3044
3045 // If we just cloned a new assumption, add it to the assumption cache.
3046 if (auto *II = dyn_cast<AssumeInst>(Cloned))
3047 State.AC->registerAssumption(II);
3048
3049 assert(
3050 (RepRecipe->getParent()->getParent() ||
3051 !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
3052 all_of(RepRecipe->operands(),
3053 [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
3054 "Expected a recipe is either within a region or all of its operands "
3055 "are defined outside the vectorized region.");
3056}
3057
3060
3061 if (!State.Lane) {
3062 assert(IsSingleScalar && "VPReplicateRecipes outside replicate regions "
3063 "must have already been unrolled");
3064 scalarizeInstruction(UI, this, VPLane(0), State);
3065 return;
3066 }
3067
3068 assert((State.VF.isScalar() || !isSingleScalar()) &&
3069 "uniform recipe shouldn't be predicated");
3070 assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
3071 scalarizeInstruction(UI, this, *State.Lane, State);
3072 // Insert scalar instance packing it into a vector.
3073 if (State.VF.isVector() && shouldPack()) {
3074 Value *WideValue =
3075 State.Lane->isFirstLane()
3076 ? PoisonValue::get(VectorType::get(UI->getType(), State.VF))
3077 : State.get(this);
3078 State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
3079 *State.Lane));
3080 }
3081}
3082
3084 // Find out whether the recipe is used by a widened recipe via an intervening
3085 // VPPredInstPHIRecipe. In that case, also pack the scalar values into a vector.
3086 return any_of(users(), [](const VPUser *U) {
3087 if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
3088 return !vputils::onlyScalarValuesUsed(PredR);
3089 return false;
3090 });
3091}
3092
3094 VPCostContext &Ctx) const {
3096 // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
3097 // transform, avoid computing their cost multiple times for now.
3098 Ctx.SkipCostComputation.insert(UI);
3099
3100 switch (UI->getOpcode()) {
3101 case Instruction::GetElementPtr:
3102 // We mark this instruction as zero-cost because the cost of GEPs in
3103 // vectorized code depends on whether the corresponding memory instruction
3104 // is scalarized or not. Therefore, we handle GEPs with the memory
3105 // instruction cost.
3106 return 0;
3107 case Instruction::Call: {
3108 auto *CalledFn =
3110
3113 for (const VPValue *ArgOp : ArgOps)
3114 Tys.push_back(Ctx.Types.inferScalarType(ArgOp));
3115
3116 if (CalledFn->isIntrinsic())
3117 // Various pseudo-intrinsics with costs of 0 are scalarized instead of
3118 // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
3119 switch (CalledFn->getIntrinsicID()) {
3120 case Intrinsic::assume:
3121 case Intrinsic::lifetime_end:
3122 case Intrinsic::lifetime_start:
3123 case Intrinsic::sideeffect:
3124 case Intrinsic::pseudoprobe:
3125 case Intrinsic::experimental_noalias_scope_decl: {
3126 assert(getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3127 ElementCount::getFixed(1), Ctx) == 0 &&
3128 "scalarizing intrinsic should be free");
3129 return InstructionCost(0);
3130 }
3131 default:
3132 break;
3133 }
3134
3135 Type *ResultTy = Ctx.Types.inferScalarType(this);
3136 InstructionCost ScalarCallCost =
3137 Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, Tys, Ctx.CostKind);
3138 if (isSingleScalar()) {
3139 if (CalledFn->isIntrinsic())
3140 ScalarCallCost = std::min(
3141 ScalarCallCost,
3142 getCostForIntrinsics(CalledFn->getIntrinsicID(), ArgOps, *this,
3143 ElementCount::getFixed(1), Ctx));
3144 return ScalarCallCost;
3145 }
3146
3147 if (VF.isScalable())
3149
3150 // Compute the cost of scalarizing the result and operands if needed.
3151 InstructionCost ScalarizationCost = 0;
3152 if (VF.isVector()) {
3153 if (!ResultTy->isVoidTy()) {
3154 for (Type *VectorTy :
3155 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
3156 ScalarizationCost += Ctx.TTI.getScalarizationOverhead(
3158 /*Insert=*/true,
3159 /*Extract=*/false, Ctx.CostKind);
3160 }
3161 }
3162 // Skip operands that do not require extraction/scalarization and do not
3163 // incur any overhead.
3164 SmallPtrSet<const VPValue *, 4> UniqueOperands;
3165 Tys.clear();
3166 for (auto *Op : ArgOps) {
3168 !UniqueOperands.insert(Op).second)
3169 continue;
3170 Tys.push_back(toVectorizedTy(Ctx.Types.inferScalarType(Op), VF));
3171 }
3172 ScalarizationCost +=
3173 Ctx.TTI.getOperandsScalarizationOverhead(Tys, Ctx.CostKind);
3174 }
3175
3176 return ScalarCallCost * VF.getFixedValue() + ScalarizationCost;
3177 }
3178 case Instruction::Add:
3179 case Instruction::Sub:
3180 case Instruction::FAdd:
3181 case Instruction::FSub:
3182 case Instruction::Mul:
3183 case Instruction::FMul:
3184 case Instruction::FDiv:
3185 case Instruction::FRem:
3186 case Instruction::Shl:
3187 case Instruction::LShr:
3188 case Instruction::AShr:
3189 case Instruction::And:
3190 case Instruction::Or:
3191 case Instruction::Xor:
3192 case Instruction::ICmp:
3193 case Instruction::FCmp:
3195 Ctx) *
3196 (isSingleScalar() ? 1 : VF.getFixedValue());
3197 case Instruction::Load:
3198 case Instruction::Store: {
3199 if (isSingleScalar()) {
3200 bool IsLoad = UI->getOpcode() == Instruction::Load;
3201 Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
3202 Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
3203 const Align Alignment = getLoadStoreAlignment(UI);
3204 unsigned AS = getLoadStoreAddressSpace(UI);
3206 InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
3207 UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
3208 return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
3209 ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
3210 }
3211 // TODO: See getMemInstScalarizationCost for how to handle replicating and
3212 // predicated cases.
3213 break;
3214 }
3215 }
3216
3217 return Ctx.getLegacyCost(UI, VF);
3218}
3219
3220#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3222 VPSlotTracker &SlotTracker) const {
3223 O << Indent << (IsSingleScalar ? "CLONE " : "REPLICATE ");
3224
3225 if (!getUnderlyingInstr()->getType()->isVoidTy()) {
3227 O << " = ";
3228 }
3229 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
3230 O << "call";
3231 printFlags(O);
3232 O << "@" << CB->getCalledFunction()->getName() << "(";
3234 O, [&O, &SlotTracker](VPValue *Op) {
3235 Op->printAsOperand(O, SlotTracker);
3236 });
3237 O << ")";
3238 } else {
3240 printFlags(O);
3242 }
3243
3244 if (shouldPack())
3245 O << " (S->V)";
3246}
3247#endif
3248
3250 assert(State.Lane && "Branch on Mask works only on single instance.");
3251
3252 VPValue *BlockInMask = getOperand(0);
3253 Value *ConditionBit = State.get(BlockInMask, *State.Lane);
3254
3255 // Replace the temporary unreachable terminator with a new conditional branch,
3256 // whose two destinations will be set later when they are created.
3257 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
3258 assert(isa<UnreachableInst>(CurrentTerminator) &&
3259 "Expected to replace unreachable terminator with conditional branch.");
3260 auto CondBr =
3261 State.Builder.CreateCondBr(ConditionBit, State.CFG.PrevBB, nullptr);
3262 CondBr->setSuccessor(0, nullptr);
3263 CurrentTerminator->eraseFromParent();
3264}
3265
3267 VPCostContext &Ctx) const {
3268 // The legacy cost model doesn't assign costs to branches for individual
3269 // replicate regions. Match the current behavior in the VPlan cost model for
3270 // now.
3271 return 0;
3272}
3273
3275 assert(State.Lane && "Predicated instruction PHI works per instance.");
3276 Instruction *ScalarPredInst =
3277 cast<Instruction>(State.get(getOperand(0), *State.Lane));
3278 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
3279 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
3280 assert(PredicatingBB && "Predicated block has no single predecessor.");
3282 "operand must be VPReplicateRecipe");
3283
3284 // By current pack/unpack logic we need to generate only a single phi node: if
3285 // a vector value for the predicated instruction exists at this point it means
3286 // the instruction has vector users only, and a phi for the vector value is
3287 // needed. In this case the recipe of the predicated instruction is marked to
3288 // also do that packing, thereby "hoisting" the insert-element sequence.
3289 // Otherwise, a phi node for the scalar value is needed.
3290 if (State.hasVectorValue(getOperand(0))) {
3291 Value *VectorValue = State.get(getOperand(0));
3292 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
3293 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
3294 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
3295 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
3296 if (State.hasVectorValue(this))
3297 State.reset(this, VPhi);
3298 else
3299 State.set(this, VPhi);
3300 // NOTE: Currently we need to update the value of the operand, so the next
3301 // predicated iteration inserts its generated value in the correct vector.
3302 State.reset(getOperand(0), VPhi);
3303 } else {
3304 if (vputils::onlyFirstLaneUsed(this) && !State.Lane->isFirstLane())
3305 return;
3306
3307 Type *PredInstType = State.TypeAnalysis.inferScalarType(getOperand(0));
3308 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
3309 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
3310 PredicatingBB);
3311 Phi->addIncoming(ScalarPredInst, PredicatedBB);
3312 if (State.hasScalarValue(this, *State.Lane))
3313 State.reset(this, Phi, *State.Lane);
3314 else
3315 State.set(this, Phi, *State.Lane);
3316 // NOTE: Currently we need to update the value of the operand, so the next
3317 // predicated iteration inserts its generated value in the correct vector.
3318 State.reset(getOperand(0), Phi, *State.Lane);
3319 }
3320}
3321
3322#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3324 VPSlotTracker &SlotTracker) const {
3325 O << Indent << "PHI-PREDICATED-INSTRUCTION ";
3327 O << " = ";
3329}
3330#endif
3331
3333 VPCostContext &Ctx) const {
3335 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3336 unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
3337 ->getAddressSpace();
3338 unsigned Opcode = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this)
3339 ? Instruction::Load
3340 : Instruction::Store;
3341
3342 if (!Consecutive) {
3343 // TODO: Using the original IR may not be accurate.
3344 // Currently, the ARM backend uses the underlying IR to calculate the
3345 // gather/scatter instruction cost.
3346 assert(!Reverse &&
3347 "Non-consecutive memory accesses should not be reversed.");
3348
3350 Type *PtrTy = Ptr->getType();
3351
3352 // If the address value is uniform across all lanes, then the address can be
3353 // calculated with a scalar type and broadcast.
3355 PtrTy = toVectorTy(PtrTy, VF);
3356
3357 return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
3358 Ctx.CostKind) +
3359 Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
3360 Ctx.CostKind, &Ingredient);
3361 }
3362
3364 if (IsMasked) {
3365 Cost +=
3366 Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
3367 } else {
3368 TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
3370 : getOperand(1));
3371 Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
3372 OpInfo, &Ingredient);
3373 }
3374 if (!Reverse)
3375 return Cost;
3376
3377 return Cost += Ctx.TTI.getShuffleCost(
3379 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3380}
3381
3383 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
3384 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
3385 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3386 bool CreateGather = !isConsecutive();
3387
3388 auto &Builder = State.Builder;
3389 Value *Mask = nullptr;
3390 if (auto *VPMask = getMask()) {
3391 // Mask reversal is only needed for actual (non-null) masks; a null mask
3392 // represents all-ones, and the reverse of an all-ones mask is still all-ones.
3393 Mask = State.get(VPMask);
3394 if (isReverse())
3395 Mask = Builder.CreateVectorReverse(Mask, "reverse");
3396 }
3397
3398 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
3399 Value *NewLI;
3400 if (CreateGather) {
3401 NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
3402 "wide.masked.gather");
3403 } else if (Mask) {
3404 NewLI =
3405 Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
3406 PoisonValue::get(DataTy), "wide.masked.load");
3407 } else {
3408 NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
3409 }
3411 if (Reverse)
3412 NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
3413 State.set(this, NewLI);
3414}
3415
3416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3418 VPSlotTracker &SlotTracker) const {
3419 O << Indent << "WIDEN ";
3421 O << " = load ";
3423}
3424#endif
3425
3426 /// Use an all-true mask for the reverse rather than the actual mask, as it
3427 /// avoids a dependence without affecting the result.
3429 Value *EVL, const Twine &Name) {
3430 VectorType *ValTy = cast<VectorType>(Operand->getType());
3431 Value *AllTrueMask =
3432 Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
3433 return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
3434 {Operand, AllTrueMask, EVL}, nullptr, Name);
3435}
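// For reference, the call built above takes roughly the following form for a
// <vscale x 4 x i32> operand (a sketch with illustrative value names):
//   call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(
//       <vscale x 4 x i32> %v, <vscale x 4 x i1> splat (i1 true), i32 %evl)
// i.e. only the first %evl elements are reversed, under an all-true mask.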
3436
3438 Type *ScalarDataTy = getLoadStoreType(&Ingredient);
3439 auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
3440 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3441 bool CreateGather = !isConsecutive();
3442
3443 auto &Builder = State.Builder;
3444 CallInst *NewLI;
3445 Value *EVL = State.get(getEVL(), VPLane(0));
3446 Value *Addr = State.get(getAddr(), !CreateGather);
3447 Value *Mask = nullptr;
3448 if (VPValue *VPMask = getMask()) {
3449 Mask = State.get(VPMask);
3450 if (isReverse())
3451 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
3452 } else {
3453 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3454 }
3455
3456 if (CreateGather) {
3457 NewLI =
3458 Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
3459 nullptr, "wide.masked.gather");
3460 } else {
3461 NewLI = Builder.CreateIntrinsic(DataTy, Intrinsic::vp_load,
3462 {Addr, Mask, EVL}, nullptr, "vp.op.load");
3463 }
3464 NewLI->addParamAttr(
3465 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
3466 applyMetadata(*NewLI);
3467 Instruction *Res = NewLI;
3468 if (isReverse())
3469 Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
3470 State.set(this, Res);
3471}
3472
3474 VPCostContext &Ctx) const {
3475 if (!Consecutive || IsMasked)
3476 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3477
3478 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost() here
3479 // because the EVL recipes use EVL to replace the tail mask, but the legacy
3480 // model always accounts for the cost of the mask.
3481 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we no
3482 // longer need to compare against the legacy cost model.
3484 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3485 unsigned AS = getLoadStoreAddressSpace(&Ingredient);
3486 InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
3487 Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
3488 if (!Reverse)
3489 return Cost;
3490
3491 return Cost + Ctx.TTI.getShuffleCost(
3493 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3494}
3495
3496#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3498 VPSlotTracker &SlotTracker) const {
3499 O << Indent << "WIDEN ";
3501 O << " = vp.load ";
3503}
3504#endif
3505
3507 VPValue *StoredVPValue = getStoredValue();
3508 bool CreateScatter = !isConsecutive();
3509 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3510
3511 auto &Builder = State.Builder;
3512
3513 Value *Mask = nullptr;
3514 if (auto *VPMask = getMask()) {
3515 // Mask reversal is only needed for actual (non-null) masks; a null mask
3516 // represents all-ones, and the reverse of an all-ones mask is still all-ones.
3517 Mask = State.get(VPMask);
3518 if (isReverse())
3519 Mask = Builder.CreateVectorReverse(Mask, "reverse");
3520 }
3521
3522 Value *StoredVal = State.get(StoredVPValue);
3523 if (isReverse()) {
3524 // If we store to reverse consecutive memory locations, then we need
3525 // to reverse the order of elements in the stored value.
3526 StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
3527 // We don't want to update the value in the map as it might be used in
3528 // another expression. So don't call resetVectorValue(StoredVal).
3529 }
3530 Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
3531 Instruction *NewSI = nullptr;
3532 if (CreateScatter)
3533 NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
3534 else if (Mask)
3535 NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
3536 else
3537 NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
3538 applyMetadata(*NewSI);
3539}
3540
3541#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3543 VPSlotTracker &SlotTracker) const {
3544 O << Indent << "WIDEN store ";
3546}
3547#endif
3548
3550 VPValue *StoredValue = getStoredValue();
3551 bool CreateScatter = !isConsecutive();
3552 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3553
3554 auto &Builder = State.Builder;
3555
3556 CallInst *NewSI = nullptr;
3557 Value *StoredVal = State.get(StoredValue);
3558 Value *EVL = State.get(getEVL(), VPLane(0));
3559 if (isReverse())
3560 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
3561 Value *Mask = nullptr;
3562 if (VPValue *VPMask = getMask()) {
3563 Mask = State.get(VPMask);
3564 if (isReverse())
3565 Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
3566 } else {
3567 Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
3568 }
3569 Value *Addr = State.get(getAddr(), !CreateScatter);
3570 if (CreateScatter) {
3571 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
3572 Intrinsic::vp_scatter,
3573 {StoredVal, Addr, Mask, EVL});
3574 } else {
3575 NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
3576 Intrinsic::vp_store,
3577 {StoredVal, Addr, Mask, EVL});
3578 }
3579 NewSI->addParamAttr(
3580 1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
3581 applyMetadata(*NewSI);
3582}
3583
3585 VPCostContext &Ctx) const {
3586 if (!Consecutive || IsMasked)
3587 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3588
3589 // We need to use getMaskedMemoryOpCost() instead of getMemoryOpCost() here
3590 // because the EVL recipes use EVL to replace the tail mask, but the legacy
3591 // model always accounts for the cost of the mask.
3592 // TODO: Use getMemoryOpCost() instead of getMaskedMemoryOpCost() once we no
3593 // longer need to compare against the legacy cost model.
3595 const Align Alignment = getLoadStoreAlignment(&Ingredient);
3596 unsigned AS = getLoadStoreAddressSpace(&Ingredient);
3597 InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
3598 Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
3599 if (!Reverse)
3600 return Cost;
3601
3602 return Cost + Ctx.TTI.getShuffleCost(
3604 cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
3605}
3606
3607#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3609 VPSlotTracker &SlotTracker) const {
3610 O << Indent << "WIDEN vp.store ";
3612}
3613#endif
3614
3616 VectorType *DstVTy, const DataLayout &DL) {
3617 // Verify that V is a vector type with same number of elements as DstVTy.
3618 auto VF = DstVTy->getElementCount();
3619 auto *SrcVecTy = cast<VectorType>(V->getType());
3620 assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
3621 Type *SrcElemTy = SrcVecTy->getElementType();
3622 Type *DstElemTy = DstVTy->getElementType();
3623 assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
3624 "Vector elements must have same size");
3625
3626 // Do a direct cast if element types are castable.
3627 if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
3628 return Builder.CreateBitOrPointerCast(V, DstVTy);
3629 }
3630 // V cannot be cast directly to the desired vector type. This may happen when
3631 // V is a floating-point vector but DstVTy is a vector of pointers, or
3632 // vice-versa. Handle this with a two-step bitcast through an intermediate
3633 // integer type, i.e. Ptr <-> Int <-> Float.
3634 assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
3635 "Only one type should be a pointer type");
3636 assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
3637 "Only one type should be a floating point type");
3638 Type *IntTy =
3639 IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
3640 auto *VecIntTy = VectorType::get(IntTy, VF);
3641 Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
3642 return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
3643}
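// For illustration, assuming 64-bit pointers: casting <4 x ptr> to
// <4 x double> cannot be done as a single bit-or-noop cast, so the code above
// goes <4 x ptr> -> <4 x i64> -> <4 x double> via the intermediate integer
// vector type.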
3644
3645/// Return a vector containing interleaved elements from multiple
3646/// smaller input vectors.
3647static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
3648 const Twine &Name) {
3649 unsigned Factor = Vals.size();
3650 assert(Factor > 1 && "Tried to interleave invalid number of vectors");
3651
3652 VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
3653#ifndef NDEBUG
3654 for (Value *Val : Vals)
3655 assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
3656#endif
3657
3658 // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
3659 // must use intrinsics to interleave.
3660 if (VecTy->isScalableTy()) {
3661 assert(Factor <= 8 && "Unsupported interleave factor for scalable vectors");
3662 return Builder.CreateVectorInterleave(Vals, Name);
3663 }
3664
3665 // Fixed length. Start by concatenating all vectors into a wide vector.
3666 Value *WideVec = concatenateVectors(Builder, Vals);
3667
3668 // Interleave the elements into the wide vector.
3669 const unsigned NumElts = VecTy->getElementCount().getFixedValue();
3670 return Builder.CreateShuffleVector(
3671 WideVec, createInterleaveMask(NumElts, Factor), Name);
3672}
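// For illustration, assuming two fixed <4 x i32> inputs (Factor = 2): the
// fixed-length path concatenates them into an <8 x i32> and shuffles with
// createInterleaveMask(4, 2) == <0, 4, 1, 5, 2, 6, 3, 7>.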
3673
3674// Try to vectorize the interleave group that \p Instr belongs to.
3675//
3676// E.g. Translate following interleaved load group (factor = 3):
3677// for (i = 0; i < N; i+=3) {
3678// R = Pic[i]; // Member of index 0
3679// G = Pic[i+1]; // Member of index 1
3680// B = Pic[i+2]; // Member of index 2
3681// ... // do something to R, G, B
3682// }
3683// To:
3684// %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
3685// %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
3686// %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
3687// %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
3688//
3689// Or translate following interleaved store group (factor = 3):
3690// for (i = 0; i < N; i+=3) {
3691// ... do something to R, G, B
3692// Pic[i] = R; // Member of index 0
3693// Pic[i+1] = G; // Member of index 1
3694// Pic[i+2] = B; // Member of index 2
3695// }
3696// To:
3697// %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
3698// %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
3699// %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
3700// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
3701// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
3702void VPInterleaveRecipe::execute(VPTransformState &State) {
3703 assert(!State.Lane && "Interleave group being replicated.");
3704 assert((!needsMaskForGaps() || !State.VF.isScalable()) &&
3705 "Masking gaps for scalable vectors is not yet supported.");
3707 Instruction *Instr = Group->getInsertPos();
3708
3709 // Prepare for the vector type of the interleaved load/store.
3710 Type *ScalarTy = getLoadStoreType(Instr);
3711 unsigned InterleaveFactor = Group->getFactor();
3712 auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
3713
3714 VPValue *BlockInMask = getMask();
3715 VPValue *Addr = getAddr();
3716 Value *ResAddr = State.get(Addr, VPLane(0));
3717
3718 auto CreateGroupMask = [&BlockInMask, &State,
3719 &InterleaveFactor](Value *MaskForGaps) -> Value * {
3720 if (State.VF.isScalable()) {
3721 assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
3722 assert(InterleaveFactor <= 8 &&
3723 "Unsupported deinterleave factor for scalable vectors");
3724 auto *ResBlockInMask = State.get(BlockInMask);
3725 SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
3726 return interleaveVectors(State.Builder, Ops, "interleaved.mask");
3727 }
3728
3729 if (!BlockInMask)
3730 return MaskForGaps;
3731
3732 Value *ResBlockInMask = State.get(BlockInMask);
3733 Value *ShuffledMask = State.Builder.CreateShuffleVector(
3734 ResBlockInMask,
3735 createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
3736 "interleaved.mask");
3737 return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
3738 ShuffledMask, MaskForGaps)
3739 : ShuffledMask;
3740 };
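// For illustration, assuming a fixed VF of 4 and an interleave factor of 3:
// a block mask <m0, m1, m2, m3> is replicated by the shuffle above to
// <m0, m0, m0, m1, m1, m1, m2, m2, m2, m3, m3, m3>, so all members of a tuple
// share their lane's predicate; a gap mask, if present, is AND'ed in.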
3741
3742 const DataLayout &DL = Instr->getDataLayout();
3743 // Vectorize the interleaved load group.
3744 if (isa<LoadInst>(Instr)) {
3745 Value *MaskForGaps = nullptr;
3746 if (needsMaskForGaps()) {
3747 MaskForGaps =
3748 createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
3749 assert(MaskForGaps && "Mask for Gaps is required but it is null");
3750 }
3751
3752 Instruction *NewLoad;
3753 if (BlockInMask || MaskForGaps) {
3754 Value *GroupMask = CreateGroupMask(MaskForGaps);
3755 Value *PoisonVec = PoisonValue::get(VecTy);
3756 NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
3757 Group->getAlign(), GroupMask,
3758 PoisonVec, "wide.masked.vec");
3759 } else
3760 NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
3761 Group->getAlign(), "wide.vec");
3762 applyMetadata(*NewLoad);
3763 // TODO: Also manage existing metadata using VPIRMetadata.
3764 Group->addMetadata(NewLoad);
3765
3766 ArrayRef<VPValue *> VPDefs = definedValues();
3767 if (VecTy->isScalableTy()) {
3768 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3769 // so must use intrinsics to deinterleave.
3770 assert(InterleaveFactor <= 8 &&
3771 "Unsupported deinterleave factor for scalable vectors");
3772 NewLoad = State.Builder.CreateIntrinsic(
3773 Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
3774 NewLoad->getType(), NewLoad,
3775 /*FMFSource=*/nullptr, "strided.vec");
3776 }
3777
3778 auto CreateStridedVector = [&InterleaveFactor, &State,
3779 &NewLoad](unsigned Index) -> Value * {
3780 assert(Index < InterleaveFactor && "Illegal group index");
3781 if (State.VF.isScalable())
3782 return State.Builder.CreateExtractValue(NewLoad, Index);
3783
3784 // For fixed length VF, use shuffle to extract the sub-vectors from the
3785 // wide load.
3786 auto StrideMask =
3787 createStrideMask(Index, InterleaveFactor, State.VF.getFixedValue());
3788 return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
3789 "strided.vec");
3790 };
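// For illustration, assuming the factor-3 load example in the comment above
// with a fixed VF of 4: member index 1 (the G values) is extracted with
// createStrideMask(1, 3, 4) == <1, 4, 7, 10>.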
3791
3792 for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
3793 Instruction *Member = Group->getMember(I);
3794
3795 // Skip the gaps in the group.
3796 if (!Member)
3797 continue;
3798
3799 Value *StridedVec = CreateStridedVector(I);
3800
3801 // If this member has a different type, cast the result to the member's type.
3802 if (Member->getType() != ScalarTy) {
3803 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3804 StridedVec =
3805 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3806 }
3807
3808 if (Group->isReverse())
3809 StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
3810
3811 State.set(VPDefs[J], StridedVec);
3812 ++J;
3813 }
3814 return;
3815 }
3816
3817 // The sub-vector type for the current instruction.
3818 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3819
3820 // Vectorize the interleaved store group.
3821 Value *MaskForGaps =
3822 createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
3823 assert(((MaskForGaps != nullptr) == needsMaskForGaps()) &&
3824 "Mismatch between NeedsMaskForGaps and MaskForGaps");
3825 ArrayRef<VPValue *> StoredValues = getStoredValues();
3826 // Collect the stored vector from each member.
3827 SmallVector<Value *, 4> StoredVecs;
3828 unsigned StoredIdx = 0;
3829 for (unsigned i = 0; i < InterleaveFactor; i++) {
3830 assert((Group->getMember(i) || MaskForGaps) &&
3831 "Fail to get a member from an interleaved store group");
3832 Instruction *Member = Group->getMember(i);
3833
3834 // Skip the gaps in the group.
3835 if (!Member) {
3836 Value *Undef = PoisonValue::get(SubVT);
3837 StoredVecs.push_back(Undef);
3838 continue;
3839 }
3840
3841 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3842 ++StoredIdx;
3843
3844 if (Group->isReverse())
3845 StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
3846
3847 // If this member has a different type, cast it to the unified sub-vector type.
3848
3849 if (StoredVec->getType() != SubVT)
3850 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3851
3852 StoredVecs.push_back(StoredVec);
3853 }
3854
3855 // Interleave all the smaller vectors into one wider vector.
3856 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
3857 Instruction *NewStoreInstr;
3858 if (BlockInMask || MaskForGaps) {
3859 Value *GroupMask = CreateGroupMask(MaskForGaps);
3860 NewStoreInstr = State.Builder.CreateMaskedStore(
3861 IVec, ResAddr, Group->getAlign(), GroupMask);
3862 } else
3863 NewStoreInstr =
3864 State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
3865
3866 applyMetadata(*NewStoreInstr);
3867 // TODO: Also manage existing metadata using VPIRMetadata.
3868 Group->addMetadata(NewStoreInstr);
3869}
3870
3871#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3873 VPSlotTracker &SlotTracker) const {
3875 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
3876 IG->getInsertPos()->printAsOperand(O, false);
3877 O << ", ";
3879 VPValue *Mask = getMask();
3880 if (Mask) {
3881 O << ", ";
3882 Mask->printAsOperand(O, SlotTracker);
3883 }
3884
3885 unsigned OpIdx = 0;
3886 for (unsigned i = 0; i < IG->getFactor(); ++i) {
3887 if (!IG->getMember(i))
3888 continue;
3889 if (getNumStoreOperands() > 0) {
3890 O << "\n" << Indent << " store ";
3892 O << " to index " << i;
3893 } else {
3894 O << "\n" << Indent << " ";
3896 O << " = load from index " << i;
3897 }
3898 ++OpIdx;
3899 }
3900}
3901#endif
3902
3903void VPInterleaveEVLRecipe::execute(VPTransformState &State) {
3904 assert(!State.Lane && "Interleave group being replicated.");
3905 assert(State.VF.isScalable() &&
3906 "Only support scalable VF for EVL tail-folding.");
3908 "Masking gaps for scalable vectors is not yet supported.");
3910 Instruction *Instr = Group->getInsertPos();
3911
3912 // Prepare for the vector type of the interleaved load/store.
3913 Type *ScalarTy = getLoadStoreType(Instr);
3914 unsigned InterleaveFactor = Group->getFactor();
3915 assert(InterleaveFactor <= 8 &&
3916 "Unsupported deinterleave/interleave factor for scalable vectors");
3917 ElementCount WideVF = State.VF * InterleaveFactor;
3918 auto *VecTy = VectorType::get(ScalarTy, WideVF);
3919
3920 VPValue *Addr = getAddr();
3921 Value *ResAddr = State.get(Addr, VPLane(0));
3922 Value *EVL = State.get(getEVL(), VPLane(0));
3923 Value *InterleaveEVL = State.Builder.CreateMul(
3924 EVL, ConstantInt::get(EVL->getType(), InterleaveFactor), "interleave.evl",
3925 /* NUW= */ true, /* NSW= */ true);
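// For illustration, assuming an interleave factor of 3: this emits
//   %interleave.evl = mul nuw nsw i32 %evl, 3
// so that the vp.load/vp.store below covers every member of each tuple.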
3926 LLVMContext &Ctx = State.Builder.getContext();
3927
3928 Value *GroupMask = nullptr;
3929 if (VPValue *BlockInMask = getMask()) {
3930 SmallVector<Value *> Ops(InterleaveFactor, State.get(BlockInMask));
3931 GroupMask = interleaveVectors(State.Builder, Ops, "interleaved.mask");
3932 } else {
3933 GroupMask =
3934 State.Builder.CreateVectorSplat(WideVF, State.Builder.getTrue());
3935 }
3936
3937 // Vectorize the interleaved load group.
3938 if (isa<LoadInst>(Instr)) {
3939 CallInst *NewLoad = State.Builder.CreateIntrinsic(
3940 VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL}, nullptr,
3941 "wide.vp.load");
3942 NewLoad->addParamAttr(0,
3943 Attribute::getWithAlignment(Ctx, Group->getAlign()));
3944
3945 applyMetadata(*NewLoad);
3946 // TODO: Also manage existing metadata using VPIRMetadata.
3947 Group->addMetadata(NewLoad);
3948
3949 // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3950 // so must use intrinsics to deinterleave.
3951 NewLoad = State.Builder.CreateIntrinsic(
3952 Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
3953 NewLoad->getType(), NewLoad,
3954 /*FMFSource=*/nullptr, "strided.vec");
3955
3956 const DataLayout &DL = Instr->getDataLayout();
3957 for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
3958 Instruction *Member = Group->getMember(I);
3959 // Skip the gaps in the group.
3960 if (!Member)
3961 continue;
3962
3963 Value *StridedVec = State.Builder.CreateExtractValue(NewLoad, I);
3964 // If this member has a different type, cast the result to the member's type.
3965 if (Member->getType() != ScalarTy) {
3966 VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
3967 StridedVec =
3968 createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
3969 }
3970
3971 State.set(getVPValue(J), StridedVec);
3972 ++J;
3973 }
3974 return;
3975 } // End for interleaved load.
3976
3977 // The sub-vector type for the current instruction.
3978 auto *SubVT = VectorType::get(ScalarTy, State.VF);
3979 // Vectorize the interleaved store group.
3980 ArrayRef<VPValue *> StoredValues = getStoredValues();
3981 // Collect the stored vector from each member.
3982 SmallVector<Value *, 4> StoredVecs;
3983 const DataLayout &DL = Instr->getDataLayout();
3984 for (unsigned I = 0, StoredIdx = 0; I < InterleaveFactor; I++) {
3985 Instruction *Member = Group->getMember(I);
3986 // Skip the gaps in the group.
3987 if (!Member) {
3988 StoredVecs.push_back(PoisonValue::get(SubVT));
3989 continue;
3990 }
3991
3992 Value *StoredVec = State.get(StoredValues[StoredIdx]);
3993 // If this member has a different type, cast it to the unified sub-vector type.
3994 if (StoredVec->getType() != SubVT)
3995 StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
3996
3997 StoredVecs.push_back(StoredVec);
3998 ++StoredIdx;
3999 }
4000
4001 // Interleave all the smaller vectors into one wider vector.
4002 Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
4003 CallInst *NewStore =
4004 State.Builder.CreateIntrinsic(Type::getVoidTy(Ctx), Intrinsic::vp_store,
4005 {IVec, ResAddr, GroupMask, InterleaveEVL});
4006 NewStore->addParamAttr(1,
4007 Attribute::getWithAlignment(Ctx, Group->getAlign()));
4008
4009 applyMetadata(*NewStore);
4010 // TODO: Also manage existing metadata using VPIRMetadata.
4011 Group->addMetadata(NewStore);
4012}
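// For illustration, assuming a factor-2 load group with <vscale x 4 x i32>
// members: the load path above emits a <vscale x 8 x i32> llvm.vp.load
// limited to %interleave.evl lanes, followed by llvm.vector.deinterleave2 to
// split the wide result back into the two member vectors.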
4013
4014#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4016 VPSlotTracker &SlotTracker) const {
4018 O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
4019 IG->getInsertPos()->printAsOperand(O, false);
4020 O << ", ";
4022 O << ", ";
4024 if (VPValue *Mask = getMask()) {
4025 O << ", ";
4026 Mask->printAsOperand(O, SlotTracker);
4027 }
4028
4029 unsigned OpIdx = 0;
4030 for (unsigned i = 0; i < IG->getFactor(); ++i) {
4031 if (!IG->getMember(i))
4032 continue;
4033 if (getNumStoreOperands() > 0) {
4034 O << "\n" << Indent << " vp.store ";
4036 O << " to index " << i;
4037 } else {
4038 O << "\n" << Indent << " ";
4040 O << " = vp.load from index " << i;
4041 }
4042 ++OpIdx;
4043 }
4044}
4045#endif
4046
4047InstructionCost VPInterleaveBase::computeCost(ElementCount VF,
4048 VPCostContext &Ctx) const {
4049 Instruction *InsertPos = getInsertPos();
4050 // Find the VPValue index of the interleave group. We need to skip gaps.
4051 unsigned InsertPosIdx = 0;
4052 for (unsigned Idx = 0; Idx < IG->getFactor(); ++Idx)
4053 if (auto *Member = IG->getMember(Idx)) {
4054 if (Member == InsertPos)
4055 break;
4056 InsertPosIdx++;
4057 }
4058 Type *ValTy = Ctx.Types.inferScalarType(
4059 getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
4060 : getStoredValues()[InsertPosIdx]);
4061 auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
4062 unsigned AS = getLoadStoreAddressSpace(InsertPos);
4063
4064 unsigned InterleaveFactor = IG->getFactor();
4065 auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
4066
4067 // Holds the indices of existing members in the interleaved group.
4068 SmallVector<unsigned, 4> Indices;
4069 for (unsigned IF = 0; IF < InterleaveFactor; IF++)
4070 if (IG->getMember(IF))
4071 Indices.push_back(IF);
4072
4073 // Calculate the cost of the whole interleaved group.
4074 InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
4075 InsertPos->getOpcode(), WideVecTy, IG->getFactor(), Indices,
4076 IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);
4077
4078 if (!IG->isReverse())
4079 return Cost;
4080
4081 return Cost + IG->getNumMembers() *
4082 Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
4083 VectorTy, VectorTy, {}, Ctx.CostKind,
4084 0);
4085}
4086
4087#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4089 VPSlotTracker &SlotTracker) const {
4090 O << Indent << "EMIT ";
4092 O << " = CANONICAL-INDUCTION ";
4094}
4095#endif
4096
4098 return IsScalarAfterVectorization &&
4099 (!IsScalable || vputils::onlyFirstLaneUsed(this));
4100}
4101
4102#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4104 VPSlotTracker &SlotTracker) const {
4105 assert((getNumOperands() == 3 || getNumOperands() == 5) &&
4106 "unexpected number of operands");
4107 O << Indent << "EMIT ";
4109 O << " = WIDEN-POINTER-INDUCTION ";
4111 O << ", ";
4113 O << ", ";
4115 if (getNumOperands() == 5) {
4116 O << ", ";
4118 O << ", ";
4120 }
4121}
4122
4124 VPSlotTracker &SlotTracker) const {
4125 O << Indent << "EMIT ";
4127 O << " = EXPAND SCEV " << *Expr;
4128}
4129#endif
4130
4131void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
4132 Value *CanonicalIV = State.get(getOperand(0), /*IsScalar*/ true);
4133 Type *STy = CanonicalIV->getType();
4134 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
4135 ElementCount VF = State.VF;
4136 Value *VStart = VF.isScalar()
4137 ? CanonicalIV
4138 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
4139 Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
4140 if (VF.isVector()) {
4141 VStep = Builder.CreateVectorSplat(VF, VStep);
4142 VStep =
4143 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
4144 }
4145 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
4146 State.set(this, CanonicalVectorIV);
4147}
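// For illustration, assuming a fixed VF of 4 and unroll part 0: VStep is
// splat(0 * VF) + <0, 1, 2, 3>, so %vec.iv becomes
// <iv, iv + 1, iv + 2, iv + 3>; later unroll parts start at iv + Part * VF.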
4148
4149#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4151 VPSlotTracker &SlotTracker) const {
4152 O << Indent << "EMIT ";
4154 O << " = WIDEN-CANONICAL-INDUCTION ";
4156}
4157#endif
4158
4159void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
4160 auto &Builder = State.Builder;
4161 // Create a vector from the initial value.
4162 auto *VectorInit = getStartValue()->getLiveInIRValue();
4163
4164 Type *VecTy = State.VF.isScalar()
4165 ? VectorInit->getType()
4166 : VectorType::get(VectorInit->getType(), State.VF);
4167
4168 BasicBlock *VectorPH =
4169 State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
4170 if (State.VF.isVector()) {
4171 auto *IdxTy = Builder.getInt32Ty();
4172 auto *One = ConstantInt::get(IdxTy, 1);
4173 IRBuilder<>::InsertPointGuard Guard(Builder);
4174 Builder.SetInsertPoint(VectorPH->getTerminator());
4175 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
4176 auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
4177 VectorInit = Builder.CreateInsertElement(
4178 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
4179 }
4180
4181 // Create a phi node for the new recurrence.
4182 PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
4183 Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
4184 Phi->addIncoming(VectorInit, VectorPH);
4185 State.set(this, Phi);
4186}
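// For illustration, assuming a fixed VF of 4 and a scalar start value %init:
// the preheader code above builds
//   %vector.recur.init = insertelement <4 x T> poison, T %init, i32 3
// placing the incoming scalar in the last lane; here the phi only receives
// this preheader incoming (T is a placeholder for the recurrence type).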
4187
4190 VPCostContext &Ctx) const {
4191 if (VF.isScalar())
4192 return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
4193
4194 return 0;
4195}
4196
4197#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4199 VPSlotTracker &SlotTracker) const {
4200 O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
4202 O << " = phi ";
4204}
4205#endif
4206
4207void VPReductionPHIRecipe::execute(VPTransformState &State) {
4208 // Reductions do not have to start at zero. They can start with
4209 // any loop invariant values.
4210 VPValue *StartVPV = getStartValue();
4211
4212 // In order to support recurrences we need to be able to vectorize Phi nodes.
4213 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
4214 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
4215 // this value when we vectorize all of the instructions that use the PHI.
4216 BasicBlock *VectorPH =
4217 State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
4218 bool ScalarPHI = State.VF.isScalar() || IsInLoop;
4219 Value *StartV = State.get(StartVPV, ScalarPHI);
4220 Type *VecTy = StartV->getType();
4221
4222 BasicBlock *HeaderBB = State.CFG.PrevBB;
4223 assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
4224 "recipe must be in the vector loop header");
4225 auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
4226 Phi->insertBefore(HeaderBB->getFirstInsertionPt());
4227 State.set(this, Phi, IsInLoop);
4228
4229 Phi->addIncoming(StartV, VectorPH);
4230}
4231
4232#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4234 VPSlotTracker &SlotTracker) const {
4235 O << Indent << "WIDEN-REDUCTION-PHI ";
4236
4238 O << " = phi ";
4240 if (VFScaleFactor != 1)
4241 O << " (VF scaled by 1/" << VFScaleFactor << ")";
4242}
4243#endif
4244
4245void VPWidenPHIRecipe::execute(VPTransformState &State) {
4246 Value *Op0 = State.get(getOperand(0));
4247 Type *VecTy = Op0->getType();
4248 Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
4249 State.set(this, VecPhi);
4250}
4251
4252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4254 VPSlotTracker &SlotTracker) const {
4255 O << Indent << "WIDEN-PHI ";
4256
4258 O << " = phi ";
4260}
4261#endif
4262
4263// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
4264// remove VPActiveLaneMaskPHIRecipe.
4265void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
4266 BasicBlock *VectorPH =
4267 State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
4268 Value *StartMask = State.get(getOperand(0));
4269 PHINode *Phi =
4270 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
4271 Phi->addIncoming(StartMask, VectorPH);
4272 State.set(this, Phi);
4273}
4274
4275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4277 VPSlotTracker &SlotTracker) const {
4278 O << Indent << "ACTIVE-LANE-MASK-PHI ";
4279
4281 O << " = phi ";
4283}
4284#endif
4285
4286#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4288 VPSlotTracker &SlotTracker) const {
4289 O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
4290
4292 O << " = phi ";
4294}
4295#endif
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
AMDGPU Lower Kernel Arguments
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition Compiler.h:404
Hexagon Common GEP
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
static bool isOrdered(const Instruction *I)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This file contains the declarations of different VPlan-related auxiliary helpers.
static Instruction * createReverseEVL(IRBuilderBase &Builder, Value *Operand, Value *EVL, const Twine &Name)
Use all-true mask for reverse rather than actual mask, as it avoids a dependence w/o affecting the re...
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
static InstructionCost getCostForIntrinsics(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost for the intrinsic ID with Operands, produced by R.
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
static Type * getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride, unsigned CurrentPart, IRBuilderBase &Builder)
SmallVector< Value *, 2 > VectorParts
static void scalarizeInstruction(const Instruction *Instr, VPReplicateRecipe *RepRecipe, const VPLane &Lane, VPTransformState &State)
A helper function to scalarize a single Instruction in the innermost loop.
static Constant * getSignedIntOrFpConstant(Type *Ty, int64_t C)
A helper function that returns an integer or floating-point constant with value C.
static BranchInst * createCondBranch(Value *Cond, VPBasicBlock *VPBB, VPTransformState &State)
Create a conditional branch using Cond branching to the successors of VPBB.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:984
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
static LLVM_ABI StringRef getPredicateName(Predicate P)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
A debug info location.
Definition DebugLoc.h:124
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setAllowContract(bool B=true)
Definition FMF.h:90
bool noSignedZeros() const
Definition FMF.h:67
bool noInfs() const
Definition FMF.h:66
void setAllowReciprocal(bool B=true)
Definition FMF.h:87
bool allowReciprocal() const
Definition FMF.h:68
LLVM_ABI void print(raw_ostream &O) const
Print fast-math flags to O.
Definition Operator.cpp:271
void setNoSignedZeros(bool B=true)
Definition FMF.h:84
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
bool approxFunc() const
Definition FMF.h:70
void setNoNaNs(bool B=true)
Definition FMF.h:78
void setAllowReassoc(bool B=true)
Flag setters.
Definition FMF.h:75
bool noNaNs() const
Definition FMF.h:65
void setApproxFunc(bool B=true)
Definition FMF.h:93
void setNoInfs(bool B=true)
Definition FMF.h:81
bool allowContract() const
Definition FMF.h:69
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
bool willReturn() const
Determine if the function will return.
Definition Function.h:661
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:594
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2571
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2625
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2559
LLVM_ABI Value * CreateVectorSplice(Value *V1, Value *V2, int64_t Imm, const Twine &Name="")
Return a vector splice intrinsic if using scalable vectors, otherwise return a shufflevector.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2618
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2637
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2036
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition IRBuilder.h:345
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition IRBuilder.h:567
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2333
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:527
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2463
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1805
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2329
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition IRBuilder.h:1134
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2082
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:1708
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition IRBuilder.h:1725
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2439
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1437
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
static InstructionCost getInvalid(CostType Val=0)
bool isCast() const
bool isBinaryOp() const
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const char * getOpcodeName() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isUnaryOp() const
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
bool isReverse() const
InstTy * getInsertPos() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Align getAlign() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
static bool isSignedRecurrenceKind(RecurKind Kind)
Returns true if recurrece kind is a signed redux kind.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindLastIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
Definition SetVector.h:59
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition SetVector.h:93
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
@ TCC_Free
Expected to fold away in lowering.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Reverse
Reverse the order of the vector.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Reversed
The cast is used with a reversed load/store.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
@ Interleave
The cast is used with an interleaved load/store.
@ GatherScatter
The cast is used with a gather/scatter.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
value_op_iterator value_op_end()
Definition User.h:313
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
value_op_iterator value_op_begin()
Definition User.h:310
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3754
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:3807
iterator end()
Definition VPlan.h:3791
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:3820
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2422
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2417
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
VPRegionBlock * getParent()
Definition VPlan.h:173
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
VPlan * getPlan()
Definition VPlan.cpp:165
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:356
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:300
void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:126
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition VPlanValue.h:422
ArrayRef< VPValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition VPlanValue.h:417
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition VPlanValue.h:395
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition VPlanValue.h:407
friend class VPValue
Definition VPlanValue.h:301
unsigned getVPDefID() const
Definition VPlanValue.h:427
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition VPlan.h:3631
VPValue * getStartValue() const
Definition VPlan.h:3630
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2004
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1710
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Class to record and manage LLVM IR flags.
Definition VPlan.h:600
FastMathFlagsTy FMFs
Definition VPlan.h:664
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
WrapFlagsTy WrapFlags
Definition VPlan.h:658
CmpInst::Predicate CmpPredicate
Definition VPlan.h:657
void printFlags(raw_ostream &O) const
GEPNoWrapFlags GEPFlags
Definition VPlan.h:662
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:819
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:659
CmpInst::Predicate getPredicate() const
Definition VPlan.h:801
ExactFlagsTy ExactFlags
Definition VPlan.h:661
bool hasNoSignedWrap() const
Definition VPlan.h:843
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:813
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:816
DisjointFlagsTy DisjointFlags
Definition VPlan.h:660
unsigned AllFlags
Definition VPlan.h:665
bool hasNoUnsignedWrap() const
Definition VPlan.h:832
NonNegFlagsTy NonNegFlags
Definition VPlan.h:663
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:764
Instruction & getInstruction() const
Definition VPlan.h:1376
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipes first operand to the last lane of the operand using Builder.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1351
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetada object with MD, keeping only metadata nodes that are common to both.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1101
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1056
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1012
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1046
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1059
@ FirstOrderRecurrenceSplice
Definition VPlan.h:985
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1050
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1009
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1006
@ VScale
Returns the value for vscale.
Definition VPlan.h:1061
@ CanonicalIVIncrementForPart
Definition VPlan.h:999
@ CalculateTripCountMinusVF
Definition VPlan.h:997
bool hasResult() const
Definition VPlan.h:1140
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1180
unsigned getOpcode() const
Definition VPlan.h:1120
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2532
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this recipe.
Instruction * getInsertPos() const
Definition VPlan.h:2536
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2534
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2526
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2555
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2520
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2629
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2648
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2599
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
static VPLane getLastLaneForVF(const ElementCount &VF)
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
static VPLane getFirstLane()
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition VPlan.h:2786
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1266
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:3898
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1291
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1258
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:394
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool isPhi() const
Returns true for PHI-like recipes.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
VPBasicBlock * getParent()
Definition VPlan.h:415
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:482
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
bool isScalarCast() const
Return true if the recipe is a scalar cast.
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:405
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2831
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2728
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2732
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2734
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2724
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2730
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2846
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
bool isSingleScalar() const
Definition VPlan.h:2891
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
unsigned getOpcode() const
Definition VPlan.h:2920
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition VPlan.h:3696
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:521
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:586
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:523
This class can be used to assign names to VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:926
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:197
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1459
operand_range operands()
Definition VPlanValue.h:265
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:241
unsigned getNumOperands() const
Definition VPlanValue.h:235
operand_iterator op_begin()
Definition VPlanValue.h:261
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:236
virtual bool onlyFirstLaneUsed(const VPValue *Op) const
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlanValue.h:280
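Illustrative sketch only, assuming the in-tree VPlanValue.h header: combining the VPUser operand accessors above (the helper name is hypothetical).

#include "VPlanValue.h"

using namespace llvm;

// Hypothetical helper: count the operands of a user that are live-ins,
// i.e. defined outside the VPlan.
static unsigned countLiveInOperands(const VPUser &U) {
  unsigned LiveIns = 0;
  for (unsigned I = 0, E = U.getNumOperands(); I != E; ++I)
    if (U.getOperand(I)->isLiveIn())
      ++LiveIns;
  return LiveIns;
}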
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1413
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:135
friend class VPExpressionRecipe
Definition VPlanValue.h:53
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1455
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Definition VPlanValue.h:140
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:174
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:98
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1416
user_iterator user_begin()
Definition VPlanValue.h:130
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:169
user_range users()
Definition VPlanValue.h:134
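Illustrative sketch only, assuming the in-tree VPlanValue.h header: how the VPValue queries above fit together (helper names hypothetical).

#include "VPlanValue.h"

using namespace llvm;

// Hypothetical helper: fetch the IR value behind a VPValue, preferring the
// live-in accessor when the value is defined outside the VPlan.
static Value *underlyingIRValue(const VPValue &V) {
  return V.isLiveIn() ? V.getLiveInIRValue() : V.getUnderlyingValue();
}

// Hypothetical helper: redirect every user of Old to New, but only when Old
// actually has users.
static void replaceIfUsed(VPValue &Old, VPValue &New) {
  if (Old.getNumUsers() != 0)
    Old.replaceAllUsesWith(&New);
}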
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
operand_range args()
Definition VPlan.h:1667
Function * getCalledScalarFunction() const
Definition VPlan.h:1663
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1536
void execute(VPTransformState &State) override
Produce widened copies of the cast.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2060
TruncInst * getTruncInst()
Returns the first defined value as a TruncInst if it is one, or nullptr otherwise.
Definition VPlan.h:2171
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2180
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1601
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1604
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3134
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3131
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3171
Instruction & Ingredient
Definition VPlan.h:3125
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3128
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3185
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3178
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3175
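Illustrative sketch only, assuming the in-tree VPlan.h header: classifying a widened memory recipe with the accessors above (the helper name is hypothetical).

#include "VPlan.h"

using namespace llvm;

// Hypothetical helper: a masked, consecutive, non-reversed access is the
// common case that lowers to a plain masked load or store rather than a
// gather/scatter or a reversed access.
static bool isSimpleMaskedAccess(const VPWidenMemoryRecipe &Mem) {
  return Mem.getMask() != nullptr && Mem.isConsecutive() && !Mem.isReverse();
}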
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1440
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getUF() const
Definition VPlan.h:4274
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1050
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
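Illustrative sketch only: how the ElementCount queries above feed VectorType::get (the helper name is hypothetical).

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Hypothetical helper: build an i32 vector type from an ElementCount.
// ElementCount::getFixed(4) gives <4 x i32>; ElementCount::getScalable(4),
// where isScalable() is true and getKnownMinValue() is 4, gives
// <vscale x 4 x i32>.
static VectorType *makeI32Vector(LLVMContext &Ctx, ElementCount EC) {
  return VectorType::get(Type::getInt32Ty(Ctx), EC);
}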
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
iterator erase(iterator where)
Definition ilist.h:204
pointer remove(iterator &IT)
Definition ilist.h:188
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows using arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor)
Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
LLVM_ABI StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
Definition VPlanUtils.h:44
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
LLVM_ABI Value * createFindLastIVReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel)
Create a reduction of the given vector Src for a reduction of the kind RecurKind::FindLastIV.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
InstructionCost Cost
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:738
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2233
auto cast_or_null(const Y &Val)
Definition Casting.h:720
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
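Illustrative sketch only: the shapes of the shuffle masks produced by the helpers above (the wrapper name is hypothetical).

#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4)   -> {0, 2, 4, 6}
// createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/4) -> {0, 0, 1, 1, 2, 2, 3, 3}
// Hypothetical wrapper selecting the even lanes of two interleaved vectors.
static SmallVector<int, 16> evenLaneMask(unsigned VF) {
  return createStrideMask(/*Start=*/0, /*Stride=*/2, VF);
}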
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1741
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:337
@ Other
Any other memory.
Definition ModRef.h:68
bool canVectorizeTy(Type *Ty)
Returns true if Ty is a valid vector element type, void, or an unpacked literal struct where all elem...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
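Illustrative sketch only: integer identities obtained from getRecurrenceIdentity (the helper name is hypothetical, and the start values listed in the comment are the usual neutral elements rather than a guarantee of this interface).

#include "llvm/IR/FMF.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

// Hypothetical helper for integer reduction kinds: Add starts at 0, Mul at 1,
// SMax at the smallest signed value, UMin at the all-ones value, and so on.
// Kinds such as AnyOf or FindLastIV have no plain identity and go through the
// dedicated createAnyOfReduction / createFindLastIVReduction helpers instead.
static Value *integerReductionIdentity(RecurKind Kind, Type *IntTy) {
  // FastMathFlags only matter for floating-point kinds.
  return getRecurrenceIdentity(Kind, IntTy, FastMathFlags());
}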
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
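Illustrative sketch only: combining the load/store utilities in this index (getLoadStoreType, getLoadStorePointerOperand); the helper name is hypothetical.

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: return the accessed pointer only for a load or store
// of a scalar (non-vector) type.
static const Value *scalarAccessPointer(const Value *V) {
  if (!isa<LoadInst>(V) && !isa<StoreInst>(V))
    return nullptr;
  return getLoadStoreType(V)->isVectorTy() ? nullptr
                                           : getLoadStorePointerOperand(V);
}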
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI Value * createAnyOfReduction(IRBuilderBase &B, Value *Src, Value *InitVal, PHINode *OrigPhi)
Create a reduction of the given vector Src for a reduction of kind RecurKind::AnyOf.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1413
PHINode & getIRPhi()
Definition VPlan.h:1421
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:872
std::optional< InstructionCost > getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:873
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:293
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3258
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate a wide load or gather.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isInvariantCond() const
Definition VPlan.h:1756
VPValue * getCond() const
Definition VPlan.h:1752
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3339
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3342
void execute(VPTransformState &State) override
Generate a wide store or scatter.
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3303
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.