LLVM 22.0.0git
PPCTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
16#include "llvm/IR/IntrinsicsPowerPC.h"
21#include <optional>
22
23using namespace llvm;
24
25#define DEBUG_TYPE "ppctti"
26
// Command-line knobs for the PPC cost model (all cl::Hidden).
// VecMaskCost: when true, charge an extra mask/compare cost for i1 vectors.
27static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
28cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
29
// DisablePPCConstHoist: fall back to the base-class immediate-cost model,
// effectively disabling PPC-specific constant hoisting decisions.
30static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
31cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
32
// EnablePPCColdCC: opt-in switch consulted by useColdCCForColdCall below.
33static cl::opt<bool>
34EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
35 cl::desc("Enable using coldcc calling conv for cold "
36 "internal functions"));
37
// LsrNoInsnsCost: when set, isLSRCostLess defers to the default LSR ordering
// instead of prioritizing instruction count.
38static cl::opt<bool>
39LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
40 cl::desc("Do not add instruction count to lsr cost model"));
41
42// The latency of mtctr is only justified if there are more than 4
43// comparisons that will be removed as a result.
// NOTE(review): the option's type line (original line 44, cl::opt<unsigned>)
// was dropped by this extraction.
45SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
46 cl::desc("Loops with a constant trip count smaller than "
47 "this value will not use the count register."));
48
49//===----------------------------------------------------------------------===//
50//
51// PPC cost model.
52//
53//===----------------------------------------------------------------------===//
54
// Report how well the target supports CTPOP on a power-of-2-wide integer.
// NOTE(review): original lines 55 and 59-60 (return type line and the
// hardware-popcount return path taken when POPCNTD is available) were
// dropped by this extraction; only the software fallback is visible here.
56PPCTTIImpl::getPopcntSupport(unsigned TyWidth) const {
 57 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
 58 if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
 61 return TTI::PSK_Software;
 62}
63
// InstCombine hook: simplify PPC-specific intrinsics into plain IR.
// Aligned lvx/stvx become ordinary load/store with Align(16); VSX
// lxv*/stxv* become unaligned (Align(1)) load/store; constant-mask vperm
// becomes explicit extract/insert element sequences.
// NOTE(review): original lines 65, 73 and 89 (the function signature and the
// `if (getOrEnforceKnownAlignment(` openers) were dropped by this extraction.
64std::optional<Instruction *>
66 Intrinsic::ID IID = II.getIntrinsicID();
67 switch (IID) {
68 default:
69 break;
70 case Intrinsic::ppc_altivec_lvx:
71 case Intrinsic::ppc_altivec_lvxl:
72 // Turn PPC lvx -> load if the pointer is known aligned.
74 II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
75 &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
76 Value *Ptr = II.getArgOperand(0);
77 return new LoadInst(II.getType(), Ptr, "", false, Align(16));
78 }
79 break;
80 case Intrinsic::ppc_vsx_lxvw4x:
81 case Intrinsic::ppc_vsx_lxvd2x: {
82 // Turn PPC VSX loads into normal loads.
83 Value *Ptr = II.getArgOperand(0);
84 return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
85 }
86 case Intrinsic::ppc_altivec_stvx:
87 case Intrinsic::ppc_altivec_stvxl:
88 // Turn stvx -> store if the pointer is known aligned.
90 II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
91 &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
92 Value *Ptr = II.getArgOperand(1);
93 return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
94 }
95 break;
96 case Intrinsic::ppc_vsx_stxvw4x:
97 case Intrinsic::ppc_vsx_stxvd2x: {
98 // Turn PPC VSX stores into normal stores.
99 Value *Ptr = II.getArgOperand(1);
100 return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
101 }
102 case Intrinsic::ppc_altivec_vperm:
103 // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
104 // Note that ppc_altivec_vperm has a big-endian bias, so when creating
105 // a vectorshuffle for little endian, we must undo the transformation
106 // performed on vec_perm in altivec.h. That is, we must complement
107 // the permutation mask with respect to 31 and reverse the order of
108 // V1 and V2.
109 if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
110 assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
111 "Bad type for intrinsic!");
112
113 // Check that all of the elements are integer constants or undefs.
114 bool AllEltsOk = true;
115 for (unsigned I = 0; I != 16; ++I) {
116 Constant *Elt = Mask->getAggregateElement(I);
117 if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
118 AllEltsOk = false;
119 break;
120 }
121 }
122
123 if (AllEltsOk) {
124 // Cast the input vectors to byte vectors.
125 Value *Op0 =
126 IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
127 Value *Op1 =
128 IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
129 Value *Result = PoisonValue::get(Op0->getType());
130
131 // Only extract each element once.
132 Value *ExtractedElts[32];
133 memset(ExtractedElts, 0, sizeof(ExtractedElts));
134
135 for (unsigned I = 0; I != 16; ++I) {
136 if (isa<UndefValue>(Mask->getAggregateElement(I)))
137 continue;
138 unsigned Idx =
139 cast<ConstantInt>(Mask->getAggregateElement(I))->getZExtValue();
140 Idx &= 31; // Match the hardware behavior.
141 if (DL.isLittleEndian())
142 Idx = 31 - Idx;
143
144 if (!ExtractedElts[Idx]) {
// On little endian the two source vectors are swapped (see the
// big-endian-bias note above).
145 Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
146 Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
147 ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
148 Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
149 }
150
151 // Insert this value into the result vector.
152 Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
153 IC.Builder.getInt32(I));
154 }
155 return CastInst::Create(Instruction::BitCast, Result, II.getType());
156 }
157 }
158 break;
159 }
// No PPC-specific simplification applies.
160 return std::nullopt;
161}
162
// Cost of materializing an integer immediate: free for 0, one instruction
// (li/lis) for 16-bit-signed or lis-encodable 32-bit values, two for other
// 32-bit values, and ~4 instructions for wider constants.
// NOTE(review): original lines 163-165 (signature and the DisablePPCConstHoist
// early-out guard preceding this BaseT call) were dropped by this extraction.
166 return BaseT::getIntImmCost(Imm, Ty, CostKind);
167
168 assert(Ty->isIntegerTy());
169
170 unsigned BitSize = Ty->getPrimitiveSizeInBits();
171 if (BitSize == 0)
172 return ~0U;
173
174 if (Imm == 0)
175 return TTI::TCC_Free;
176
177 if (Imm.getBitWidth() <= 64) {
178 if (isInt<16>(Imm.getSExtValue()))
179 return TTI::TCC_Basic;
180
181 if (isInt<32>(Imm.getSExtValue())) {
182 // A constant that can be materialized using lis.
183 if ((Imm.getZExtValue() & 0xFFFF) == 0)
184 return TTI::TCC_Basic;
185
186 return 2 * TTI::TCC_Basic;
187 }
188 }
189
190 return 4 * TTI::TCC_Basic;
191}
192
// Immediate cost inside specific intrinsics: overflow intrinsics fold a
// 16-bit-signed second operand for free; stackmap/patchpoint meta operands
// are free. Everything else defers to the generic immediate cost.
// NOTE(review): original lines 193-194 and 196-197 (signature start and the
// DisablePPCConstHoist guard) were dropped by this extraction.
195 const APInt &Imm, Type *Ty,
198 return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
199
200 assert(Ty->isIntegerTy());
201
202 unsigned BitSize = Ty->getPrimitiveSizeInBits();
203 if (BitSize == 0)
204 return ~0U;
205
206 switch (IID) {
207 default:
208 return TTI::TCC_Free;
209 case Intrinsic::sadd_with_overflow:
210 case Intrinsic::uadd_with_overflow:
211 case Intrinsic::ssub_with_overflow:
212 case Intrinsic::usub_with_overflow:
213 if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
214 return TTI::TCC_Free;
215 break;
216 case Intrinsic::experimental_stackmap:
217 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
218 return TTI::TCC_Free;
219 break;
220 case Intrinsic::experimental_patchpoint_void:
221 case Intrinsic::experimental_patchpoint:
222 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
223 return TTI::TCC_Free;
224 break;
225 }
226 return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
227}
228
// Immediate cost in the context of a particular instruction. Classifies the
// opcode into which immediate forms PPC can encode directly (D-form 16-bit,
// shifted 16-bit, unsigned 16-bit compares, rotate-and-mask runs) and
// returns TCC_Free when the immediate folds into the instruction.
// NOTE(review): original lines 229, 231 and 233 (signature start and the
// DisablePPCConstHoist guard) were dropped by this extraction.
230 const APInt &Imm, Type *Ty,
232 Instruction *Inst) const {
234 return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);
235
236 assert(Ty->isIntegerTy());
237
238 unsigned BitSize = Ty->getPrimitiveSizeInBits();
239 if (BitSize == 0)
240 return ~0U;
241
242 unsigned ImmIdx = ~0U;
243 bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
244 ZeroFree = false;
245 switch (Opcode) {
246 default:
247 return TTI::TCC_Free;
248 case Instruction::GetElementPtr:
249 // Always hoist the base address of a GetElementPtr. This prevents the
250 // creation of new constants for every base constant that gets constant
251 // folded with the offset.
252 if (Idx == 0)
253 return 2 * TTI::TCC_Basic;
254 return TTI::TCC_Free;
255 case Instruction::And:
256 RunFree = true; // (for the rotate-and-mask instructions)
257 [[fallthrough]];
258 case Instruction::Add:
259 case Instruction::Or:
260 case Instruction::Xor:
261 ShiftedFree = true;
262 [[fallthrough]];
263 case Instruction::Sub:
264 case Instruction::Mul:
265 case Instruction::Shl:
266 case Instruction::LShr:
267 case Instruction::AShr:
268 ImmIdx = 1;
269 break;
270 case Instruction::ICmp:
271 UnsignedFree = true;
272 ImmIdx = 1;
273 // Zero comparisons can use record-form instructions.
274 [[fallthrough]];
275 case Instruction::Select:
276 ZeroFree = true;
277 break;
278 case Instruction::PHI:
279 case Instruction::Call:
280 case Instruction::Ret:
281 case Instruction::Load:
282 case Instruction::Store:
283 break;
284 }
285
286 if (ZeroFree && Imm == 0)
287 return TTI::TCC_Free;
288
289 if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
290 if (isInt<16>(Imm.getSExtValue()))
291 return TTI::TCC_Free;
292
293 if (RunFree) {
// Contiguous (or inverted-contiguous) bit runs fold into rlwinm/rldicl.
294 if (Imm.getBitWidth() <= 32 &&
295 (isShiftedMask_32(Imm.getZExtValue()) ||
296 isShiftedMask_32(~Imm.getZExtValue())))
297 return TTI::TCC_Free;
298
299 if (ST->isPPC64() &&
300 (isShiftedMask_64(Imm.getZExtValue()) ||
301 isShiftedMask_64(~Imm.getZExtValue())))
302 return TTI::TCC_Free;
303 }
304
305 if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
306 return TTI::TCC_Free;
307
308 if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
309 return TTI::TCC_Free;
310 }
311
312 return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
313}
314
315// Check if the current Type is an MMA vector type. Valid MMA types are
316// v256i1 and v512i1 respectively.
317static bool isMMAType(Type *Ty) {
318 return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
319 (Ty->getPrimitiveSizeInBits() > 128);
320}
321
// Generic per-instruction cost hook: scales the base cost of vector-typed
// users by the type-legalization split factor; casts/loads/stores are
// excluded because their dedicated hooks already apply that adjustment.
// NOTE(review): original lines 322-324, 328 and 336 (signature and the
// BaseT::getInstructionCost return statements) were dropped by this
// extraction.
325 // We already implement getCastInstrCost and getMemoryOpCost where we perform
326 // the vector adjustment there.
327 if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
329
330 if (U->getType()->isVectorTy()) {
331 // Instructions that need to be split should cost more.
332 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
333 return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);
334 }
335
337}
338
// Decide whether converting this loop to a CTR (mtctr/bdnz) hardware loop
// pays off: rejects small constant-trip-count loops, loops already using
// hardware-loop intrinsics, and loops whose profile says the exit edge is
// hotter than the backedge. On success fills HWLoopInfo (count type and a
// decrement of 1).
// NOTE(review): original lines 339, 348, 354, 356 and 398 (signature start,
// the TargetTransformInfo local, the EphValues/Metrics declarations, and the
// i64/i32 Type::get operands of the CountType ternary) were dropped by this
// extraction.
340 AssumptionCache &AC,
341 TargetLibraryInfo *LibInfo,
342 HardwareLoopInfo &HWLoopInfo) const {
343 const PPCTargetMachine &TM = ST->getTargetMachine();
344 TargetSchedModel SchedModel;
345 SchedModel.init(ST);
346
347 // FIXME: Sure there is no other way to get TTI? This should be cheap though.
349 TM.getTargetTransformInfo(*L->getHeader()->getParent());
350
351 // Do not convert small short loops to CTR loop.
352 unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
353 if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
355 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
357 for (BasicBlock *BB : L->blocks())
358 Metrics.analyzeBasicBlock(BB, TTI, EphValues);
359 // 6 is an approximate latency for the mtctr instruction.
360 if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
361 return false;
362 }
363
364 // Check that there is no hardware loop related intrinsics in the loop.
365 for (auto *BB : L->getBlocks())
366 for (auto &I : *BB)
367 if (auto *Call = dyn_cast<IntrinsicInst>(&I))
368 if (Call->getIntrinsicID() == Intrinsic::set_loop_iterations ||
369 Call->getIntrinsicID() == Intrinsic::loop_decrement)
370 return false;
371
372 SmallVector<BasicBlock*, 4> ExitingBlocks;
373 L->getExitingBlocks(ExitingBlocks);
374
375 // If there is an exit edge known to be frequently taken,
376 // we should not transform this loop.
377 for (auto &BB : ExitingBlocks) {
378 Instruction *TI = BB->getTerminator();
379 if (!TI) continue;
380
381 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
382 uint64_t TrueWeight = 0, FalseWeight = 0;
383 if (!BI->isConditional() ||
384 !extractBranchWeights(*BI, TrueWeight, FalseWeight))
385 continue;
386
387 // If the exit path is more frequent than the loop path,
388 // we return here without further analysis for this loop.
389 bool TrueIsExit = !L->contains(BI->getSuccessor(0));
390 if (( TrueIsExit && FalseWeight < TrueWeight) ||
391 (!TrueIsExit && FalseWeight > TrueWeight))
392 return false;
393 }
394 }
395
396 LLVMContext &C = L->getHeader()->getContext();
397 HWLoopInfo.CountType = TM.isPPC64() ?
399 HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
400 return true;
401}
402
// Tune unrolling for the A2 (partial/runtime unrolling allowed, expensive
// trip counts tolerated), then let the base implementation fill the rest.
// NOTE(review): original lines 403-404 (signature start) were dropped by
// this extraction.
405 OptimizationRemarkEmitter *ORE) const {
406 if (ST->getCPUDirective() == PPC::DIR_A2) {
407 // The A2 is in-order with a deep pipeline, and concatenation unrolling
408 // helps expose latency-hiding opportunities to the instruction scheduler.
409 UP.Partial = UP.Runtime = true;
410
411 // We unroll a lot on the A2 (hundreds of instructions), and the benefits
412 // often outweigh the cost of a division to compute the trip count.
413 UP.AllowExpensiveTripCount = true;
414 }
415
416 BaseT::getUnrollingPreferences(L, SE, UP, ORE);
417}
418
// Loop-peeling preferences: no PPC-specific tuning.
// NOTE(review): original lines 419 and 421 (signature start and the
// BaseT::getPeelingPreferences delegation) were dropped by this extraction.
420 TTI::PeelingPreferences &PP) const {
422}
423// This function returns true to allow using coldcc calling convention.
424// Returning true results in coldcc being used for functions which are cold at
425// all call sites when the callers of the functions are not calling any other
426// non coldcc functions.
// NOTE(review): original line 427 (the function signature) was dropped by
// this extraction. The result is driven entirely by the -ppc-enable-coldcc
// flag (default: false).
428 return EnablePPCColdCC;
429}
430
431bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) const {
432 // On the A2, always unroll aggressively.
433 if (ST->getCPUDirective() == PPC::DIR_A2)
434 return true;
435
436 return LoopHasReductions;
437}
438
// Enable inline memcmp expansion using 8/4/2/1-byte loads, capped by the
// target's max expansion size for the given OptSize setting.
// NOTE(review): original lines 439 and 441 (the MemCmpExpansionOptions
// return type and the local `Options` declaration) were dropped by this
// extraction.
440PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
442 Options.LoadSizes = {8, 4, 2, 1};
443 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
444 return Options;
445}
446
448
449unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
450 assert(ClassID == GPRRC || ClassID == FPRRC ||
451 ClassID == VRRC || ClassID == VSXRC);
452 if (ST->hasVSX()) {
453 assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
454 return ClassID == VSXRC ? 64 : 32;
455 }
456 assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
457 return 32;
458}
459
// Map a value type to the PPC TTI register class used for it (vectors to
// VSX/VR, float/double to VSX/FPR, half to VSX, everything else to GPR).
// NOTE(review): original lines 460 and 467 (the function signature and the
// second operand of the isFP128Ty()/isPPC_FP128Ty() disjunction) were
// dropped by this extraction.
461 if (Vector)
462 return ST->hasVSX() ? VSXRC : VRRC;
463 if (Ty &&
464 (Ty->getScalarType()->isFloatTy() || Ty->getScalarType()->isDoubleTy()))
465 return ST->hasVSX() ? VSXRC : FPRRC;
466 if (Ty && (Ty->getScalarType()->isFP128Ty() ||
468 return VRRC;
469 if (Ty && Ty->getScalarType()->isHalfTy())
470 return VSXRC;
471 return GPRRC;
472}
473
474const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
475
476 switch (ClassID) {
477 default:
478 llvm_unreachable("unknown register class");
479 return "PPC::unknown register class";
480 case GPRRC: return "PPC::GPRRC";
481 case FPRRC: return "PPC::FPRRC";
482 case VRRC: return "PPC::VRRC";
483 case VSXRC: return "PPC::VSXRC";
484 }
485}
486
// Register width per TTI register kind: 64/32-bit scalars depending on
// PPC64, 128-bit fixed vectors when Altivec is available, and no scalable
// vectors.
// NOTE(review): original lines 487-488, 490, 492 and 494 (the function
// signature and the RGK_Scalar/RGK_FixedWidthVector/RGK_ScalableVector case
// labels) were dropped by this extraction.
489 switch (K) {
491 return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
493 return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
495 return TypeSize::getScalable(0);
496 }
497
498 llvm_unreachable("Unsupported register kind");
499}
500
// Cache line size: 128 bytes on P7 and later (per the comment below),
// 64 bytes otherwise.
// NOTE(review): original lines 501 and 505-507 (the function signature and
// the directive comparison chain guarding the 128-byte return) were dropped
// by this extraction.
502 // Starting with P7 we have a cache line size of 128.
503 unsigned Directive = ST->getCPUDirective();
504 // Assume that Future CPU has the same cache line size as the others.
508 return 128;
509
510 // On other processors return a default of 64 bytes.
511 return 64;
512}
513
// Software prefetch distance, in instructions ahead of the access.
// NOTE(review): original line 514 (the getPrefetchDistance signature) was
// dropped by this extraction.
515 return 300;
516}
517
// Per-CPU interleave (unroll) factor chosen to hide FP latency: 5x on the
// 440, 6x on the A2, 12x on P7 and newer (two FP units x 6-cycle latency),
// and 2x as a generic default.
// NOTE(review): original lines 518, 531 and 539-541 (the function signature,
// the embedded-core guard before `return 1`, and the P7+ directive check
// before `return 12`) were dropped by this extraction.
519 unsigned Directive = ST->getCPUDirective();
520 // The 440 has no SIMD support, but floating-point instructions
521 // have a 5-cycle latency, so unroll by 5x for latency hiding.
522 if (Directive == PPC::DIR_440)
523 return 5;
524
525 // The A2 has no SIMD support, but floating-point instructions
526 // have a 6-cycle latency, so unroll by 6x for latency hiding.
527 if (Directive == PPC::DIR_A2)
528 return 6;
529
530 // FIXME: For lack of any better information, do no harm...
532 return 1;
533
534 // For P7 and P8, floating-point instructions have a 6-cycle latency and
535 // there are two execution units, so unroll by 12x for latency hiding.
536 // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
537 // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
538 // Assume that future is the same as the others.
542 return 12;
543
544 // For most things, modern systems have two execution units (and
545 // out-of-order execution).
546 return 2;
547}
548
549// Returns a cost adjustment factor to adjust the cost of vector instructions
550// on targets which there is overlap between the vector and scalar units,
551// thereby reducing the overall throughput of vector code wrt. scalar code.
552// An invalid instruction cost is returned if the type is an MMA vector type.
// NOTE(review): original lines 553 and 561 (the function signature start and
// the `return InstructionCost::getInvalid()` for the MMA case) were dropped
// by this extraction.
554 Type *Ty1,
555 Type *Ty2) const {
556 // If the vector type is of an MMA type (v256i1, v512i1), an invalid
557 // instruction cost is returned. This is to signify to other cost computing
558 // functions to return the maximum instruction cost in order to prevent any
559 // opportunities for the optimizer to produce MMA types within the IR.
560 if (isMMAType(Ty1))
562
563 if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
564 return InstructionCost(1);
565
566 std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
567 // If type legalization involves splitting the vector, we don't want to
568 // double the cost at every step - only the last step.
569 if (LT1.first != 1 || !LT1.second.isVector())
570 return InstructionCost(1);
571
572 int ISD = TLI->InstructionOpcodeToISD(Opcode);
573 if (TLI->isOperationExpand(ISD, LT1.second))
574 return InstructionCost(1);
575
576 if (Ty2) {
577 std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
578 if (LT2.first != 1 || !LT2.second.isVector())
579 return InstructionCost(1);
580 }
581
// Both types survive legalization unsplit: apply the 2x shared-unit penalty.
582 return InstructionCost(2);
583}
584
// Arithmetic instruction cost: base-class cost scaled by the shared
// vector/scalar-unit adjustment factor; invalid (MMA) types propagate an
// invalid cost.
// NOTE(review): original lines 585, 587, 593, 596 and 601 (signature start,
// the Op1Info parameter line, the invalid-cost return, the cost-kind guard,
// and the `InstructionCost Cost = BaseT::getArithmeticInstrCost(` opener)
// were dropped by this extraction.
586 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
588 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
589 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
590
591 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
592 if (!CostFactor.isValid())
594
595 // TODO: Handle more cost kinds.
597 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
598 Op2Info, Args, CxtI);
599
600 // Fallback to the default implementation.
602 Opcode, Ty, CostKind, Op1Info, Op2Info);
603 return Cost * CostFactor;
604}
605
// Shuffle cost: one (adjusted) permute per legalized register, since
// Altivec/VSX vperm handles arbitrary single-register permutations.
// NOTE(review): original lines 606, 609, 611 and 617 (signature start, the
// ShuffleKind/CostKind parameter lines, and the invalid-cost return) were
// dropped by this extraction.
607 VectorType *DstTy, VectorType *SrcTy,
608 ArrayRef<int> Mask,
610 int Index, VectorType *SubTp,
612 const Instruction *CxtI) const {
613
614 InstructionCost CostFactor =
615 vectorCostAdjustmentFactor(Instruction::ShuffleVector, SrcTy, nullptr);
616 if (!CostFactor.isValid())
618
619 // Legalize the type.
620 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcTy);
621
622 // PPC, for both Altivec/VSX, support cheap arbitrary permutations
623 // (at least in the sense that there need only be one non-loop-invariant
624 // instruction). We need one such shuffle instruction for each actual
625 // register (this is not true for arbitrary shuffles, but is true for the
626 // structured types of shuffles covered by TTI::ShuffleKind).
627 return LT.first * CostFactor;
628}
629
// Control-flow instruction cost: PHIs are free, other CF costs 1 for
// size/latency kinds; branches cost 0 (assumed predicted) for throughput.
// NOTE(review): original lines 630-631 and 633 (signature start and the
// cost-kind guard before the first return) were dropped by this extraction.
632 const Instruction *I) const {
634 return Opcode == Instruction::PHI ? 0 : 1;
635 // Branches are assumed to be predicted.
636 return 0;
637}
638
// Cast cost: base-class cost times the vector adjustment factor, clamped to
// 0/1 for non-throughput cost kinds.
// NOTE(review): original lines 639, 641-642, 648, 650 and 654 (signature
// start, the CCH/CostKind parameter lines, the invalid-cost return, the
// `InstructionCost Cost =` opener, and the cost-kind guard) were dropped by
// this extraction.
640 Type *Src,
643 const Instruction *I) const {
644 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
645
646 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
647 if (!CostFactor.isValid())
649
651 BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
652 Cost *= CostFactor;
653 // TODO: Allow non-throughput costs that aren't binary.
655 return Cost == 0 ? 0 : 1;
656 return Cost;
657}
658
// Compare/select cost: base-class cost, scaled by the vector adjustment
// factor for the throughput cost kind only.
// NOTE(review): original lines 659, 661, 666, 668 and 671 (signature start,
// the CostKind/Op1Info parameter line, the invalid-cost return, the
// `InstructionCost Cost = BaseT::getCmpSelInstrCost(` opener, and the
// cost-kind guard) were dropped by this extraction.
660 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
662 TTI::OperandValueInfo Op2Info, const Instruction *I) const {
663 InstructionCost CostFactor =
664 vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
665 if (!CostFactor.isValid())
667
669 Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I);
670 // TODO: Handle other cost kinds.
672 return Cost;
673 return Cost * CostFactor;
674}
675
// Element insert/extract cost. Models the cheap direct-move paths available
// with VSX/P9/P10 (mfvsrd/mfvsrwz, vxform insert/extract), charges extra for
// i1 elements and non-constant indices, and applies a large load-hit-store
// penalty on pre-VSX Altivec where insert/extract round-trips through memory.
// NOTE(review): original lines 676-677, 687, 689 and 760 (the signature
// start, the invalid-cost return, the `InstructionCost Cost =` opener, and
// the ISD::INSERT_VECTOR_ELT half of the final disjunction) were dropped by
// this extraction.
678 unsigned Index, const Value *Op0,
679 const Value *Op1) const {
680 assert(Val->isVectorTy() && "This must be a vector type");
681
682 int ISD = TLI->InstructionOpcodeToISD(Opcode);
683 assert(ISD && "Invalid opcode");
684
685 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
686 if (!CostFactor.isValid())
688
690 BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
691 Cost *= CostFactor;
692
693 if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
694 // Double-precision scalars are already located in index #0 (or #1 if LE).
695 if (ISD == ISD::EXTRACT_VECTOR_ELT &&
696 Index == (ST->isLittleEndian() ? 1 : 0))
697 return 0;
698
699 return Cost;
700 }
701 if (Val->getScalarType()->isIntegerTy()) {
702 unsigned EltSize = Val->getScalarSizeInBits();
703 // Computing on 1 bit values requires extra mask or compare operations.
704 unsigned MaskCostForOneBitSize = (VecMaskCost && EltSize == 1) ? 1 : 0;
705 // Computing on non const index requires extra mask or compare operations.
706 unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;
707 if (ST->hasP9Altivec()) {
708 // P10 has vxform insert which can handle non const index. The
709 // MaskCostForIdx is for masking the index.
710 // P9 has insert for const index. A move-to VSR and a permute/insert.
711 // Assume vector operation cost for both (cost will be 2x on P9).
712 if (ISD == ISD::INSERT_VECTOR_ELT) {
713 if (ST->hasP10Vector())
714 return CostFactor + MaskCostForIdx;
715 if (Index != -1U)
716 return 2 * CostFactor;
717 } else if (ISD == ISD::EXTRACT_VECTOR_ELT) {
718 // It's an extract. Maybe we can do a cheap move-from VSR.
719 unsigned EltSize = Val->getScalarSizeInBits();
720 // P9 has both mfvsrd and mfvsrld for 64 bit integer.
721 if (EltSize == 64 && Index != -1U)
722 return 1;
723 if (EltSize == 32) {
724 unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
725 if (Index == MfvsrwzIndex)
726 return 1;
727
728 // For other indexs like non const, P9 has vxform extract. The
729 // MaskCostForIdx is for masking the index.
730 return CostFactor + MaskCostForIdx;
731 }
732
733 // We need a vector extract (or mfvsrld). Assume vector operation cost.
734 // The cost of the load constant for a vector extract is disregarded
735 // (invariant, easily schedulable).
736 return CostFactor + MaskCostForOneBitSize + MaskCostForIdx;
737 }
738 } else if (ST->hasDirectMove() && Index != -1U) {
739 // Assume permute has standard cost.
740 // Assume move-to/move-from VSR have 2x standard cost.
741 if (ISD == ISD::INSERT_VECTOR_ELT)
742 return 3;
743 return 3 + MaskCostForOneBitSize;
744 }
745 }
746
747 // Estimated cost of a load-hit-store delay. This was obtained
748 // experimentally as a minimum needed to prevent unprofitable
749 // vectorization for the paq8p benchmark. It may need to be
750 // raised further if other unprofitable cases remain.
751 unsigned LHSPenalty = 2;
752 if (ISD == ISD::INSERT_VECTOR_ELT)
753 LHSPenalty += 7;
754
755 // Vector element insert/extract with Altivec is very expensive,
756 // because they require store and reload with the attendant
757 // processor stall for load-hit-store.  Until VSX is available,
758 // these need to be estimated as very costly.
759 if (ISD == ISD::EXTRACT_VECTOR_ELT ||
761 return LHSPenalty + Cost;
762
763 return Cost;
764}
765
// Load/store cost. Applies the vector adjustment factor, then special-cases
// cheap VSX 32/64-bit accesses, aligned accesses, the Altivec
// permutation-based unaligned-load sequence, native unaligned VSX access,
// and finally decomposes truly unsupported unaligned accesses into scalar
// pieces plus (for stores) per-element extraction overhead.
// NOTE(review): original lines 766, 769-770, 775, 785 and 788 (signature
// start, CostKind/OpInfo parameter lines, the invalid-cost return, the
// `InstructionCost Cost =` opener, and the cost-kind guard) were dropped by
// this extraction.
767 Align Alignment,
768 unsigned AddressSpace,
771 const Instruction *I) const {
772
773 InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
774 if (!CostFactor.isValid())
776
777 if (TLI->getValueType(DL, Src, true) == MVT::Other)
778 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
779 CostKind);
780 // Legalize the type.
781 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
782 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
783 "Invalid Opcode");
784
786 BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
787 // TODO: Handle other cost kinds.
789 return Cost;
790
791 Cost *= CostFactor;
792
793 bool IsAltivecType = ST->hasAltivec() &&
794 (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
795 LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
796 bool IsVSXType = ST->hasVSX() &&
797 (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
798
799 // VSX has 32b/64b load instructions. Legalization can handle loading of
800 // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
801 // PPCTargetLowering can't compute the cost appropriately. So here we
802 // explicitly check this case. There are also corresponding store
803 // instructions.
804 unsigned MemBits = Src->getPrimitiveSizeInBits();
805 unsigned SrcBytes = LT.second.getStoreSize();
806 if (ST->hasVSX() && IsAltivecType) {
807 if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32))
808 return 1;
809
810 // Use lfiwax/xxspltw
811 if (Opcode == Instruction::Load && MemBits == 32 && Alignment < SrcBytes)
812 return 2;
813 }
814
815 // Aligned loads and stores are easy.
816 if (!SrcBytes || Alignment >= SrcBytes)
817 return Cost;
818
819 // If we can use the permutation-based load sequence, then this is also
820 // relatively cheap (not counting loop-invariant instructions): one load plus
821 // one permute (the last load in a series has extra cost, but we're
822 // neglecting that here). Note that on the P7, we could do unaligned loads
823 // for Altivec types using the VSX instructions, but that's more expensive
824 // than using the permutation-based load sequence. On the P8, that's no
825 // longer true.
826 if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
827 Alignment >= LT.second.getScalarType().getStoreSize())
828 return Cost + LT.first; // Add the cost of the permutations.
829
830 // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
831 // P7, unaligned vector loads are more expensive than the permutation-based
832 // load sequence, so that might be used instead, but regardless, the net cost
833 // is about the same (not counting loop-invariant instructions).
834 if (IsVSXType || (ST->hasVSX() && IsAltivecType))
835 return Cost;
836
837 // Newer PPC supports unaligned memory access.
838 if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
839 return Cost;
840
841 // PPC in general does not support unaligned loads and stores. They'll need
842 // to be decomposed based on the alignment factor.
843
844 // Add the cost of each scalar load or store.
845 Cost += LT.first * ((SrcBytes / Alignment.value()) - 1);
846
847 // For a vector type, there is also scalarization overhead (only for
848 // stores, loads are expanded using the vector-load + permutation sequence,
849 // which is much less expensive).
850 if (Src->isVectorTy() && Opcode == Instruction::Store)
851 for (int I = 0, E = cast<FixedVectorType>(Src)->getNumElements(); I < E;
852 ++I)
853 Cost += getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, I,
854 nullptr, nullptr);
855
856 return Cost;
857}
858
// Interleaved load/store cost: the underlying memory-op cost plus one
// shuffle per incoming vector per result vector (Factor * (LT.first - 1)),
// exploiting cheap Altivec/VSX permutations. Masked variants defer to the
// base implementation.
// NOTE(review): original lines 859, 866 and 880 (the signature start, the
// invalid-cost return, and the `InstructionCost Cost =` opener) were dropped
// by this extraction.
860 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
861 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
862 bool UseMaskForCond, bool UseMaskForGaps) const {
863 InstructionCost CostFactor =
864 vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
865 if (!CostFactor.isValid())
867
868 if (UseMaskForCond || UseMaskForGaps)
869 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
870 Alignment, AddressSpace, CostKind,
871 UseMaskForCond, UseMaskForGaps);
872
873 assert(isa<VectorType>(VecTy) &&
874 "Expect a vector type for interleaved memory op");
875
876 // Legalize the type.
877 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);
878
879 // Firstly, the cost of load/store operation.
881 getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
882
883 // PPC, for both Altivec/VSX, support cheap arbitrary permutations
884 // (at least in the sense that there need only be one non-loop-invariant
885 // instruction). For each result vector, we need one shuffle per incoming
886 // vector (except that the first shuffle can take two incoming vectors
887 // because it does not need to take itself).
888 Cost += Factor*(LT.first-1);
889
890 return Cost;
891}
892
897}
898
// Inlining compatibility: caller and callee must have identical subtarget
// feature bitsets.
// NOTE(review): original line 899 (the signature start,
// `bool PPCTTIImpl::areInlineCompatible(const Function *Caller,`) was
// dropped by this extraction.
900 const Function *Callee) const {
901 const TargetMachine &TM = getTLI()->getTargetMachine();
902
903 const FeatureBitset &CallerBits =
904 TM.getSubtargetImpl(*Caller)->getFeatureBits();
905 const FeatureBitset &CalleeBits =
906 TM.getSubtargetImpl(*Callee)->getFeatureBits();
907
908 // Check that targets features are exactly the same. We can revisit to see if
909 // we can improve this.
910 return CallerBits == CalleeBits;
911}
912
// ABI compatibility for argument promotion: rejects any sized i1-vector
// type wider than 128 bits, i.e. the MMA __vector_pair/__vector_quad types,
// which cannot be passed as arguments.
// NOTE(review): original line 913 (the signature start) was dropped by this
// extraction.
914 const Function *Callee,
915 const ArrayRef<Type *> &Types) const {
916
917 // We need to ensure that argument promotion does not
918 // attempt to promote pointers to MMA types (__vector_pair
919 // and __vector_quad) since these types explicitly cannot be
920 // passed as arguments. Both of these types are larger than
921 // the 128-bit Altivec vectors and have a scalar size of 1 bit.
922 if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
923 return false;
924
925 return llvm::none_of(Types, [](Type *Ty) {
926 if (Ty->isSized())
927 return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
928 return false;
929 });
930}
931
// Returns true (and sets *BI to the exit branch) when this loop will become
// a hardware CTR loop, so the comparison feeding the branch can be elided.
// Recurses into nested loops first and bails out if any inner loop already
// qualifies.
// NOTE(review): original line 932 (the signature start) was dropped by this
// extraction.
933 LoopInfo *LI, DominatorTree *DT,
934 AssumptionCache *AC,
935 TargetLibraryInfo *LibInfo) const {
936 // Process nested loops first.
937 for (Loop *I : *L)
938 if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
939 return false; // Stop search.
940
941 HardwareLoopInfo HWLoopInfo(L);
942
943 if (!HWLoopInfo.canAnalyze(*LI))
944 return false;
945
946 if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
947 return false;
948
949 if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
950 return false;
951
952 *BI = HWLoopInfo.ExitBranch;
953 return true;
954}
955
// LSR cost comparison: PPC orders candidates by instruction count first
// (then registers, addrec cost, etc.) unless -ppc-lsr-no-insns-cost asks
// for the default ordering.
// NOTE(review): original lines 956 and 965 (the signature start and the
// fallback `return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);`)
// were dropped by this extraction.
957 const TargetTransformInfo::LSRCost &C2) const {
958 // PowerPC default behaviour here is "instruction number 1st priority".
959 // If LsrNoInsnsCost is set, call default implementation.
960 if (!LsrNoInsnsCost)
961 return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
962 C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
963 std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
964 C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
966}
967
968bool PPCTTIImpl::isNumRegsMajorCostOfLSR() const { return false; }
969
// Relative lookup tables are only enabled on ELFv2; XCOFF lacks
// lowerRelativeReference support.
// NOTE(review): original lines 970 and 975 (the signature and the fallback
// `return BaseT::shouldBuildRelLookupTables();`) were dropped by this
// extraction.
971 const PPCTargetMachine &TM = ST->getTargetMachine();
972 // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
973 if (!TM.isELFv2ABI())
974 return false;
976}
977
// Describe PPC memory intrinsics for alias analysis: loads take their
// pointer from operand 0, stores from operand 1 (store-conditional
// st{b,h,w,d}cx. from operand 0), with ReadMem/WriteMem set accordingly.
// NOTE(review): original line 978 (the signature start) was dropped by this
// extraction.
979 MemIntrinsicInfo &Info) const {
980 switch (Inst->getIntrinsicID()) {
981 case Intrinsic::ppc_altivec_lvx:
982 case Intrinsic::ppc_altivec_lvxl:
983 case Intrinsic::ppc_altivec_lvebx:
984 case Intrinsic::ppc_altivec_lvehx:
985 case Intrinsic::ppc_altivec_lvewx:
986 case Intrinsic::ppc_vsx_lxvd2x:
987 case Intrinsic::ppc_vsx_lxvw4x:
988 case Intrinsic::ppc_vsx_lxvd2x_be:
989 case Intrinsic::ppc_vsx_lxvw4x_be:
990 case Intrinsic::ppc_vsx_lxvl:
991 case Intrinsic::ppc_vsx_lxvll:
992 case Intrinsic::ppc_vsx_lxvp: {
993 Info.PtrVal = Inst->getArgOperand(0);
994 Info.ReadMem = true;
995 Info.WriteMem = false;
996 return true;
997 }
998 case Intrinsic::ppc_altivec_stvx:
999 case Intrinsic::ppc_altivec_stvxl:
1000 case Intrinsic::ppc_altivec_stvebx:
1001 case Intrinsic::ppc_altivec_stvehx:
1002 case Intrinsic::ppc_altivec_stvewx:
1003 case Intrinsic::ppc_vsx_stxvd2x:
1004 case Intrinsic::ppc_vsx_stxvw4x:
1005 case Intrinsic::ppc_vsx_stxvd2x_be:
1006 case Intrinsic::ppc_vsx_stxvw4x_be:
1007 case Intrinsic::ppc_vsx_stxvl:
1008 case Intrinsic::ppc_vsx_stxvll:
1009 case Intrinsic::ppc_vsx_stxvp: {
1010 Info.PtrVal = Inst->getArgOperand(1);
1011 Info.ReadMem = false;
1012 Info.WriteMem = true;
1013 return true;
1014 }
1015 case Intrinsic::ppc_stbcx:
1016 case Intrinsic::ppc_sthcx:
1017 case Intrinsic::ppc_stdcx:
1018 case Intrinsic::ppc_stwcx: {
1019 Info.PtrVal = Inst->getArgOperand(0);
1020 Info.ReadMem = false;
1021 Info.WriteMem = true;
1022 return true;
1023 }
1024 default:
1025 break;
1026 }
1027
// Not a recognized PPC memory intrinsic.
1028 return false;
1029}
1030
// Delegates tail-call support for this call base to the target lowering.
// NOTE(review): original line 1031 (the supportsTailCallFor signature) was
// dropped by this extraction.
1032 return TLI->supportsTailCallFor(CB);
1033}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file provides the interface for the instcombine pass implementation.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
Machine Trace Metrics
uint64_t IntrinsicInst * II
static cl::opt< bool > VecMaskCost("ppc-vec-mask-cost", cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden)
static cl::opt< bool > DisablePPCConstHoist("disable-ppc-constant-hoisting", cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, cl::desc("Loops with a constant trip count smaller than " "this value will not use the count register."))
static bool isMMAType(Type *Ty)
static cl::opt< bool > EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), cl::desc("Enable using coldcc calling conv for cold " "internal functions"))
static cl::opt< bool > LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false), cl::desc("Do not add instruction count to lsr cost model"))
This file defines a TargetTransformInfoImplBase conforming object specific to the PPC target machine.
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:62
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool shouldBuildRelLookupTables() const override
Definition: BasicTTIImpl.h:628
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition: BasicTTIImpl.h:702
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition: BasicTTIImpl.h:774
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:997
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1116
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1292
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:678
This is an important base class in LLVM.
Definition: Constant.h:43
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:198
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:165
Container class for subtarget features.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2571
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2559
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:522
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2204
The core instruction combiner logic.
Definition: InstCombiner.h:48
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:337
DominatorTree & getDominatorTree() const
Definition: InstCombiner.h:336
BuilderTy & Builder
Definition: InstCombiner.h:61
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:334
static InstructionCost getInvalid(CostType Val=0)
static InstructionCost getMax()
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:49
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:56
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
An instruction for reading from memory.
Definition: Instructions.h:180
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:40
The optimization diagnostic interface.
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:211
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1, Type *Ty2) const
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool enableInterleavedAccessVectorization() const override
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const override
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
unsigned getCacheLineSize() const override
bool useColdCCForColdCall(Function &F) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isNumRegsMajorCostOfLSR() const override
unsigned getPrefetchDistance() const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
const char * getRegisterClassName(unsigned ClassID) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool shouldBuildRelLookupTables() const override
bool supportsTailCallFor(const CallBase *CB) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool enableAggressiveInterleaving(bool LoopHasReductions) const override
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
bool supportsTailCallFor(const CallBase *CB) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
Common code between 32-bit and 64-bit PowerPC targets.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1885
The main scalar evolution driver.
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:541
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
An instruction for storing to memory.
Definition: Instructions.h:296
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:83
Provide an instruction scheduling machine model to CodeGen passes.
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
CastContextHint
Represents a hint about the context in which a cast is used.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:346
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:349
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:273
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:246
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
bool isPPC_FP128Ty() const
Return true if this is powerpc long double.
Definition: Type.h:165
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:162
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:311
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:240
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:352
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
Base class of all SIMD vector types.
Definition: DerivedTypes.h:430
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:563
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:552
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:65
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:444
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:276
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:282
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1566
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
InstructionCost Cost
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:34
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
Attributes of a target dependent hardware loop.
LLVM_ABI bool canAnalyze(LoopInfo &LI)
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Parameters that control the generic loop unrolling transformation.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...