#define DEBUG_TYPE "riscvtti"

    "riscv-v-register-bit-width-lmul",
    cl::desc(
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),

    cl::desc(
        "Overrides result used for getMaximumVF query which is used "
        "exclusively by SLP vectorizer."),

    cl::desc("Set the lower bound of a trip count to decide on "
             "vectorization while tail-folding."),
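// Note: these are internal `cl::opt` flags. As a sketch (not part of this
// file), such flags are typically exercised by passing them to the LLVM
// tools directly, e.g. `llc -riscv-v-register-bit-width-lmul=2`, or from
// clang via `-mllvm -riscv-v-slp-max-vf=8`.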
  size_t NumInstr = OpCodes.size();

    return LMULCost * NumInstr;

  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VFMV_F_S:
    case RISCV::VFMV_S_F:
    case RISCV::VMXOR_MM:
    case RISCV::VMAND_MM:
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
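// getRISCVInstructionCost: outside the reciprocal-throughput/latency cost
// kinds the estimate collapses to `LMULCost * NumInstr`; otherwise the
// per-opcode switch (case bodies elided in this listing) charges each
// gather, slide, reduction, scalar-move and mask pseudo a cost derived from
// the LMUL of the queried type.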
         "getIntImmCost can only estimate cost of materialising integers");

  auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
  if (!BO || !BO->hasOneUse())

  if (BO->getOpcode() != Instruction::Shl)

  if (!isa<ConstantInt>(BO->getOperand(1)))

  unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();

  if (ShAmt == Trailing)
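// canUseShiftPair: these guards recognise an `and` whose operand is a
// single-use `shl` by a constant, i.e. roughly
//   (and (shl x, c2), c1)
// which can be re-expressed as a shift pair (slli + srli) when the shift
// amount equals the number of trailing zero bits of the mask, so the mask
// immediate never has to be materialised.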
         "getIntImmCost can only estimate cost of materialising integers");

  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;

  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)

    if (!getTLI()->allowsMemoryAccessForAlignment(
            ST->getPointerAddressSpace(), ST->getAlign()))

  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) &&
        ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
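// Takes12BitImm marks opcodes whose scalar RISC-V forms accept a 12-bit
// signed immediate (addi, andi, ori, xori and friends). In the code that
// follows (elided here), such an immediate is reported as TTI::TCC_Free when
// it fits -- e.g. an `add` with 2047 costs nothing extra, while 2048 falls
// back to the generic materialisation cost, since the I-type range is
// -2048..2047.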
      if (Imm.getSignificantBits() <= 64 &&

  return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())

    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

  if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
      Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||

      getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
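// getPartialReductionCost only reports a real cost for the shape the
// Zvqdotq dot-product instructions implement: an i8 x i8 multiply whose
// products are accumulated into a wider sum, costed as a vqdot.vv of the
// legalized type. Other shapes are rejected by the (partially elided)
// guard above, so no partial reduction is formed for them.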
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:

RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,

  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)

  return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
         "Expected fixed vector type and non-empty mask");

  unsigned NumOfDests = divideCeil(Mask.size(), LegalNumElts);
  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||
      LegalNumElts >= Tp->getElementCount().getFixedValue())

  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);

  unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
  unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
  unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;

  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");

      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
                SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
                SingleOpTy, RegMask, CostKind, 0, nullptr);
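// costShuffleViaVRegSplitting: an illegal fixed-length shuffle is normalised
// to a whole number of legal vector registers and the mask is then processed
// per destination register: single-source sub-shuffles are charged once per
// distinct (mask, source register) pair, two-source sub-shuffles as a
// permute of two registers, and the per-register costs are summed.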
  if (!VLen || Mask.empty())

  LegalVT = TTI.getTypeLegalizationCost(

  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||

  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF =

  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");

      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
                SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
                SingleOpTy, RegMask, CostKind, 0, nullptr);

  if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
      (NumOfDestRegs <= 2 && NumShuffles < 4))
  if (!LT.second.isFixedLengthVector())

  auto GetSlideOpcode = [&](int SlideAmt) {
    bool IsVI = isUInt<5>(std::abs(SlideAmt));
      return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
    return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;

  std::array<std::pair<int, int>, 2> SrcInfo;
  if (SrcInfo[1].second == 0)

  if (SrcInfo[0].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
    FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);

  if (SrcInfo[1].first == -1)
    return FirstSlideCost;

  if (SrcInfo[1].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
    SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);

      getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);

  return FirstSlideCost + SecondSlideCost + MaskCost;
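// Shuffles recognised as a masked slide pair are costed as at most one
// vslideup/vslidedown per source (the .vi form when the amount fits in five
// bits, otherwise .vx, and skipped entirely when the slide amount is zero),
// plus a vmerge.vvm and the mask materialisation when both sources are live.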
         "Expected the Mask to match the return size if given");
         "Expected the same scalar types");

  if (auto *FVTp = dyn_cast<FixedVectorType>(SrcTy);

  if (VRegSplittingCost.isValid())
    return VRegSplittingCost;

  if (Mask.size() >= 2) {
    MVT EltTp = LT.second.getVectorElementType();

      if (Mask[0] == 0 || Mask[0] == 1) {
        if (equal(DeinterleaveMask, Mask))
          return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,

    if (LT.second.getScalarSizeInBits() != 1 &&

      unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
      for (unsigned I = 0; I != NumSlides; ++I) {
        unsigned InsertIndex = SubVectorSize * (1 << I);

        std::pair<InstructionCost, MVT> DestLT =

    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {

             getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
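// Single-source permutes are modelled as one vrgather.vv per register group
// plus (elided here) a constant-pool load of the index vector. The e8 case
// is capped at 256 elements because an 8-bit index can only address lanes
// 0-255; wider masks need a widened index type and are handled by the more
// generic paths below.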
  if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                        LT.second.getVectorNumElements() <= 256)) {
    auto &C = SrcTy->getContext();
    auto EC = SrcTy->getElementCount();

    return 2 * IndexCost +
           getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},

  if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&

      SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
    if (std::optional<unsigned> VLen = ST->getRealVLen();
        VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
        SubLT.second.getSizeInBits() <= *VLen)

        getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);

        getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);

        (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},

                           Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {

          (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},

        (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                      RISCV::VMV_X_S, RISCV::VMV_V_X,

           getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);

           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);

    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);

  if (!LT.second.isVector())

                     cast<VectorType>(SrcTy)->getElementCount());

    MVT ContainerVT = LT.second;
    if (LT.second.isFixedLengthVector())

    if (ContainerVT.bitsLE(M1VT)) {
      if (LT.second.isFixedLengthVector())
        LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
      unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
      if (LT.second.isFixedLengthVector() &&
          isInt<5>(LT.second.getVectorNumElements() - 1))
        Opcodes[1] = RISCV::VRSUB_VI;

          getRISCVInstructionCost(Opcodes, LT.second, CostKind);
      return LT.first * (LenCost + GatherCost);

    unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};

        getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;

        getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;

        getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
    return FixedCost + LT.first * (GatherCost + SlideCost);
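// TTI::SK_Reverse: a type that fits in one vector register is reversed with
// vid.v + vrsub.v{i,x} to build the descending index vector followed by a
// vrgather.vv; larger types reverse each m1 group with that idiom and then
// recombine the groups with vslidedown.vx, which is what the FixedCost,
// GatherCost and SlideCost terms above add up.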
  if (isa<ScalableVectorType>(Ty))

                        Ty, DemandedElts, Insert, Extract, CostKind);

  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {

      assert(LT.second.isFixedLengthVector());

          cast<FixedVectorType>(Ty)->getNumElements() *
          getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);

    bool UseMaskForCond, bool UseMaskForGaps) const {

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    auto *VTy = cast<VectorType>(VecTy);
    if (LT.second.isVector()) {
                      VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&

          return LT.first * Cost;

                          CostKind, {TTI::OK_AnyValue, TTI::OP_None});
      unsigned NumLoads = getEstimatedVLFor(VTy);
      return NumLoads * MemOpCost;
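// Interleaved accesses with a supported factor are costed as segment
// loads/stores of the legalized type; when the sub-vector type is legal but
// the hardware is not known to optimise that segment width, the cost becomes
// an estimated VL times the per-element memory-op cost (the NumLoads *
// MemOpCost term above). Unsupported shapes fall back to the explicit
// load/store plus shuffle decomposition below.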
  if (isa<ScalableVectorType>(VecTy))

  auto *FVTy = cast<FixedVectorType>(VecTy);

  unsigned VF = FVTy->getNumElements() / Factor;

  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {

      Mask.resize(VF * Factor, -1);

      Cost += ShuffleCost;

                                           UseMaskForCond, UseMaskForGaps);

  assert(Opcode == Instruction::Store && "Opcode must be a store");

  return MemCost + ShuffleCost;
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&

  auto &VTy = *cast<VectorType>(DataTy);

                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
    unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,

  bool IsLegal = (Opcode == Instruction::Store &&
                 (Opcode == Instruction::Load &&

  if (Opcode == Instruction::Store)
    Opcodes.append({RISCV::VCOMPRESS_VM});

    Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});

         LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
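// Masked compress-store is modelled as the memory operation plus one
// vcompress.vm; masked expand-load instead needs viota.m to turn the mask
// into gather indices together with a vsetivli and a vrgather.vv, which is
// what the opcode lists above feed into getRISCVInstructionCost.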
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
      (Opcode != Instruction::Load && Opcode != Instruction::Store))

  auto &VTy = *cast<VectorType>(DataTy);

                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
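// Each table entry is {intrinsic, element type, cost}, where the cost
// roughly tracks the length of the sequence these intrinsics expand to when
// no single RVV instruction covers them; the lookup in getIntrinsicInstrCost
// below multiplies the matching entry by the type's register-split factor
// (the `LT.first * Entry->Cost` return seen further down).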
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::lround:
  case Intrinsic::llround: {

    if (LT.second.getVectorElementType() == MVT::bf16) {

        Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};

        Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
    } else if (LT.second.getVectorElementType() == MVT::f16 &&

        Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};

        Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};

    } else if (SrcEltSz > DstEltSz) {
      Ops = {RISCV::VFNCVT_X_F_W};
    } else if (SrcEltSz < DstEltSz) {
      Ops = {RISCV::VFWCVT_X_F_V};

      Ops = {RISCV::VFCVT_X_F_V};

    if (SrcEltSz > DstEltSz)
      return SrcLT.first *
             getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
    return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {

      return LT.first * 8;

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {

    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())

      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;

      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {

      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;

      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);

  case Intrinsic::fma:
  case Intrinsic::fmuladd: {

          getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
  case Intrinsic::fabs: {

    if (LT.second.getVectorElementType() == MVT::bf16 ||
        (LT.second.getVectorElementType() == MVT::f16 &&

      return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,

           getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);

  case Intrinsic::sqrt: {

    MVT ConvType = LT.second;
    MVT FsqrtType = LT.second;

    if (LT.second.getVectorElementType() == MVT::bf16) {
      if (LT.second == MVT::nxv32bf16) {
        ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                  RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
        ConvType = MVT::nxv16f16;
        FsqrtType = MVT::nxv16f32;

        ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V};

    } else if (LT.second.getVectorElementType() == MVT::f16 &&

      if (LT.second == MVT::nxv32f16) {
        ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                  RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
        ConvType = MVT::nxv16f16;
        FsqrtType = MVT::nxv16f32;

        ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V};

      FsqrtOp = {RISCV::VFSQRT_V};

    return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
                       getRISCVInstructionCost(ConvOp, ConvType, CostKind));
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {

    if (ST->hasStdExtZvbb() && LT.second.isVector()) {

      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;

      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);

  case Intrinsic::abs: {

          getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},

  case Intrinsic::get_active_lane_mask: {

          getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},

  case Intrinsic::stepvector: {

      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
                 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);

  case Intrinsic::experimental_cttz_elts: {

        cast<ConstantInt>(ICA.getArgs()[1])->isZero())

  case Intrinsic::experimental_vp_splat: {

    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()

  case Intrinsic::experimental_vp_splice: {
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat: {

    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    if (!SrcLT.first.isValid() || !DstLT.first.isValid())

    Type *CondTy = RetTy->getWithNewBitWidth(1);

      LT.second.isVector()) {
    MVT EltTy = LT.second.getVectorElementType();

            ICA.getID(), EltTy))
      return LT.first * Entry->Cost;

  bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);

      Dst->getScalarSizeInBits() > ST->getELen())

  assert(ISD && "Invalid opcode");

    if (Src->getScalarSizeInBits() == 1) {

      return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
             DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,

    if (Dst->getScalarSizeInBits() == 1) {

      return SrcLT.first *
             getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},

  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
      !SrcLT.first.isValid() || !DstLT.first.isValid() ||
          SrcLT.second.getSizeInBits()) ||
          DstLT.second.getSizeInBits()))

  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");

    int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                  (int)Log2_32(SrcLT.second.getScalarSizeInBits());

    if ((PowDiff < 1) || (PowDiff > 3))
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};

    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
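// Vector sign/zero extensions whose destination/source width ratio is 2x,
// 4x or 8x are costed as a single vsext.vf2/vf4/vf8 or vzext.vf2/vf4/vf8,
// selected by PowDiff; ratios outside that range fall through to the more
// generic widening/narrowing handling that follows.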
  unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
  unsigned DstEltSize = DstLT.second.getScalarSizeInBits();

                      : RISCV::VFNCVT_F_F_W;

  for (; SrcEltSize != DstEltSize;) {

        (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;

  unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
      IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
      IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
  unsigned SrcEltSize = Src->getScalarSizeInBits();
  unsigned DstEltSize = Dst->getScalarSizeInBits();

  if ((SrcEltSize == 16) &&

                        cast<VectorType>(Dst)->getElementCount());
    std::pair<InstructionCost, MVT> VecF32LT =

           VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,

  if (DstEltSize == SrcEltSize)
    Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
  else if (DstEltSize > SrcEltSize)
    Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);

    Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
    if ((SrcEltSize / 2) > DstEltSize) {
  unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
  unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
  unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
  unsigned SrcEltSize = Src->getScalarSizeInBits();
  unsigned DstEltSize = Dst->getScalarSizeInBits();

  if ((DstEltSize == 16) &&

                        cast<VectorType>(Dst)->getElementCount());
    std::pair<InstructionCost, MVT> VecF32LT =

    Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,

  if (DstEltSize == SrcEltSize)
    Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
  else if (DstEltSize > SrcEltSize) {
    if ((DstEltSize / 2) > SrcEltSize) {

                          cast<VectorType>(Dst)->getElementCount());
      unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;

    Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);

    Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  if (isa<ScalableVectorType>(Ty)) {

  return cast<FixedVectorType>(Ty)->getNumElements();

  if (IID == Intrinsic::umax || IID == Intrinsic::smin)

  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {

    case Intrinsic::maximum:

        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};

        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,

    case Intrinsic::minimum:

        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};

        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,

    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);

  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};

      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
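// Min/max reductions follow a split-then-reduce model: when the value
// occupies LT.first register groups, LT.first - 1 pairwise SplitOp
// instructions (vmax.vv, vminu.vv, vfmin.vv, ...) combine the groups, and a
// single vred*.vs / vfred*.vs plus a scalar move (vmv.x.s or vfmv.f.s)
// produces the result; IEEE maximum/minimum additionally pay for NaN
// handling (vmfne.vv + vcpop.m) through the ExtraCost term.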
                                        std::optional<FastMathFlags> FMF,

  assert(ISD && "Invalid opcode");

    if (LT.second == MVT::v1i1)
      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +

      return ((LT.first > 2) ? (LT.first - 2) : 0) *
                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +

      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
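// i1 reductions are costed purely in mask instructions: an and-reduction
// folds the mask registers with vmand.mm and then uses vmnand.mm + vcpop.m
// plus one scalar compare; a xor-reduction folds with vmxor.mm and takes the
// parity of vcpop.m (the trailing "+ 1"); the or/add-style reductions below
// fold with vmor.mm and compare the vcpop.m result against zero.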
      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +

    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};

    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};

    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};

    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};

        LT.second.getScalarType() == MVT::bf16)

    for (unsigned i = 0; i < LT.first.getValue(); i++)

    return getRISCVInstructionCost(Opcodes, LT.second, CostKind);

    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};

      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)

  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {

           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);

  return (LT.first - 1) +

  if (!isa<VectorType>(Ty))

    return getConstantPoolLoadCost(Ty, CostKind);

  if (VT == MVT::Other)

  if (Opcode == Instruction::Store && OpInfo.isConstant())

  if (Src->isVectorTy() && LT.second.isVector() &&
                      LT.second.getSizeInBits()))

  return Cost + BaseCost;
                                    Op1Info, Op2Info, I);

                                    Op1Info, Op2Info, I);

                                    Op1Info, Op2Info, I);

  auto GetConstantMatCost =
        if (OpInfo.isUniform())

        return getConstantPoolLoadCost(ValTy, CostKind);

    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);

  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {

        return ConstantMatCost +
                   getRISCVInstructionCost(
                       {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},

      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,

      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return ConstantMatCost +
                 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},

    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},

  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&

    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,

  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&

      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
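// Vector select/compare costing: selects whose elements are i1 are modelled
// with mask logic (vmandn.mm / vmand.mm / vmor.mm), ordinary vector-condition
// selects with a vmerge.vvm, and selects of a scalar condition pay an extra
// vmv.v.x + vmsne.vi to broadcast the condition into a mask. Integer and FP
// compares map onto vmslt.vv / vmflt.vv style mask-producing compares, with
// the awkward FP predicates (ONE, UEQ, ORD/UNO, ...) charged the additional
// mask instructions shown below.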
                                     Op1Info, Op2Info, I);

    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},

    return ConstantMatCost +
           getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},

    return ConstantMatCost +
           getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);

    return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
           U->getType()->isIntegerTy() &&
           !isa<ConstantData>(U->getOperand(1)) &&
           !isa<ConstantData>(U->getOperand(2));
                              Op1Info, Op2Info, I);

  return Opcode == Instruction::PHI ? 0 : 1;

                                                 const Value *Op1) const {

  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)

  if (!LT.second.isVector()) {
    auto *FixedVecTy = cast<FixedVectorType>(Val);

    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();

    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;

  if (LT.second.isScalableVector() && !LT.first.isValid())

                        cast<VectorType>(Val)->getElementCount());
    if (Opcode == Instruction::ExtractElement) {

      return ExtendCost + ExtractCost;

    return ExtendCost + InsertCost + TruncCost;

  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;

    if (LT.second.isFixedLengthVector()) {
      unsigned Width = LT.second.getVectorNumElements();
      Index = Index % Width;

      unsigned EltSize = LT.second.getScalarSizeInBits();
      unsigned M1Max = *VLEN / EltSize;
      Index = Index % M1Max;

    else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
    else if (Opcode == Instruction::InsertElement)

      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {

    if (Opcode == Instruction::ExtractElement)

    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;

  return BaseCost + SlideCost;
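// Element access is modelled as vslidedown + vmv.x.s for extracts and
// vmv.s.x + vslideup for inserts: BaseCost covers the scalar move and
// SlideCost the slide (2 for inserts with a variable index because an addi
// is also needed to form index+1; 0 when the element is lane 0). Awkward
// cases such as extracting/inserting i64 on RV32 bump BaseCost to 3-4.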
                                                unsigned Index) const {
  if (isa<FixedVectorType>(Val))

  ElementCount EC = cast<VectorType>(Val)->getElementCount();
  assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");

                            EC.getKnownMinValue() - 1 - Index, nullptr,

  if (!LT.second.isVector())

  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&

    CastCost += LT.first * Args.size() *

    LT.second = PromotedVT;

  auto getConstantMatCost =

        return getConstantPoolLoadCost(Ty, CostKind);

    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);

  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;

    return CastCost + ConstantMatCost +

  return CastCost + ConstantMatCost + LT.first * InstrCost;
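// Plain vector arithmetic maps each IR opcode onto a single .vv instruction
// (vadd/vsll/vmul/vdiv/vrem and the vfadd/vfmul/vfdiv/vfsgnjn forms for FP),
// so the total is the register-split factor times that instruction's cost,
// plus any f16/bf16 promotion casts and constant-materialisation costs
// accumulated above.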
  const auto *GEP = dyn_cast<GetElementPtrInst>(V);

    if (Info.isSameBase() && V != Base) {
      if (GEP->hasAllConstantIndices())

      if (Info.isUnitStride() &&
              GEP->getType()->getPointerAddressSpace()))

                            {TTI::OK_AnyValue, TTI::OP_None},
                            {TTI::OK_AnyValue, TTI::OP_None}, {});
  if (ST->enableDefaultUnroll())

  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);

             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)

  if (L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {

      if (IsVectorized && I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

  Type *EltTy = cast<VectorType>(Ty)->getElementType();

                                     cast<VectorType>(Ty));

  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);

  if (ST->hasVendorXCVmem() && !ST->is64Bit())
                                              Align Alignment) const {
  auto *VTy = dyn_cast<VectorType>(DataTy);
  if (!VTy || VTy->isScalableTy())

  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)

                                                  Align Alignment) const {
  auto *VTy = dyn_cast<VectorType>(DataTy);
  if (!VTy || VTy->isScalableTy())

    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))

  Type *ConsideredSExtType =

  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {

      Considerable = true;

      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;

  return Considerable;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;

  auto *II = dyn_cast<IntrinsicInst>(I);

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
  case Intrinsic::fmuladd:
  case Intrinsic::vp_fmuladd:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
  case Intrinsic::ssub_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_select:
    return Operand == 1;

  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  case Intrinsic::smin:
  case Intrinsic::vp_smin:
  case Intrinsic::umin:
  case Intrinsic::vp_umin:
  case Intrinsic::smax:
  case Intrinsic::vp_smax:
  case Intrinsic::umax:
  case Intrinsic::vp_umax:
  case Intrinsic::sadd_sat:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::vp_uadd_sat:

  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
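// canSplatOperand: the opcode and intrinsic lists above describe which
// operand positions can be folded into a .vx / .vf scalar form, so a splat
// feeding that operand is worth sinking next to its user instead of being
// kept as a vector broadcast; commutative ops accept either operand,
// non-commutative ones only the second.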
  if (I->isBitwiseLogicOp()) {
    if (!I->getType()->isVectorTy()) {
      if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
        for (auto &Op : I->operands()) {

    } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
      for (auto &Op : I->operands()) {

          Use &InsertElt = cast<Instruction>(Op)->getOperandUse(0);
          Use &Not = cast<Instruction>(InsertElt)->getOperandUse(1);

  if (!ST->sinkSplatOperands())

    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))

    bool IsVPSplat = match(Op, m_Intrinsic<Intrinsic::experimental_vp_splat>(

    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))

    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());

      if (isa<FPExtInst>(Op->getOperand(0)))

      Use *InsertEltUse = &Op->getOperandUse(0);
      auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
      if (isa<FPExtInst>(InsertElt->getOperand(1)))
        Ops.push_back(&InsertElt->getOperandUse(1));
  if (!ST->enableUnalignedScalarMem())

  if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)

  Options.AllowOverlappingLoads = true;

    Options.LoadSizes = {8, 4, 2, 1};
    Options.AllowedTailExpansions = {3, 5, 6};

    Options.LoadSizes = {4, 2, 1};
    Options.AllowedTailExpansions = {3};
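// memcmp expansion is only enabled when unaligned scalar accesses are cheap
// and (for non-zero comparisons) Zbb/Zbkb provide a fast byte reverse. The
// {8, 4, 2, 1} load-size list corresponds to the XLEN=64 configuration and
// {4, 2, 1} to XLEN=32 (the surrounding branch is elided in this listing),
// with AllowedTailExpansions covering the residual sizes that can still be
// handled without a full-width load.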
  unsigned MinSize = ST->getXLen() / 8 + 1;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
bool isLegalAddImmediate(int64_t imm) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsF64() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
unsigned getRealMaxVLen() const
bool hasVInstructionsF32() const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
MVT getContainerForFixedLengthVector(MVT VT) const
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating a interleaved load/store intrinsic for this type will be legal.
static RISCVVType::VLMUL getLMUL(MVT VT)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isBFloatTy() const
Return true if this is 'bfloat', the 16-bit brain floating-point type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old number of lanes.
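This is the usual way cost and legalization code widens or narrows lane types without touching the lane count. The hedged LLVM sketch below shows the idea; the concrete types and the helper name are illustrative.

// Sketch: widen a vector's lanes while keeping the lane count.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static Type *widenLanesTo32(LLVMContext &Ctx) {
  // <4 x i16> -> <4 x i32>; a scalar i16 would become i32 the same way.
  Type *Narrow = FixedVectorType::get(Type::getInt16Ty(Ctx), 4);
  return Narrow->getWithNewBitWidth(32);
}

int main() {
  LLVMContext Ctx;
  widenLanesTo32(Ctx)->print(outs());
  outs() << "\n";
  return 0;
}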
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value RHS.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for basic integer types.
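These quantity helpers are the arithmetic layer behind ElementCount and TypeSize: a fixed quantity is exact, while a scalable one is a compile-time minimum that is multiplied by the runtime vscale. A short sketch of typical queries against the LLVM support header follows; the specific counts are illustrative.

#include "llvm/Support/TypeSize.h"
#include <cstdio>

using namespace llvm;

int main() {
  ElementCount Scal4 = ElementCount::getScalable(4);    // vscale x 4 lanes
  ElementCount Fixed8 = ElementCount::getFixed(8);      // exactly 8 lanes

  // Divisibility of the known-minimum lane count.
  std::printf("%d\n", Scal4.isKnownMultipleOf(2));       // 1

  // Halve the coefficient: vscale x 4 -> vscale x 2.
  ElementCount Half = Scal4.divideCoefficientBy(2);
  std::printf("%u\n", Half.getKnownMinValue());          // 2

  // Ordering is only "known" when the fixed/scalable kinds allow it.
  std::printf("%d\n", ElementCount::isKnownLE(Half, Scal4));   // 1
  std::printf("%d\n", ElementCount::isKnownLT(Half, Fixed8));  // 0: vscale x 2 might exceed 8

  // TypeSize uses the same machinery for sizes in bits.
  TypeSize TS = TypeSize::getScalable(128);              // vscale x 128 bits
  std::printf("%llu\n", (unsigned long long)TS.getKnownMinValue()); // 128
  return 0;
}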
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
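These matchers compose into declarative checks over IR. One common shape, sketched below with the IR-level PatternMatch header (the m_ZeroInt and m_Not declarations listed here are the GlobalISel variants, but the IR versions read the same), is recognizing a splat built as a shufflevector of an insertelement at index zero. The helper name is hypothetical and the check is deliberately loose.

// Sketch: recognizing an insert+shuffle splat idiom with IR PatternMatch.
// Illustrative only; the shuffle mask itself is not inspected here.
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Returns the splatted scalar if V looks like
//   shufflevector(insertelement(undef/poison, X, 0), undef/poison, <mask>)
// and nullptr otherwise.
static Value *matchInsertShuffleSplat(Value *V) {
  Value *X = nullptr;
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(X), m_ZeroInt()),
                         m_Undef())))
    return X;
  return nullptr;
}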
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
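These range wrappers drop the explicit begin()/end() calls; a minimal usage sketch of all_of together with its any_of counterpart (listed further down) over a shuffle mask is shown below.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

using namespace llvm;

int main() {
  SmallVector<int, 8> Mask = {0, 1, -1, 3};
  bool AllDefined = all_of(Mask, [](int M) { return M >= 0; });
  bool AnyUndef = any_of(Mask, [](int M) { return M < 0; });
  std::printf("%d %d\n", AllDefined, AnyUndef); // 0 1
  return 0;
}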
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
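The lookup is a linear scan for a matching (ISD opcode, MVT) pair over a static table, with the caller falling back to generic costing on a miss. The sketch below shows that usage pattern; the table entries and costs are made up for illustration and are not real RISC-V numbers.

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

using namespace llvm;

static unsigned lookupExampleCost(int ISDOpcode, MVT VT) {
  // Hypothetical per-(opcode, type) costs, for illustration only.
  static const CostTblEntry ExampleTbl[] = {
      {ISD::ADD, MVT::v4i32, 1},
      {ISD::MUL, MVT::v4i32, 3},
  };
  if (const auto *Entry = CostTableLookup(ExampleTbl, ISDOpcode, VT))
    return Entry->Cost;
  return 10; // miss: fall back to a conservative default
}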
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
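The truncated brief is easiest to read in use: each element of the result pairs a zero-based index with the corresponding value(s). A minimal single-range sketch follows.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

using namespace llvm;

int main() {
  SmallVector<int, 4> Vals = {10, 20, 30};
  for (const auto &En : enumerate(Vals)) {
    // index() is the 0-based position, value() the element itself.
    std::printf("%zu -> %d\n", En.index(), En.value());
  }
  return 0;
}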
int countr_zero(T Val)
Count the number of 0's from the least significant bit up to the most significant bit, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64-bit version).
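The check can be done branch-free: OR-ing a value with value minus one fills the zeros below its lowest set bit, and the result is a low mask exactly when the original ones were contiguous. A standalone sketch of that bit trick (not the LLVM implementation) follows.

#include <cstdint>
#include <cstdio>

// Is V a non-empty run of ones anchored at bit 0 (a "low mask")?
static bool isLowMask(uint64_t V) {
  // 0...01...1 plus one is a power of two, so the AND clears everything.
  return V != 0 && ((V + 1) & V) == 0;
}

// Is V a non-empty run of contiguous ones anywhere (a "shifted mask")?
static bool isShiftedMaskSketch(uint64_t V) {
  // (V - 1) | V fills the zeros below the lowest set bit; the result is a
  // low mask iff the set bits of V were contiguous to begin with.
  return V != 0 && isLowMask((V - 1) | V);
}

int main() {
  std::printf("%d\n", isShiftedMaskSketch(0x0ff0));                // 1
  std::printf("%d\n", isShiftedMaskSketch(0x0f0f));                // 0
  std::printf("%d\n", isShiftedMaskSketch(0x8000000000000000ULL)); // 1
  return 0;
}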
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
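Both the floor variant here and the ceiling variant listed earlier reduce to a leading-zero count; the ceiling form biases the input down by one so exact powers of two are not rounded up. The standalone C++20 sketch below computes the same values for nonzero inputs (it returns 0 for an input of 0, unlike the conventions noted above).

#include <bit>
#include <cstdint>
#include <cstdio>

// Floor log2 for nonzero V: index of the highest set bit.
// This sketch maps 0 to 0 rather than the -1 convention used above.
static unsigned floorLog2(uint32_t V) {
  return 31 - std::countl_zero(V | 1);
}

// Ceil log2: smallest N with 2^N >= V.
static unsigned ceilLog2(uint32_t V) {
  return V <= 1 ? 0 : floorLog2(V - 1) + 1;
}

int main() {
  std::printf("%u %u\n", floorLog2(16), ceilLog2(16)); // 4 4
  std::printf("%u %u\n", floorLog2(17), ceilLog2(17)); // 4 5
  return 0;
}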
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
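This is the usual add-then-truncate identity; the sketch below assumes the numerator plus denominator minus one does not overflow the 64-bit type.

#include <cstdint>

// Sketch: ceil(N / D) for unsigned integers; assumes N + D - 1 does not
// overflow. An overflow-safe alternative is N / D + (N % D != 0).
static constexpr uint64_t divideCeilSketch(uint64_t N, uint64_t D) {
  return (N + D - 1) / D;
}

static_assert(divideCeilSketch(7, 2) == 4, "7/2 rounds up to 4");
static_assert(divideCeilSketch(8, 2) == 4, "8/2 is exactly 4");

int main() { return 0; }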
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
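The two mask builders are duals: a stride mask pulls every Stride-th lane starting at Start (deinterleaving one member of a group), while an interleave mask merges NumVecs concatenated vectors lane by lane. The standalone sketch below reproduces those shapes; the function names are local stand-ins, not the LLVM utilities themselves.

#include <cstdio>
#include <vector>

// Stride mask: {Start, Start + Stride, Start + 2*Stride, ...}, VF entries.
static std::vector<int> strideMask(unsigned Start, unsigned Stride, unsigned VF) {
  std::vector<int> Mask;
  for (unsigned I = 0; I < VF; ++I)
    Mask.push_back(Start + I * Stride);
  return Mask;
}

// Interleave mask for NumVecs concatenated vectors of VF lanes each:
// {0, VF, 2*VF, ..., 1, VF + 1, ...}.
static std::vector<int> interleaveMask(unsigned VF, unsigned NumVecs) {
  std::vector<int> Mask;
  for (unsigned I = 0; I < VF; ++I)
    for (unsigned J = 0; J < NumVecs; ++J)
      Mask.push_back(J * VF + I);
  return Mask;
}

int main() {
  for (int M : strideMask(1, 2, 4))  // 1 3 5 7: the odd lanes of a pair
    std::printf("%d ", M);
  std::printf("\n");
  for (int M : interleaveMask(4, 2)) // 0 4 1 5 2 6 3 7
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}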
DWARFExpression::Operation Op
OutputIt copy(R &&Range, OutputIt Out)
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.