#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscvtti"

    "riscv-v-register-bit-width-lmul",
    "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
    "by autovectorized code. Fractional LMULs are not supported."),

    "Overrides result used for getMaximumVF query which is used "
    "exclusively by SLP vectorizer."),

    cl::desc("Set the lower bound of a trip count to decide on "
             "vectorization while tail-folding."),

  size_t NumInstr = OpCodes.size();
    return LMULCost * NumInstr;
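  // Cost each opcode individually below: gathers, slides, reductions, mask
  // ops, and scalar<->vector moves do not all scale linearly with LMUL, so
  // they get dedicated handling instead of the flat LMULCost * NumInstr.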
  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VFMV_F_S:
    case RISCV::VFMV_S_F:
    case RISCV::VMXOR_MM:
    case RISCV::VMAND_MM:
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  if (!BO || !BO->hasOneUse())
  if (BO->getOpcode() != Instruction::Shl)
  if (ShAmt == Trailing)

  if (!Cmp || !Cmp->isEquality())
  if ((CmpC & Mask) != CmpC)
  return NewCmpC >= -2048 && NewCmpC <= 2048;

  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");
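  // getIntImmCostInst: the switch below decides, per opcode, whether the
  // immediate can fold into the instruction (12-bit simm, or a Zbb/Zba/Zbs
  // pattern) and is therefore free, or must be materialised separately.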
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;

  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)
    if (!getTLI()->allowsMemoryAccessForAlignment(
  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) &&
        ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;

  if (Imm.getSignificantBits() <= 64 &&
  return ST->hasVInstructions();

    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
  if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
      Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||
         getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);

  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:

  if (ST->hasVInstructions())
  if (ST->hasVInstructions())
    if (unsigned MinVLen = ST->getRealMinVLen();
      ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
      (ST->hasVInstructions() &&
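// getConstantPoolLoadCost: a constant that cannot be cheaply materialised in
// registers is lowered as a constant-pool load, i.e. address generation plus
// a load of the value's type.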
RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,

  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)

         "Expected fixed vector type and non-empty mask");
  unsigned NumOfDests = divideCeil(Mask.size(), LegalNumElts);
  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||
      LegalNumElts >= Tp->getElementCount().getFixedValue())
  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
  unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
  unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");
      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
        Cost += TTI.getShuffleCost(
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
        Cost += TTI.getShuffleCost(
            SingleOpTy, RegMask, CostKind, 0, nullptr);
  if (!VLen || Mask.empty())
  LegalVT = TTI.getTypeLegalizationCost(
  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||
  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
  unsigned NormalizedVF =
  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");
      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
            SingleOpTy, RegMask, CostKind, 0, nullptr);
  if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
      (NumOfDestRegs <= 2 && NumShuffles < 4))
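  // Cost shuffles that can be lowered as vslideup/vslidedown pairs: one slide
  // per non-zero slide amount, plus a vmerge when two sources are combined.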
  if (!LT.second.isFixedLengthVector())

  auto GetSlideOpcode = [&](int SlideAmt) {
    bool IsVI = isUInt<5>(std::abs(SlideAmt));
      return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
    return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;

  std::array<std::pair<int, int>, 2> SrcInfo;
  if (SrcInfo[1].second == 0)
  if (SrcInfo[0].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
    FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
  if (SrcInfo[1].first == -1)
    return FirstSlideCost;
  if (SrcInfo[1].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
    SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
      getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
  return FirstSlideCost + SecondSlideCost + MaskCost;

         "Expected the Mask to match the return size if given");
         "Expected the same scalar types");
      FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
        *this, LT.second, ST->getRealVLen(),
    if (VRegSplittingCost.isValid())
      return VRegSplittingCost;
  if (Mask.size() >= 2) {
    MVT EltTp = LT.second.getVectorElementType();
        return 2 * LT.first * TLI->getLMULCost(LT.second);

      if (Mask[0] == 0 || Mask[0] == 1) {
        if (equal(DeinterleaveMask, Mask))
          return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,

      if (LT.second.getScalarSizeInBits() != 1 &&
        unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
        for (unsigned I = 0; I != NumSlides; ++I) {
          unsigned InsertIndex = SubVectorSize * (1 << I);
          std::pair<InstructionCost, MVT> DestLT =
          Cost += DestLT.first * TLI->getLMULCost(DestLT.second);

    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {
          getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);

    if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                          LT.second.getVectorNumElements() <= 256)) {
      auto &C = SrcTy->getContext();
      auto EC = SrcTy->getElementCount();
      return 2 * IndexCost +
             getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},

  if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
      if (std::optional<unsigned> VLen = ST->getRealVLen();
          VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
          SubLT.second.getSizeInBits() <= *VLen)
        getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
        getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
        (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
                                     Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {
          (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
          (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                        RISCV::VMV_X_S, RISCV::VMV_V_X,
        getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
        getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);

    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);

    if (!LT.second.isVector())
    if (SrcTy->getElementType()->isIntegerTy(1)) {
    MVT ContainerVT = LT.second;
    if (LT.second.isFixedLengthVector())
      ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
    if (ContainerVT.bitsLE(M1VT)) {
      if (LT.second.isFixedLengthVector())
        LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
      unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
      if (LT.second.isFixedLengthVector() &&
          isInt<5>(LT.second.getVectorNumElements() - 1))
        Opcodes[1] = RISCV::VRSUB_VI;
          getRISCVInstructionCost(Opcodes, LT.second, CostKind);
      return LT.first * (LenCost + GatherCost);

    unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
        getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;
        getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;
        getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
    return FixedCost + LT.first * (GatherCost + SlideCost);
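// getScalarizationOverhead: insert/extract cost for the demanded elements,
// with a cheaper path for building a vector via a chain of vslide1down.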
        Ty, DemandedElts, Insert, Extract, CostKind);
  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
    if (Ty->getScalarSizeInBits() == 1) {
    assert(LT.second.isFixedLengthVector());
    MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
        getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
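// getInterleavedMemoryOpCost: segment loads/stores (vlsegN/vssegN) are used
// when the factor and element type are legal; otherwise fall back to a wide
// memory op plus shuffles.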
    bool UseMaskForCond, bool UseMaskForGaps) const {
  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    if (LT.second.isVector()) {
          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
          TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
        if (ST->hasOptimizedSegmentLoadStore(Factor)) {
          MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
          Cost += Factor * TLI->getLMULCost(SubVecVT);
        return LT.first * Cost;
      CostKind, {TTI::OK_AnyValue, TTI::OP_None});
  unsigned NumLoads = getEstimatedVLFor(VTy);
  return NumLoads * MemOpCost;

  unsigned VF = FVTy->getNumElements() / Factor;
  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {
      Mask.resize(VF * Factor, -1);
      Cost += ShuffleCost;
                                       UseMaskForCond, UseMaskForGaps);
  assert(Opcode == Instruction::Store && "Opcode must be a store");
  return MemCost + ShuffleCost;
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&
      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

    unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
  bool IsLegal = (Opcode == Instruction::Store &&
                 (Opcode == Instruction::Load &&
  if (Opcode == Instruction::Store)
    Opcodes.append({RISCV::VCOMPRESS_VM});
    Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
         LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);

    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
      (Opcode != Instruction::Load && Opcode != Instruction::Store))
      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
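// Approximate instruction counts for vectorized intrinsics that have no
// single-instruction lowering (rounding, byte/bit reversal, count ops),
// keyed by intrinsic ID and element type.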
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
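  // Per-intrinsic costing: each case below maps the intrinsic to the RVV
  // instruction sequence it lowers to and scales by the legalized type's
  // register count (LT.first).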
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::lround:
  case Intrinsic::llround: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
      unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (!ST->hasVInstructionsBF16Minimal())
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (!ST->hasVInstructionsF16Minimal())
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (SrcEltSz > DstEltSz) {
        Ops = {RISCV::VFNCVT_X_F_W};
      } else if (SrcEltSz < DstEltSz) {
        Ops = {RISCV::VFWCVT_X_F_V};
        Ops = {RISCV::VFCVT_X_F_V};
      if (SrcEltSz > DstEltSz)
        return SrcLT.first *
               getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
      return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::fma:
  case Intrinsic::fmuladd: {
    if (ST->hasVInstructions() && LT.second.isVector())
          getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
  case Intrinsic::fabs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      if (LT.second.getVectorElementType() == MVT::bf16 ||
          (LT.second.getVectorElementType() == MVT::f16 &&
           !ST->hasVInstructionsF16()))
        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
          getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
  case Intrinsic::sqrt: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      MVT ConvType = LT.second;
      MVT FsqrtType = LT.second;
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (LT.second == MVT::nxv32bf16) {
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                    RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (LT.second == MVT::nxv32f16) {
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                    RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
        FsqrtOp = {RISCV::VFSQRT_V};
      return LT.first *
             (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
              getRISCVInstructionCost(ConvOp, ConvType, CostKind));
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {
    if (ST->hasStdExtZvbb() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::abs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
          getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
          getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
  case Intrinsic::stepvector: {
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  case Intrinsic::experimental_cttz_elts: {
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))
  case Intrinsic::experimental_vp_splat: {
    if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
  case Intrinsic::experimental_vp_splice: {
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if (!SrcTy->isVectorTy())
    if (!SrcLT.first.isValid() || !DstLT.first.isValid())
  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
      LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
          ICA.getID(), EltTy))
        return LT.first * Entry->Cost;
  if (ST->hasVInstructions() && PtrTy->isVectorTy())

  if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
      Dst->getScalarSizeInBits() > ST->getELen())
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  if (Src->getScalarSizeInBits() == 1) {
    return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
           DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
  if (Dst->getScalarSizeInBits() == 1) {
    return SrcLT.first *
           getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
      !SrcLT.first.isValid() || !DstLT.first.isValid() ||
          SrcLT.second.getSizeInBits()) ||
          DstLT.second.getSizeInBits()))
  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
  int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                (int)Log2_32(SrcLT.second.getScalarSizeInBits());
    if ((PowDiff < 1) || (PowDiff > 3))
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
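  // FP conversions: same-width converts are a single vfcvt; each halving or
  // doubling of the element width adds one narrowing (vfncvt) or widening
  // (vfwcvt) step, and f16 without native support goes through f32.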
  case ISD::FP_EXTEND:
    unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
    unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
                  : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
                                            : RISCV::VFNCVT_F_F_W;
    for (; SrcEltSize != DstEltSize;) {
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;

    unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((SrcEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
          VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize)
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
      Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
      if ((SrcEltSize / 2) > DstEltSize) {

    unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
    unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
    unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((DstEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
      Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize) {
      if ((DstEltSize / 2) > SrcEltSize) {
        unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
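// getMinMaxReductionCost: i1 reductions map to mask ops; fminimum/fmaximum
// need extra NaN handling; everything else is a single vred*/vfred* plus a
// move of the scalar result out of the vector register.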
  if (Ty->getScalarSizeInBits() > ST->getELen())
  if (Ty->getElementType()->isIntegerTy(1)) {
    if (IID == Intrinsic::umax || IID == Intrinsic::smin)
  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    case Intrinsic::maximum:
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
    case Intrinsic::minimum:
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
    const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
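// getArithmeticReductionCost: and/or/xor of i1 use mask instructions plus
// vcpop; integer and FP reductions use the corresponding vred*/vfred*
// instruction, with a per-register split op when the legalized type needs
// more than one vector register (LT.first > 1).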
    std::optional<FastMathFlags> FMF,
  if (Ty->getScalarSizeInBits() > ST->getELen())
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  Type *ElementTy = Ty->getElementType();
    if (LT.second == MVT::v1i1)
      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
    return ((LT.first > 2) ? (LT.first - 2) : 0) *
               getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
    return (LT.first - 1) *
               getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
    return (LT.first - 1) *
               getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
    if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
        LT.second.getScalarType() == MVT::bf16)
    for (unsigned i = 0; i < LT.first.getValue(); i++)
    return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
        getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
  return (LT.first - 1) +

  assert(OpInfo.isConstant() && "non constant operand?");
  if (OpInfo.isUniform())
  return getConstantPoolLoadCost(Ty, CostKind);
  EVT VT = TLI->getValueType(DL, Src, true);
  if (VT == MVT::Other)
  if (Opcode == Instruction::Store && OpInfo.isConstant())
  if (Src->isVectorTy() && LT.second.isVector() &&
          LT.second.getSizeInBits()))
  BaseCost *= TLI->getLMULCost(LT.second);
  return Cost + BaseCost;

                                     Op1Info, Op2Info, I);
                                     Op1Info, Op2Info, I);
  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
                                     Op1Info, Op2Info, I);
  auto GetConstantMatCost =
    if (OpInfo.isUniform())
    return getConstantPoolLoadCost(ValTy, CostKind);
    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    if (ValTy->getScalarSizeInBits() == 1) {
      return ConstantMatCost +
             getRISCVInstructionCost(
                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
    if (ValTy->getScalarSizeInBits() == 1) {
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
    return ConstantMatCost +
           getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
    if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
        (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
        (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
                                       Op1Info, Op2Info, I);
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
      ValTy->isIntegerTy() && !I->user_empty()) {
        return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
               U->getType()->isIntegerTy() &&
               !isa<ConstantData>(U->getOperand(1)) &&
               !isa<ConstantData>(U->getOperand(2));
                                     Op1Info, Op2Info, I);

  return Opcode == Instruction::PHI ? 0 : 1;
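// getVectorInstrCost: extract/insert of an element is costed as the slide
// (vslidedown/vslideup) needed to reach the lane plus the scalar<->vector
// move; i1 vectors get special handling, and unknown or out-of-range indices
// on scalable vectors are costed via a stack spill and reload.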
                                            const Value *Op1) const {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
  if (!LT.second.isVector()) {
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto Align = DL.getPrefTypeAlign(ElemTy);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;
  if (LT.second.isScalableVector() && !LT.first.isValid())
  if (Opcode == Instruction::ExtractElement) {
    return ExtendCost + ExtractCost;
  return ExtendCost + InsertCost + TruncCost;

  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
  if (auto VLEN = ST->getRealVLen()) {
    unsigned EltSize = LT.second.getScalarSizeInBits();
    unsigned M1Max = *VLEN / EltSize;
    Index = Index % M1Max;
  else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
  else if (Opcode == Instruction::InsertElement)
      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {
    Align VecAlign = DL.getPrefTypeAlign(Val);
    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
    if (Opcode == Instruction::ExtractElement)
  BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
                                             unsigned Index) const {
  assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
                             EC.getKnownMinValue() - 1 - Index, nullptr,

  if (!LT.second.isVector())
  unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&
      TLI->getOperationAction(ISDOpcode, LT.second) ==
    MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
    CastCost += LT.first * Args.size() *
    LT.second = PromotedVT;
  auto getConstantMatCost =
    return getConstantPoolLoadCost(Ty, CostKind);
    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);
  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;
    return CastCost + ConstantMatCost +
  if (Ty->isFPOrFPVectorTy())
  return CastCost + ConstantMatCost + LT.first * InstrCost;
  if (Info.isSameBase() && V != Base) {
    if (GEP->hasAllConstantIndices())
    unsigned Stride = DL.getTypeStoreSize(AccessTy);
    if (Info.isUnitStride() &&
            GEP->getType()->getPointerAddressSpace()))
          {TTI::OK_AnyValue, TTI::OP_None},
          {TTI::OK_AnyValue, TTI::OP_None}, {});

  if (ST->enableDefaultUnroll())
  if (L->getHeader()->getParent()->hasOptSize())
  L->getExitingBlocks(ExitingBlocks);
      << "Blocks: " << L->getNumBlocks() << "\n"
      << "Exit blocks: " << ExitingBlocks.size() << "\n");
  if (ExitingBlocks.size() > 2)
  if (L->getNumBlocks() > 4)
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (IsVectorized && I.getType()->isVectorTy())
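// RVV load/store intrinsics: the switch below records, per intrinsic family
// (unit-stride, strided, indexed; segment and masked variants), which operand
// is the pointer and whether a mask and EVL are present, so generic analyses
// can reason about the memory access.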
  bool HasMask = false;
                       bool IsWrite) -> int64_t {
    if (auto *TarExtTy =
      return TarExtTy->getIntParameter(0);
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vse_mask:
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8: {
      Ty = TarExtTy->getTypeParameter(0U);
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 1 - HasMask;
    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL);
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8: {
      Ty = TarExtTy->getTypeParameter(0U);
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;
      Alignment = Align(1);
    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL, Stride);
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei_mask:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8: {
      Ty = TarExtTy->getTypeParameter(0U);
    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;
    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Align(1), Mask, EVL,
  if (Ty->isVectorTy()) {
    if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
    if (Size.isScalable() && ST->hasVInstructions())
    if (ST->useRVVForFixedLengthVectors())

  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);

  return ST->enableUnalignedVectorMem();

  if (ST->hasVendorXCVmem() && !ST->is64Bit())

                                                 Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)
  return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
         ST->getMaxLMULForFixedLengthVectors();

                                                 Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)
  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
  return Considerable;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;
  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
  case Intrinsic::fmuladd:
  case Intrinsic::vp_fmuladd:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
  case Intrinsic::ssub_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_select:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  case Intrinsic::smin:
  case Intrinsic::vp_smin:
  case Intrinsic::umin:
  case Intrinsic::vp_umin:
  case Intrinsic::smax:
  case Intrinsic::vp_smax:
  case Intrinsic::umax:
  case Intrinsic::vp_umax:
  case Intrinsic::sadd_sat:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::vp_uadd_sat:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  if (I->isBitwiseLogicOp()) {
    if (!I->getType()->isVectorTy()) {
      if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
        for (auto &Op : I->operands()) {
    } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
      for (auto &Op : I->operands()) {
          Ops.push_back(&Not);
          Ops.push_back(&InsertElt);

  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
  if (!ST->sinkSplatOperands())
    for (Use &U : Op->uses()) {
      Ops.push_back(&Op->getOperandUse(0));
      Use *InsertEltUse = &Op->getOperandUse(0);
      Ops.push_back(&InsertElt->getOperandUse(1));
      Ops.push_back(InsertEltUse);
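// enableMemCmpExpansion: allow overlapping scalar loads sized by XLEN, and,
// for equality-only comparisons with RVV enabled, whole-vector loads from
// XLEN+1 bytes up to the largest fixed-length LMUL.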
  if (!ST->enableUnalignedScalarMem())
  if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  if (ST->is64Bit()) {
    Options.LoadSizes = {8, 4, 2, 1};
    Options.AllowedTailExpansions = {3, 5, 6};
    Options.LoadSizes = {4, 2, 1};
    Options.AllowedTailExpansions = {3};
  if (IsZeroCmp && ST->hasVInstructions()) {
    unsigned VLenB = ST->getRealMinVLen() / 8;
    unsigned MinSize = ST->getXLen() / 8 + 1;
    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
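These overrides are normally reached through the TargetTransformInfo facade rather than by calling RISCVTTIImpl directly. A minimal sketch, assuming a TargetTransformInfo reference named TTI and a vector type VecTy obtained elsewhere (both names are assumptions, not taken from this file):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

// Reciprocal-throughput cost of a vector add on the current target.
llvm::InstructionCost AddCost = TTI.getArithmeticInstrCost(
    llvm::Instruction::Add, VecTy,
    llvm::TargetTransformInfo::TCK_RecipThroughput);
if (AddCost.isValid()) {
  int64_t C = AddCost.getValue(); // numeric value; only meaningful when valid
  (void)C;
}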
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
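For context, the RVV specification defines VLMAX = (VLEN / SEW) * LMUL; for example, VLEN = 256, SEW = 32 and LMUL = 2 give VLMAX = 16 elements. This only restates the architectural relationship behind the quantity computed here, not the exact formula computeVLMAX applies to its VectorBits, EltSize and MinSize arguments.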
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
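A minimal sketch of fixed versus scalable sizes as used by register-width queries:

#include "llvm/Support/TypeSize.h"

llvm::TypeSize Fixed = llvm::TypeSize::getFixed(128);   // exactly 128 bits
llvm::TypeSize Scal = llvm::TypeSize::getScalable(64);  // 64 x vscale bits
bool IsScalable = Scal.isScalable();                    // true
uint64_t MinBits = Scal.getKnownMinValue();             // 64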
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
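A short sketch of the Type queries above (assumes an existing LLVMContext named Ctx):

#include "llvm/IR/Type.h"

llvm::Type *F32 = llvm::Type::getFloatTy(Ctx);
llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
bool FloatIsInt = F32->isIntegerTy();            // false
unsigned Bits = I64->getScalarSizeInBits();      // 64
llvm::Type *I32 = I64->getWithNewBitWidth(32);   // i32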
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
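A hedged sketch combining VectorType and ElementCount (assumes an existing LLVMContext named Ctx):

#include "llvm/IR/DerivedTypes.h"

// <vscale x 4 x i32>: a scalable vector with a known minimum of 4 lanes.
auto *ScalableTy = llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx),
                                         llvm::ElementCount::getScalable(4));
llvm::ElementCount EC = ScalableTy->getElementCount();
bool MultipleOf2 = EC.isKnownMultipleOf(2);           // true
llvm::ElementCount Half = EC.divideCoefficientBy(2);  // vscale x 2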
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
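A minimal sketch of the IR-level PatternMatch helpers listed here (V is an assumed llvm::Value pointer, not a name from this file):

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm::PatternMatch;

llvm::Value *Vec, *Elt, *Idx;
// Bind the operands if V is an insertelement instruction.
if (match(V, m_InsertElt(m_Value(Vec), m_Value(Elt), m_Value(Idx)))) {
  // Vec, Elt and Idx now point at the matched operands.
}
// Match a call to llvm.fabs regardless of its operand.
if (match(V, m_Intrinsic<llvm::Intrinsic::fabs>(m_Value()))) {
  // ...
}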
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
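A hedged sketch of the cost-table idiom inside a hypothetical cost hook (the table contents are made up for illustration):

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

static const llvm::CostTblEntry ExampleTbl[] = {
    {llvm::ISD::FADD, llvm::MVT::v4f32, 2}, // hypothetical cost of 2
};
if (const auto *Entry =
        llvm::CostTableLookup(ExampleTbl, llvm::ISD::FADD, llvm::MVT::v4f32))
  return Entry->Cost; // fragment: assumes it sits in a cost-returning function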
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count the number of 0 bits from the least significant bit upward, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
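A quick sketch of the math helpers referenced in this block:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

unsigned FloorLog = llvm::Log2_32(32);       // 5
unsigned CeilLog = llvm::Log2_32_Ceil(33);   // 6
bool Pow2 = llvm::isPowerOf2_32(64);         // true
bool Fits12 = llvm::isInt<12>(2047);         // true: fits a signed 12-bit imm
unsigned Ceil = llvm::divideCeil(10, 4);     // 3
int Trailing = llvm::countr_zero(8u);        // 3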
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
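A brief sketch of the shuffle-mask builders (the values shown follow their documented examples):

#include "llvm/Analysis/VectorUtils.h"

// Interleave two 4-element vectors: <0,4,1,5,2,6,3,7>.
llvm::SmallVector<int, 16> IMask =
    llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
// Every second lane starting at 0: <0,2,4,6>.
llvm::SmallVector<int, 16> SMask =
    llvm::createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);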
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes a shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
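A minimal sketch of Align versus MaybeAlign:

#include "llvm/Support/Alignment.h"

llvm::Align A(16);                        // a known 16-byte alignment
uint64_t Bytes = A.value();               // 16
llvm::MaybeAlign M;                       // alignment not specified
llvm::Align AtLeastOne = M.valueOrOne();  // Align(1)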
Information about a load/store intrinsic defined by the target.