24#define DEBUG_TYPE "riscvtti"
27 "riscv-v-register-bit-width-lmul",
29 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
30 "by autovectorized code. Fractional LMULs are not supported."),
36 "Overrides result used for getMaximumVF query which is used "
37 "exclusively by SLP vectorizer."),
46 size_t NumInstr = OpCodes.
size();
51 return LMULCost * NumInstr;
53 for (
auto Op : OpCodes) {
55 case RISCV::VRGATHER_VI:
58 case RISCV::VRGATHER_VV:
61 case RISCV::VSLIDEUP_VI:
62 case RISCV::VSLIDEDOWN_VI:
65 case RISCV::VSLIDEUP_VX:
66 case RISCV::VSLIDEDOWN_VX:
69 case RISCV::VREDMAX_VS:
70 case RISCV::VREDMIN_VS:
71 case RISCV::VREDMAXU_VS:
72 case RISCV::VREDMINU_VS:
73 case RISCV::VREDSUM_VS:
74 case RISCV::VREDAND_VS:
75 case RISCV::VREDOR_VS:
76 case RISCV::VREDXOR_VS:
77 case RISCV::VFREDMAX_VS:
78 case RISCV::VFREDMIN_VS:
79 case RISCV::VFREDUSUM_VS: {
86 case RISCV::VFREDOSUM_VS: {
100 case RISCV::VMANDN_MM:
101 case RISCV::VMNAND_MM:
103 case RISCV::VFIRST_M:
119 "getIntImmCost can only estimate cost of materialising integers");
140 auto *BO = dyn_cast<BinaryOperator>(Inst->
getOperand(0));
141 if (!BO || !BO->hasOneUse())
144 if (BO->getOpcode() != Instruction::Shl)
147 if (!isa<ConstantInt>(BO->getOperand(1)))
150 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
155 if (ShAmt == Trailing)
167 "getIntImmCost can only estimate cost of materialising integers");
175 bool Takes12BitImm =
false;
176 unsigned ImmArgIdx = ~0U;
179 case Instruction::GetElementPtr:
184 case Instruction::Store: {
189 if (
Idx == 1 || !Inst)
194 if (!getTLI()->allowsMemoryAccessForAlignment(
196 ST->getPointerAddressSpace(), ST->getAlign()))
202 case Instruction::Load:
205 case Instruction::And:
207 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
210 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
213 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
215 if (Inst &&
Idx == 1 && Imm.getBitWidth() <= ST->
getXLen() &&
218 Takes12BitImm =
true;
220 case Instruction::Add:
221 Takes12BitImm =
true;
223 case Instruction::Or:
224 case Instruction::Xor:
226 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
228 Takes12BitImm =
true;
230 case Instruction::Mul:
232 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
235 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
238 Takes12BitImm =
true;
240 case Instruction::Sub:
241 case Instruction::Shl:
242 case Instruction::LShr:
243 case Instruction::AShr:
244 Takes12BitImm =
true;
255 if (Imm.getSignificantBits() <= 64 &&
284 return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->
is64Bit())
293 switch (
II->getIntrinsicID()) {
297 case Intrinsic::vector_reduce_mul:
298 case Intrinsic::vector_reduce_fmul:
348 unsigned Size = Mask.size();
351 for (
unsigned I = 0;
I !=
Size; ++
I) {
352 if (
static_cast<unsigned>(Mask[
I]) ==
I)
358 for (
unsigned J =
I + 1; J !=
Size; ++J)
360 if (
static_cast<unsigned>(Mask[J]) != J %
I)
376 return cast<VectorType>(
EVT(IndexVT).getTypeForEVT(
C));
400 LegalVT =
TTI.getTypeLegalizationCost(
406 if (!NumOfDests.
isValid() || NumOfDests <= 1 ||
409 Tp->getElementType()->getPrimitiveSizeInBits() ||
413 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
416 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
421 unsigned E = *NumOfDests.
getValue();
422 unsigned NormalizedVF =
427 assert(NormalizedVF >= Mask.size() &&
428 "Normalized mask expected to be not shorter than original mask.");
434 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
435 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
436 if (ExtractedRegs.
test(SrcReg)) {
438 (SrcReg % NumOfSrcRegs) *
439 SingleOpTy->getNumElements(),
441 ExtractedRegs.
set(SrcReg);
450 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
451 if (ExtractedRegs.
test(Idx1)) {
454 (Idx1 % NumOfSrcRegs) * SingleOpTy->getNumElements(), SingleOpTy);
455 ExtractedRegs.
set(Idx1);
457 if (ExtractedRegs.
test(Idx2)) {
460 (Idx2 % NumOfSrcRegs) * SingleOpTy->getNumElements(), SingleOpTy);
461 ExtractedRegs.
set(Idx2);
471 if ((NumOfDestRegs > 2 && NumShuffles <=
static_cast<int>(NumOfDestRegs)) ||
472 (NumOfDestRegs <= 2 && NumShuffles < 4))
493 if (VRegSplittingCost.
isValid())
494 return VRegSplittingCost;
499 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
500 MVT EltTp = LT.second.getVectorElementType();
513 if (Mask[0] == 0 || Mask[0] == 1) {
517 if (
equal(DeinterleaveMask, Mask))
518 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
523 if (LT.second.getScalarSizeInBits() != 1 &&
526 unsigned NumSlides =
Log2_32(Mask.size() / SubVectorSize);
528 for (
unsigned I = 0;
I != NumSlides; ++
I) {
529 unsigned InsertIndex = SubVectorSize * (1 <<
I);
534 std::pair<InstructionCost, MVT> DestLT =
548 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
549 (LT.second.getScalarSizeInBits() != 8 ||
550 LT.second.getVectorNumElements() <= 256)) {
554 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second,
CostKind);
563 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
564 (LT.second.getScalarSizeInBits() != 8 ||
565 LT.second.getVectorNumElements() <= 256)) {
572 return 2 * IndexCost +
573 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
583 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
584 LT.second.isFixedLengthVector() &&
585 LT.second.getVectorElementType().getSizeInBits() ==
587 LT.second.getVectorNumElements() <
588 cast<FixedVectorType>(Tp)->getNumElements() &&
590 cast<FixedVectorType>(Tp)->getNumElements()) ==
591 static_cast<unsigned>(*LT.first.getValue())) {
592 unsigned NumRegs = *LT.first.getValue();
593 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
598 for (
unsigned I = 0, NumSrcRegs =
divideCeil(Mask.size(), SubVF);
599 I < NumSrcRegs; ++
I) {
600 bool IsSingleVector =
true;
603 Mask.slice(
I * SubVF,
604 I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
605 SubMask.
begin(), [&](
int I) ->
int {
606 if (I == PoisonMaskElem)
607 return PoisonMaskElem;
608 bool SingleSubVector = I / VF == 0;
609 IsSingleVector &= SingleSubVector;
610 return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
614 static_cast<unsigned>(
P.value()) ==
P.index();
619 SubVecTy, SubMask,
CostKind, 0,
nullptr);
647 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
650 if (MinVLen == MaxVLen &&
651 SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
652 SubLT.second.getSizeInBits() <= MinVLen)
660 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second,
CostKind);
666 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second,
CostKind);
678 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
683 Instruction::InsertElement);
684 if (LT.second.getScalarSizeInBits() == 1) {
692 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
705 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
706 RISCV::VMV_X_S, RISCV::VMV_V_X,
715 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second,
CostKind);
721 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second,
CostKind);
727 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
728 if (Index >= 0 && Index < 32)
729 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
730 else if (Index < 0 && Index > -32)
731 Opcodes[1] = RISCV::VSLIDEUP_VI;
732 return LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
750 if (LT.second.isFixedLengthVector())
752 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
753 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
754 if (LT.second.isFixedLengthVector() &&
755 isInt<5>(LT.second.getVectorNumElements() - 1))
756 Opcodes[1] = RISCV::VRSUB_VI;
758 getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
761 return LT.first * (LenCost + GatherCost + ExtendCost);
776 if (isa<ScalableVectorType>(Ty))
784 Ty, DemandedElts, Insert, Extract,
CostKind);
786 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
797 assert(LT.second.isFixedLengthVector());
801 cast<FixedVectorType>(Ty)->getNumElements() *
802 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second,
CostKind);
825 bool UseMaskForCond,
bool UseMaskForGaps) {
830 if (!UseMaskForCond && !UseMaskForGaps &&
831 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
832 auto *VTy = cast<VectorType>(VecTy);
835 if (LT.second.isVector()) {
838 VTy->getElementCount().divideCoefficientBy(Factor));
839 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
850 return LT.first *
Cost;
857 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
858 unsigned NumLoads = getEstimatedVLFor(VTy);
859 return NumLoads * MemOpCost;
866 if (isa<ScalableVectorType>(VecTy))
869 auto *FVTy = cast<FixedVectorType>(VecTy);
872 unsigned VF = FVTy->getNumElements() / Factor;
879 if (Opcode == Instruction::Load) {
881 for (
unsigned Index : Indices) {
906 UseMaskForCond, UseMaskForGaps);
908 assert(Opcode == Instruction::Store &&
"Opcode must be a store");
915 return MemCost + ShuffleCost;
919 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
925 if ((Opcode == Instruction::Load &&
927 (Opcode == Instruction::Store &&
935 auto &VTy = *cast<VectorType>(DataTy);
938 {TTI::OK_AnyValue, TTI::OP_None},
I);
939 unsigned NumLoads = getEstimatedVLFor(&VTy);
940 return NumLoads * MemOpCost;
944 unsigned Opcode,
Type *DataTy,
bool VariableMask,
Align Alignment,
946 bool IsLegal = (Opcode == Instruction::Store &&
948 (Opcode == Instruction::Load &&
973 if (Opcode == Instruction::Store)
974 Opcodes.append({RISCV::VCOMPRESS_VM});
976 Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
978 LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
982 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
984 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
986 (Opcode != Instruction::Load && Opcode != Instruction::Store))
996 auto &VTy = *cast<VectorType>(DataTy);
999 {TTI::OK_AnyValue, TTI::OP_None},
I);
1000 unsigned NumLoads = getEstimatedVLFor(&VTy);
1001 return NumLoads * MemOpCost;
1011 for (
auto *Ty : Tys) {
1012 if (!Ty->isVectorTy())
1026 {Intrinsic::floor, MVT::f32, 9},
1027 {Intrinsic::floor, MVT::f64, 9},
1028 {Intrinsic::ceil, MVT::f32, 9},
1029 {Intrinsic::ceil, MVT::f64, 9},
1030 {Intrinsic::trunc, MVT::f32, 7},
1031 {Intrinsic::trunc, MVT::f64, 7},
1032 {Intrinsic::round, MVT::f32, 9},
1033 {Intrinsic::round, MVT::f64, 9},
1034 {Intrinsic::roundeven, MVT::f32, 9},
1035 {Intrinsic::roundeven, MVT::f64, 9},
1036 {Intrinsic::rint, MVT::f32, 7},
1037 {Intrinsic::rint, MVT::f64, 7},
1038 {Intrinsic::lrint, MVT::i32, 1},
1039 {Intrinsic::lrint, MVT::i64, 1},
1040 {Intrinsic::llrint, MVT::i64, 1},
1041 {Intrinsic::nearbyint, MVT::f32, 9},
1042 {Intrinsic::nearbyint, MVT::f64, 9},
1043 {Intrinsic::bswap, MVT::i16, 3},
1044 {Intrinsic::bswap, MVT::i32, 12},
1045 {Intrinsic::bswap, MVT::i64, 31},
1046 {Intrinsic::vp_bswap, MVT::i16, 3},
1047 {Intrinsic::vp_bswap, MVT::i32, 12},
1048 {Intrinsic::vp_bswap, MVT::i64, 31},
1049 {Intrinsic::vp_fshl, MVT::i8, 7},
1050 {Intrinsic::vp_fshl, MVT::i16, 7},
1051 {Intrinsic::vp_fshl, MVT::i32, 7},
1052 {Intrinsic::vp_fshl, MVT::i64, 7},
1053 {Intrinsic::vp_fshr, MVT::i8, 7},
1054 {Intrinsic::vp_fshr, MVT::i16, 7},
1055 {Intrinsic::vp_fshr, MVT::i32, 7},
1056 {Intrinsic::vp_fshr, MVT::i64, 7},
1057 {Intrinsic::bitreverse, MVT::i8, 17},
1058 {Intrinsic::bitreverse, MVT::i16, 24},
1059 {Intrinsic::bitreverse, MVT::i32, 33},
1060 {Intrinsic::bitreverse, MVT::i64, 52},
1061 {Intrinsic::vp_bitreverse, MVT::i8, 17},
1062 {Intrinsic::vp_bitreverse, MVT::i16, 24},
1063 {Intrinsic::vp_bitreverse, MVT::i32, 33},
1064 {Intrinsic::vp_bitreverse, MVT::i64, 52},
1065 {Intrinsic::ctpop, MVT::i8, 12},
1066 {Intrinsic::ctpop, MVT::i16, 19},
1067 {Intrinsic::ctpop, MVT::i32, 20},
1068 {Intrinsic::ctpop, MVT::i64, 21},
1069 {Intrinsic::ctlz, MVT::i8, 19},
1070 {Intrinsic::ctlz, MVT::i16, 28},
1071 {Intrinsic::ctlz, MVT::i32, 31},
1072 {Intrinsic::ctlz, MVT::i64, 35},
1073 {Intrinsic::cttz, MVT::i8, 16},
1074 {Intrinsic::cttz, MVT::i16, 23},
1075 {Intrinsic::cttz, MVT::i32, 24},
1076 {Intrinsic::cttz, MVT::i64, 25},
1077 {Intrinsic::vp_ctpop, MVT::i8, 12},
1078 {Intrinsic::vp_ctpop, MVT::i16, 19},
1079 {Intrinsic::vp_ctpop, MVT::i32, 20},
1080 {Intrinsic::vp_ctpop, MVT::i64, 21},
1081 {Intrinsic::vp_ctlz, MVT::i8, 19},
1082 {Intrinsic::vp_ctlz, MVT::i16, 28},
1083 {Intrinsic::vp_ctlz, MVT::i32, 31},
1084 {Intrinsic::vp_ctlz, MVT::i64, 35},
1085 {Intrinsic::vp_cttz, MVT::i8, 16},
1086 {Intrinsic::vp_cttz, MVT::i16, 23},
1087 {Intrinsic::vp_cttz, MVT::i32, 24},
1088 {Intrinsic::vp_cttz, MVT::i64, 25},
1093#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
1094 case Intrinsic::VPID: \
1096#include "llvm/IR/VPIntrinsics.def"
1097#undef HELPER_MAP_VPID_TO_VPSD
1106 switch (ICA.
getID()) {
1107 case Intrinsic::lrint:
1108 case Intrinsic::llrint:
1110 if (
auto *VecTy = dyn_cast<VectorType>(ICA.
getArgTypes()[0]);
1111 VecTy && VecTy->getElementType()->is16bitFPTy())
1114 case Intrinsic::ceil:
1115 case Intrinsic::floor:
1116 case Intrinsic::trunc:
1117 case Intrinsic::rint:
1118 case Intrinsic::round:
1119 case Intrinsic::roundeven: {
1123 return LT.first * 8;
1126 case Intrinsic::umin:
1127 case Intrinsic::umax:
1128 case Intrinsic::smin:
1129 case Intrinsic::smax: {
1131 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
1136 switch (ICA.
getID()) {
1137 case Intrinsic::umin:
1138 Op = RISCV::VMINU_VV;
1140 case Intrinsic::umax:
1141 Op = RISCV::VMAXU_VV;
1143 case Intrinsic::smin:
1144 Op = RISCV::VMIN_VV;
1146 case Intrinsic::smax:
1147 Op = RISCV::VMAX_VV;
1150 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1154 case Intrinsic::sadd_sat:
1155 case Intrinsic::ssub_sat:
1156 case Intrinsic::uadd_sat:
1157 case Intrinsic::usub_sat: {
1161 switch (ICA.
getID()) {
1162 case Intrinsic::sadd_sat:
1163 Op = RISCV::VSADD_VV;
1165 case Intrinsic::ssub_sat:
1166 Op = RISCV::VSSUBU_VV;
1168 case Intrinsic::uadd_sat:
1169 Op = RISCV::VSADDU_VV;
1171 case Intrinsic::usub_sat:
1172 Op = RISCV::VSSUBU_VV;
1175 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1179 case Intrinsic::fma:
1180 case Intrinsic::fmuladd: {
1185 getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second,
CostKind);
1188 case Intrinsic::fabs: {
1196 if (LT.second.getVectorElementType() == MVT::bf16 ||
1197 (LT.second.getVectorElementType() == MVT::f16 &&
1199 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1204 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second,
CostKind);
1208 case Intrinsic::sqrt: {
1213 MVT ConvType = LT.second;
1214 MVT FsqrtType = LT.second;
1217 if (LT.second.getVectorElementType() == MVT::bf16) {
1218 if (LT.second == MVT::nxv32bf16) {
1219 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1220 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1221 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1222 ConvType = MVT::nxv16f16;
1223 FsqrtType = MVT::nxv16f32;
1225 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1226 FsqrtOp = {RISCV::VFSQRT_V};
1229 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1231 if (LT.second == MVT::nxv32f16) {
1232 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1233 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1234 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1235 ConvType = MVT::nxv16f16;
1236 FsqrtType = MVT::nxv16f32;
1238 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1239 FsqrtOp = {RISCV::VFSQRT_V};
1243 FsqrtOp = {RISCV::VFSQRT_V};
1246 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType,
CostKind) +
1247 getRISCVInstructionCost(ConvOp, ConvType,
CostKind));
1251 case Intrinsic::cttz:
1252 case Intrinsic::ctlz:
1253 case Intrinsic::ctpop: {
1255 if (ST->
hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
1257 switch (ICA.
getID()) {
1258 case Intrinsic::cttz:
1261 case Intrinsic::ctlz:
1264 case Intrinsic::ctpop:
1265 Op = RISCV::VCPOP_V;
1268 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1272 case Intrinsic::abs: {
1278 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1283 case Intrinsic::get_active_lane_mask: {
1293 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1299 case Intrinsic::stepvector: {
1304 return getRISCVInstructionCost(RISCV::VID_V, LT.second,
CostKind) +
1306 getRISCVInstructionCost(RISCV::VADD_VX, LT.second,
CostKind);
1307 return 1 + (LT.first - 1);
1309 case Intrinsic::experimental_cttz_elts: {
1321 cast<ConstantInt>(ICA.
getArgs()[1])->isZero())
1329 case Intrinsic::vp_rint: {
1334 return Cost * LT.first;
1337 case Intrinsic::vp_nearbyint: {
1342 return Cost * LT.first;
1345 case Intrinsic::vp_ceil:
1346 case Intrinsic::vp_floor:
1347 case Intrinsic::vp_round:
1348 case Intrinsic::vp_roundeven:
1349 case Intrinsic::vp_roundtozero: {
1356 return Cost * LT.first;
1359 case Intrinsic::vp_fneg: {
1360 std::optional<unsigned> FOp =
1366 case Intrinsic::vp_select: {
1373 case Intrinsic::vp_merge:
1377 case Intrinsic::experimental_vp_splat: {
1382 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1387 case Intrinsic::experimental_vp_splice: {
1399 LT.second.isVector()) {
1400 MVT EltTy = LT.second.getVectorElementType();
1402 ICA.
getID(), EltTy))
1403 return LT.first * Entry->Cost;
1415 bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
1423 Dst->getScalarSizeInBits() > ST->
getELen())
1427 assert(ISD &&
"Invalid opcode");
1441 if (Src->getScalarSizeInBits() == 1) {
1446 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second,
CostKind) +
1447 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1453 if (Dst->getScalarSizeInBits() == 1) {
1459 return SrcLT.first *
1460 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1472 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1474 SrcLT.second.getSizeInBits()) ||
1476 DstLT.second.getSizeInBits()))
1480 assert((SrcLT.first == 1) && (DstLT.first == 1) &&
"Illegal type");
1482 int PowDiff = (int)
Log2_32(DstLT.second.getScalarSizeInBits()) -
1483 (
int)
Log2_32(SrcLT.second.getScalarSizeInBits());
1487 if ((PowDiff < 1) || (PowDiff > 3))
1489 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1490 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1493 return getRISCVInstructionCost(
Op, DstLT.second,
CostKind);
1499 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1500 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1504 : RISCV::VFNCVT_F_F_W;
1506 for (; SrcEltSize != DstEltSize;) {
1512 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1520 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1522 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1524 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1525 unsigned SrcEltSize = Src->getScalarSizeInBits();
1526 unsigned DstEltSize = Dst->getScalarSizeInBits();
1528 if ((SrcEltSize == 16) &&
1534 cast<VectorType>(Dst)->getElementCount());
1535 std::pair<InstructionCost, MVT> VecF32LT =
1538 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1543 if (DstEltSize == SrcEltSize)
1544 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1545 else if (DstEltSize > SrcEltSize)
1546 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1552 Cost += getRISCVInstructionCost(FNCVT, VecVT,
CostKind);
1553 if ((SrcEltSize / 2) > DstEltSize) {
1564 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1565 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1566 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1567 unsigned SrcEltSize = Src->getScalarSizeInBits();
1568 unsigned DstEltSize = Dst->getScalarSizeInBits();
1571 if ((DstEltSize == 16) &&
1577 cast<VectorType>(Dst)->getElementCount());
1578 std::pair<InstructionCost, MVT> VecF32LT =
1581 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1586 if (DstEltSize == SrcEltSize)
1587 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1588 else if (DstEltSize > SrcEltSize) {
1589 if ((DstEltSize / 2) > SrcEltSize) {
1592 cast<VectorType>(Dst)->getElementCount());
1593 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1596 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1598 Cost += getRISCVInstructionCost(FNCVT, DstLT.second,
CostKind);
1605unsigned RISCVTTIImpl::getEstimatedVLFor(
VectorType *Ty) {
1606 if (isa<ScalableVectorType>(Ty)) {
1612 return cast<FixedVectorType>(Ty)->getNumElements();
1631 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1637 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1641 case Intrinsic::maximum:
1643 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1645 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1660 case Intrinsic::minimum:
1662 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1664 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1679 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1688 case Intrinsic::smax:
1689 SplitOp = RISCV::VMAX_VV;
1690 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1692 case Intrinsic::smin:
1693 SplitOp = RISCV::VMIN_VV;
1694 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1696 case Intrinsic::umax:
1697 SplitOp = RISCV::VMAXU_VV;
1698 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1700 case Intrinsic::umin:
1701 SplitOp = RISCV::VMINU_VV;
1702 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1704 case Intrinsic::maxnum:
1705 SplitOp = RISCV::VFMAX_VV;
1706 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1708 case Intrinsic::minnum:
1709 SplitOp = RISCV::VFMIN_VV;
1710 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1715 (LT.first > 1) ? (LT.first - 1) *
1716 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
1718 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1723 std::optional<FastMathFlags> FMF,
1733 assert(ISD &&
"Invalid opcode");
1745 if (LT.second == MVT::v1i1)
1746 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second,
CostKind) +
1764 return ((LT.first > 2) ? (LT.first - 2) : 0) *
1765 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second,
CostKind) +
1766 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
CostKind) +
1767 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
1776 return (LT.first - 1) *
1777 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind) +
1778 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) + 1;
1786 return (LT.first - 1) *
1787 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second,
CostKind) +
1788 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
1801 SplitOp = RISCV::VADD_VV;
1802 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1805 SplitOp = RISCV::VOR_VV;
1806 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
1809 SplitOp = RISCV::VXOR_VV;
1810 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1813 SplitOp = RISCV::VAND_VV;
1814 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
1818 if ((LT.second.getVectorElementType() == MVT::f16 &&
1820 LT.second.getVectorElementType() == MVT::bf16)
1824 for (
unsigned i = 0; i < LT.first.getValue(); i++)
1827 return getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1829 SplitOp = RISCV::VFADD_VV;
1830 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1835 (LT.first > 1) ? (LT.first - 1) *
1836 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
1838 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1842 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *ValTy,
1853 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1859 if (IsUnsigned && Opcode == Instruction::Add &&
1860 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
1864 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind);
1871 return (LT.first - 1) +
1879 if (!isa<VectorType>(Ty))
1891 return getConstantPoolLoadCost(Ty,
CostKind);
1903 if (VT == MVT::Other)
1908 if (Opcode == Instruction::Store && OpInfo.
isConstant())
1923 if (Src->
isVectorTy() && LT.second.isVector() &&
1925 LT.second.getSizeInBits()))
1937 return Cost + BaseCost;
1947 Op1Info, Op2Info,
I);
1951 Op1Info, Op2Info,
I);
1956 Op1Info, Op2Info,
I);
1958 auto GetConstantMatCost =
1960 if (OpInfo.isUniform())
1965 return getConstantPoolLoadCost(ValTy,
CostKind);
1970 ConstantMatCost += GetConstantMatCost(Op1Info);
1972 ConstantMatCost += GetConstantMatCost(Op2Info);
1975 if (Opcode == Instruction::Select && ValTy->
isVectorTy()) {
1981 return ConstantMatCost +
1983 getRISCVInstructionCost(
1984 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1988 return ConstantMatCost +
1989 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
1999 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
2000 return ConstantMatCost +
2002 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
2004 LT.first * getRISCVInstructionCost(
2005 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2012 return ConstantMatCost +
2013 LT.first * getRISCVInstructionCost(
2014 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
2018 if ((Opcode == Instruction::ICmp) && ValTy->
isVectorTy() &&
2022 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
2027 if ((Opcode == Instruction::FCmp) && ValTy->
isVectorTy() &&
2032 return ConstantMatCost +
2033 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind);
2043 Op1Info, Op2Info,
I);
2052 return ConstantMatCost +
2053 LT.first * getRISCVInstructionCost(
2054 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
2061 return ConstantMatCost +
2063 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
2072 return ConstantMatCost +
2074 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second,
CostKind);
2087 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
2088 U->getType()->isIntegerTy() &&
2089 !isa<ConstantData>(U->getOperand(1)) &&
2090 !isa<ConstantData>(U->getOperand(2));
2098 Op1Info, Op2Info,
I);
2105 return Opcode == Instruction::PHI ? 0 : 1;
2112 unsigned Index,
Value *Op0,
2116 if (Opcode != Instruction::ExtractElement &&
2117 Opcode != Instruction::InsertElement)
2124 if (!LT.second.isVector()) {
2125 auto *FixedVecTy = cast<FixedVectorType>(Val);
2133 Type *ElemTy = FixedVecTy->getElementType();
2134 auto NumElems = FixedVecTy->getNumElements();
2140 return Opcode == Instruction::ExtractElement
2141 ? StoreCost * NumElems + LoadCost
2142 : (StoreCost + LoadCost) * NumElems + StoreCost;
2146 if (LT.second.isScalableVector() && !LT.first.isValid())
2153 cast<VectorType>(Val)->getElementCount());
2154 if (Opcode == Instruction::ExtractElement) {
2160 return ExtendCost + ExtractCost;
2170 return ExtendCost + InsertCost + TruncCost;
2176 unsigned BaseCost = 1;
2178 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2183 if (LT.second.isFixedLengthVector()) {
2184 unsigned Width = LT.second.getVectorNumElements();
2185 Index = Index % Width;
2191 unsigned EltSize = LT.second.getScalarSizeInBits();
2192 unsigned M1Max = *VLEN / EltSize;
2193 Index = Index % M1Max;
2199 else if (Opcode == Instruction::InsertElement)
2207 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2208 LT.second.isScalableVector()))) {
2216 if (Opcode == Instruction::ExtractElement)
2252 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2254 return BaseCost + SlideCost;
2280 if (!LT.second.isVector())
2288 if ((LT.second.getVectorElementType() == MVT::f16 ||
2289 LT.second.getVectorElementType() == MVT::bf16) &&
2296 CastCost += LT.first * Args.size() *
2304 LT.second = PromotedVT;
2307 auto getConstantMatCost =
2317 return getConstantPoolLoadCost(Ty,
CostKind);
2323 ConstantMatCost += getConstantMatCost(0, Op1Info);
2325 ConstantMatCost += getConstantMatCost(1, Op2Info);
2328 switch (ISDOpcode) {
2331 Op = RISCV::VADD_VV;
2336 Op = RISCV::VSLL_VV;
2346 Op = RISCV::VMUL_VV;
2350 Op = RISCV::VDIV_VV;
2354 Op = RISCV::VREM_VV;
2358 Op = RISCV::VFADD_VV;
2361 Op = RISCV::VFMUL_VV;
2364 Op = RISCV::VFDIV_VV;
2367 Op = RISCV::VFSGNJN_VV;
2372 return CastCost + ConstantMatCost +
2383 return CastCost + ConstantMatCost + LT.first *
InstrCost;
2403 const auto *
GEP = dyn_cast<GetElementPtrInst>(V);
2406 if (
Info.isSameBase() && V !=
Base) {
2407 if (
GEP->hasAllConstantIndices())
2414 if (
Info.isUnitStride() &&
2420 GEP->getType()->getPointerAddressSpace()))
2423 {TTI::OK_AnyValue, TTI::OP_None},
2424 {TTI::OK_AnyValue, TTI::OP_None}, {});
2441 if (ST->enableDefaultUnroll())
2451 if (L->getHeader()->getParent()->hasOptSize())
2455 L->getExitingBlocks(ExitingBlocks);
2457 <<
"Blocks: " << L->getNumBlocks() <<
"\n"
2458 <<
"Exit blocks: " << ExitingBlocks.
size() <<
"\n");
2462 if (ExitingBlocks.
size() > 2)
2467 if (L->getNumBlocks() > 4)
2477 for (
auto *BB : L->getBlocks()) {
2478 for (
auto &
I : *BB) {
2481 if (
I.getType()->isVectorTy())
2484 if (isa<CallInst>(
I) || isa<InvokeInst>(
I)) {
2519 Type *EltTy = cast<VectorType>(Ty)->getElementType();
2523 cast<VectorType>(Ty));
2549 return std::max<unsigned>(1U, RegWidth.
getFixedValue() / ElemWidth);
2555 if (ST->hasVendorXCVmem() && !ST->
is64Bit())
2577 auto *VTy = dyn_cast<VectorType>(DataTy);
2578 if (!VTy || VTy->isScalableTy())
2586 if (VTy->getElementType()->isIntegerTy(8))
2587 if (VTy->getElementCount().getFixedValue() > 256)
2594 auto *VTy = dyn_cast<VectorType>(DataTy);
2595 if (!VTy || VTy->isScalableTy())
2609 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) {
2610 bool Considerable =
false;
2611 AllowPromotionWithoutCommonHeader =
false;
2612 if (!isa<SExtInst>(&
I))
2614 Type *ConsideredSExtType =
2616 if (
I.getType() != ConsideredSExtType)
2620 for (
const User *U :
I.users()) {
2622 Considerable =
true;
2626 if (GEPInst->getNumOperands() > 2) {
2627 AllowPromotionWithoutCommonHeader =
true;
2632 return Considerable;
2637 case Instruction::Add:
2638 case Instruction::Sub:
2639 case Instruction::Mul:
2640 case Instruction::And:
2641 case Instruction::Or:
2642 case Instruction::Xor:
2643 case Instruction::FAdd:
2644 case Instruction::FSub:
2645 case Instruction::FMul:
2646 case Instruction::FDiv:
2647 case Instruction::ICmp:
2648 case Instruction::FCmp:
2650 case Instruction::Shl:
2651 case Instruction::LShr:
2652 case Instruction::AShr:
2653 case Instruction::UDiv:
2654 case Instruction::SDiv:
2655 case Instruction::URem:
2656 case Instruction::SRem:
2657 case Instruction::Select:
2658 return Operand == 1;
2671 auto *
II = dyn_cast<IntrinsicInst>(
I);
2675 switch (
II->getIntrinsicID()) {
2676 case Intrinsic::fma:
2677 case Intrinsic::vp_fma:
2678 case Intrinsic::fmuladd:
2679 case Intrinsic::vp_fmuladd:
2680 return Operand == 0 || Operand == 1;
2681 case Intrinsic::vp_shl:
2682 case Intrinsic::vp_lshr:
2683 case Intrinsic::vp_ashr:
2684 case Intrinsic::vp_udiv:
2685 case Intrinsic::vp_sdiv:
2686 case Intrinsic::vp_urem:
2687 case Intrinsic::vp_srem:
2688 case Intrinsic::ssub_sat:
2689 case Intrinsic::vp_ssub_sat:
2690 case Intrinsic::usub_sat:
2691 case Intrinsic::vp_usub_sat:
2692 case Intrinsic::vp_select:
2693 return Operand == 1;
2695 case Intrinsic::vp_add:
2696 case Intrinsic::vp_mul:
2697 case Intrinsic::vp_and:
2698 case Intrinsic::vp_or:
2699 case Intrinsic::vp_xor:
2700 case Intrinsic::vp_fadd:
2701 case Intrinsic::vp_fmul:
2702 case Intrinsic::vp_icmp:
2703 case Intrinsic::vp_fcmp:
2704 case Intrinsic::smin:
2705 case Intrinsic::vp_smin:
2706 case Intrinsic::umin:
2707 case Intrinsic::vp_umin:
2708 case Intrinsic::smax:
2709 case Intrinsic::vp_smax:
2710 case Intrinsic::umax:
2711 case Intrinsic::vp_umax:
2712 case Intrinsic::sadd_sat:
2713 case Intrinsic::vp_sadd_sat:
2714 case Intrinsic::uadd_sat:
2715 case Intrinsic::vp_uadd_sat:
2717 case Intrinsic::vp_sub:
2718 case Intrinsic::vp_fsub:
2719 case Intrinsic::vp_fdiv:
2720 return Operand == 0 || Operand == 1;
2741 if (!ST->sinkSplatOperands())
2744 for (
auto OpIdx :
enumerate(
I->operands())) {
2748 Instruction *
Op = dyn_cast<Instruction>(OpIdx.value().get());
2750 if (!
Op ||
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
2759 if (cast<VectorType>(
Op->getType())->getElementType()->isIntegerTy(1))
2764 for (
Use &U :
Op->uses()) {
2770 Use *InsertEltUse = &
Op->getOperandUse(0);
2772 auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
2773 if (isa<FPExtInst>(InsertElt->getOperand(1)))
2774 Ops.
push_back(&InsertElt->getOperandUse(1));
2786 if (!ST->enableUnalignedScalarMem())
2789 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
2792 Options.AllowOverlappingLoads =
true;
2796 Options.LoadSizes = {8, 4, 2, 1};
2797 Options.AllowedTailExpansions = {3, 5, 6};
2799 Options.LoadSizes = {4, 2, 1};
2800 Options.AllowedTailExpansions = {3};
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getMaxVScale() const
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
unsigned getRegUsageForType(Type *Ty)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
bool isLegalAddImmediate(int64_t imm)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsF64() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
unsigned getRealMaxVLen() const
bool hasVInstructionsF32() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
std::optional< unsigned > getMaxVScale() const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool shouldExpandReduction(const IntrinsicInst *II) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment)
bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getRegUsageForType(Type *Ty)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
MVT getContainerForFixedLengthVector(MVT VT) const
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVII::VLMUL getLMUL(MVT VT)
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
LLVM Value Representation.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
DWARFExpression::Operation Op
OutputIt copy(R &&Range, OutputIt Out)
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
This struct is a compact representation of a valid (non-zero power of two) alignment.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.