23#include "llvm/IR/IntrinsicsAArch64.h"
34#define DEBUG_TYPE "aarch64tti"
40 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
58 "Penalty of calling a function that requires a change to PSTATE.SM"));
62 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
73 cl::desc(
"The cost of a histcnt instruction"));
77 cl::desc(
"The number of instructions to search for a redundant dmb"));
80class TailFoldingOption {
95 bool NeedsDefault =
true;
// Record whether the target-default tail-folding bits should be used in
// place of explicitly-parsed initial bits (cleared once an explicit
// disabled/all/simple option is seen; consulted when Bits is computed).
99 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
114 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
115 "Initial bits should only include one of "
116 "(disabled|all|simple|default)");
117 Bits = NeedsDefault ? DefaultBits : InitialBits;
119 Bits &= ~DisableBits;
125 errs() <<
"invalid argument '" << Opt
126 <<
"' to -sve-tail-folding=; the option should be of the form\n"
127 " (disabled|all|default|simple)[+(reductions|recurrences"
128 "|reverse|noreductions|norecurrences|noreverse)]\n";
134 void operator=(
const std::string &Val) {
143 setNeedsDefault(
false);
148 unsigned StartIdx = 1;
149 if (TailFoldTypes[0] ==
"disabled")
150 setInitialBits(TailFoldingOpts::Disabled);
151 else if (TailFoldTypes[0] ==
"all")
152 setInitialBits(TailFoldingOpts::All);
153 else if (TailFoldTypes[0] ==
"default")
154 setNeedsDefault(
true);
155 else if (TailFoldTypes[0] ==
"simple")
156 setInitialBits(TailFoldingOpts::Simple);
159 setInitialBits(TailFoldingOpts::Disabled);
162 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
163 if (TailFoldTypes[
I] ==
"reductions")
164 setEnableBit(TailFoldingOpts::Reductions);
165 else if (TailFoldTypes[
I] ==
"recurrences")
166 setEnableBit(TailFoldingOpts::Recurrences);
167 else if (TailFoldTypes[
I] ==
"reverse")
168 setEnableBit(TailFoldingOpts::Reverse);
169 else if (TailFoldTypes[
I] ==
"noreductions")
170 setDisableBit(TailFoldingOpts::Reductions);
171 else if (TailFoldTypes[
I] ==
"norecurrences")
172 setDisableBit(TailFoldingOpts::Recurrences);
173 else if (TailFoldTypes[
I] ==
"noreverse")
174 setDisableBit(TailFoldingOpts::Reverse);
191 "Control the use of vectorisation using tail-folding for SVE where the"
192 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
193 "\ndisabled (Initial) No loop types will vectorize using "
195 "\ndefault (Initial) Uses the default tail-folding settings for "
197 "\nall (Initial) All legal loop types will vectorize using "
199 "\nsimple (Initial) Use tail-folding for simple loops (not "
200 "reductions or recurrences)"
201 "\nreductions Use tail-folding for loops containing reductions"
202 "\nnoreductions Inverse of above"
203 "\nrecurrences Use tail-folding for loops containing fixed order "
205 "\nnorecurrences Inverse of above"
206 "\nreverse Use tail-folding for loops requiring reversed "
208 "\nnoreverse Inverse of above"),
240 if (isa<CallInst>(
I) && !
I.isDebugOrPseudoInst() &&
241 (cast<CallInst>(
I).isInlineAsm() || isa<IntrinsicInst>(
I) ||
252 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
254 FeatureStr.
split(Features,
",");
259 return F.hasFnAttribute(
"fmv-features");
263 AArch64::FeatureExecuteOnly,
296 TM.getSubtargetImpl(*Caller)->getFeatureBits();
298 TM.getSubtargetImpl(*Callee)->getFeatureBits();
303 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
304 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
306 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
324 auto FVTy = dyn_cast<FixedVectorType>(Ty);
326 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
335 unsigned DefaultCallPenalty)
const {
360 if (
F == Call.getCaller())
366 return DefaultCallPenalty;
406 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
411 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
417 return std::max<InstructionCost>(1,
Cost);
432 unsigned ImmIdx = ~0U;
436 case Instruction::GetElementPtr:
441 case Instruction::Store:
444 case Instruction::Add:
445 case Instruction::Sub:
446 case Instruction::Mul:
447 case Instruction::UDiv:
448 case Instruction::SDiv:
449 case Instruction::URem:
450 case Instruction::SRem:
451 case Instruction::And:
452 case Instruction::Or:
453 case Instruction::Xor:
454 case Instruction::ICmp:
458 case Instruction::Shl:
459 case Instruction::LShr:
460 case Instruction::AShr:
464 case Instruction::Trunc:
465 case Instruction::ZExt:
466 case Instruction::SExt:
467 case Instruction::IntToPtr:
468 case Instruction::PtrToInt:
469 case Instruction::BitCast:
470 case Instruction::PHI:
471 case Instruction::Call:
472 case Instruction::Select:
473 case Instruction::Ret:
474 case Instruction::Load:
479 int NumConstants = (BitSize + 63) / 64;
503 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
509 case Intrinsic::sadd_with_overflow:
510 case Intrinsic::uadd_with_overflow:
511 case Intrinsic::ssub_with_overflow:
512 case Intrinsic::usub_with_overflow:
513 case Intrinsic::smul_with_overflow:
514 case Intrinsic::umul_with_overflow:
516 int NumConstants = (BitSize + 63) / 64;
523 case Intrinsic::experimental_stackmap:
524 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
527 case Intrinsic::experimental_patchpoint_void:
528 case Intrinsic::experimental_patchpoint:
529 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
532 case Intrinsic::experimental_gc_statepoint:
533 if ((
Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
543 if (TyWidth == 32 || TyWidth == 64)
567 unsigned TotalHistCnts = 1;
576 if (
VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
577 unsigned EC = VTy->getElementCount().getKnownMinValue();
582 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
584 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
588 TotalHistCnts = EC / NaturalVectorWidth;
604 if (
auto *VTy = dyn_cast<ScalableVectorType>(
RetTy))
608 switch (ICA.
getID()) {
609 case Intrinsic::experimental_vector_histogram_add: {
616 case Intrinsic::umin:
617 case Intrinsic::umax:
618 case Intrinsic::smin:
619 case Intrinsic::smax: {
620 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
621 MVT::v8i16, MVT::v2i32, MVT::v4i32,
622 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
626 if (LT.second == MVT::v2i64)
628 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }))
632 case Intrinsic::sadd_sat:
633 case Intrinsic::ssub_sat:
634 case Intrinsic::uadd_sat:
635 case Intrinsic::usub_sat: {
636 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
637 MVT::v8i16, MVT::v2i32, MVT::v4i32,
643 LT.second.getScalarSizeInBits() ==
RetTy->getScalarSizeInBits() ? 1 : 4;
644 if (
any_of(ValidSatTys, [<](
MVT M) {
return M == LT.second; }))
645 return LT.first * Instrs;
651 return LT.first * Instrs;
655 case Intrinsic::abs: {
656 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
657 MVT::v8i16, MVT::v2i32, MVT::v4i32,
660 if (
any_of(ValidAbsTys, [<](
MVT M) {
return M == LT.second; }))
664 case Intrinsic::bswap: {
665 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
666 MVT::v4i32, MVT::v2i64};
668 if (
any_of(ValidAbsTys, [<](
MVT M) {
return M == LT.second; }) &&
669 LT.second.getScalarSizeInBits() ==
RetTy->getScalarSizeInBits())
674 case Intrinsic::fmuladd: {
679 (EltTy->
isHalfTy() && ST->hasFullFP16()))
683 case Intrinsic::stepvector: {
692 Cost += AddCost * (LT.first - 1);
696 case Intrinsic::vector_extract:
697 case Intrinsic::vector_insert: {
710 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
729 case Intrinsic::bitreverse: {
731 {Intrinsic::bitreverse, MVT::i32, 1},
732 {Intrinsic::bitreverse, MVT::i64, 1},
733 {Intrinsic::bitreverse, MVT::v8i8, 1},
734 {Intrinsic::bitreverse, MVT::v16i8, 1},
735 {Intrinsic::bitreverse, MVT::v4i16, 2},
736 {Intrinsic::bitreverse, MVT::v8i16, 2},
737 {Intrinsic::bitreverse, MVT::v2i32, 2},
738 {Intrinsic::bitreverse, MVT::v4i32, 2},
739 {Intrinsic::bitreverse, MVT::v1i64, 2},
740 {Intrinsic::bitreverse, MVT::v2i64, 2},
750 return LegalisationCost.first * Entry->Cost + 1;
752 return LegalisationCost.first * Entry->Cost;
756 case Intrinsic::ctpop: {
757 if (!ST->hasNEON()) {
778 RetTy->getScalarSizeInBits()
781 return LT.first * Entry->Cost + ExtraCost;
785 case Intrinsic::sadd_with_overflow:
786 case Intrinsic::uadd_with_overflow:
787 case Intrinsic::ssub_with_overflow:
788 case Intrinsic::usub_with_overflow:
789 case Intrinsic::smul_with_overflow:
790 case Intrinsic::umul_with_overflow: {
792 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
793 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
794 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
795 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
796 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
797 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
798 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
799 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
800 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
801 {Intrinsic::usub_with_overflow, MVT::i8, 3},
802 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
803 {Intrinsic::usub_with_overflow, MVT::i16, 3},
804 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
805 {Intrinsic::usub_with_overflow, MVT::i32, 1},
806 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
807 {Intrinsic::usub_with_overflow, MVT::i64, 1},
808 {Intrinsic::smul_with_overflow, MVT::i8, 5},
809 {Intrinsic::umul_with_overflow, MVT::i8, 4},
810 {Intrinsic::smul_with_overflow, MVT::i16, 5},
811 {Intrinsic::umul_with_overflow, MVT::i16, 4},
812 {Intrinsic::smul_with_overflow, MVT::i32, 2},
813 {Intrinsic::umul_with_overflow, MVT::i32, 2},
814 {Intrinsic::smul_with_overflow, MVT::i64, 3},
815 {Intrinsic::umul_with_overflow, MVT::i64, 3},
824 case Intrinsic::fptosi_sat:
825 case Intrinsic::fptoui_sat: {
828 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
833 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
834 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
835 LT.second == MVT::v2f64)) {
837 (LT.second == MVT::f64 && MTy == MVT::i32) ||
838 (LT.second == MVT::f32 && MTy == MVT::i64)))
847 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
854 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
855 (LT.second == MVT::f16 && MTy == MVT::i64) ||
856 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
870 if ((LT.second.getScalarType() == MVT::f32 ||
871 LT.second.getScalarType() == MVT::f64 ||
872 LT.second.getScalarType() == MVT::f16) &&
876 if (LT.second.isVector())
880 LegalTy, {LegalTy, LegalTy});
883 LegalTy, {LegalTy, LegalTy});
885 return LT.first *
Cost +
886 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
893 if (LT.second.isVector()) {
905 Type *CondTy =
RetTy->getWithNewBitWidth(1);
911 return LT.first *
Cost;
913 case Intrinsic::fshl:
914 case Intrinsic::fshr: {
926 {Intrinsic::fshl, MVT::v4i32, 2},
927 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
928 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
929 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
935 return LegalisationCost.first * Entry->Cost;
939 if (!
RetTy->isIntegerTy())
944 bool HigherCost = (
RetTy->getScalarSizeInBits() != 32 &&
945 RetTy->getScalarSizeInBits() < 64) ||
946 (
RetTy->getScalarSizeInBits() % 64 != 0);
947 unsigned ExtraCost = HigherCost ? 1 : 0;
948 if (
RetTy->getScalarSizeInBits() == 32 ||
949 RetTy->getScalarSizeInBits() == 64)
956 return TyL.first + ExtraCost;
958 case Intrinsic::get_active_lane_mask: {
963 if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
974 return RetTy->getNumElements() * 2;
979 case Intrinsic::experimental_vector_match: {
980 auto *NeedleTy = cast<FixedVectorType>(ICA.
getArgTypes()[1]);
982 unsigned SearchSize = NeedleTy->getNumElements();
983 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
990 if (isa<FixedVectorType>(
RetTy))
996 case Intrinsic::experimental_cttz_elts: {
998 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1017 auto RequiredType =
II.getType();
1019 auto *PN = dyn_cast<PHINode>(
II.getArgOperand(0));
1020 assert(PN &&
"Expected Phi Node!");
1023 if (!PN->hasOneUse())
1024 return std::nullopt;
1026 for (
Value *IncValPhi : PN->incoming_values()) {
1027 auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
1029 Reinterpret->getIntrinsicID() !=
1030 Intrinsic::aarch64_sve_convert_to_svbool ||
1031 RequiredType != Reinterpret->getArgOperand(0)->getType())
1032 return std::nullopt;
1040 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1041 auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(
I));
1042 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
// Conversion to bool: true once a governing predicate index has been
// recorded for this intrinsic (delegates to hasGoverningPredicate()).
1108 explicit operator bool()
const {
return hasGoverningPredicate(); }
1115 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1119 assert(hasGoverningPredicate() &&
"Propery not set!");
1120 return GoverningPredicateIdx;
1124 assert(!hasGoverningPredicate() &&
"Cannot set property twice!");
1125 GoverningPredicateIdx =
Index;
1142 assert(hasMatchingUndefIntrinsic() &&
"Propery not set!");
1143 return UndefIntrinsic;
1147 assert(!hasMatchingUndefIntrinsic() &&
"Cannot set property twice!");
1148 UndefIntrinsic = IID;
1155 assert(hasMatchingIROpode() &&
"Propery not set!");
1160 assert(!hasMatchingIROpode() &&
"Cannot set property twice!");
1170 return ResultLanes == InactiveLanesTakenFromOperand;
1174 assert(inactiveLanesTakenFromOperand() &&
"Propery not set!");
1175 return OperandIdxForInactiveLanes;
1180 ResultLanes = InactiveLanesTakenFromOperand;
1181 OperandIdxForInactiveLanes =
Index;
1186 return ResultLanes == InactiveLanesAreNotDefined;
1191 ResultLanes = InactiveLanesAreNotDefined;
1196 return ResultLanes == InactiveLanesAreUnused;
1201 ResultLanes = InactiveLanesAreUnused;
1211 ResultIsZeroInitialized =
true;
1222 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1226 assert(hasOperandWithNoActiveLanes() &&
"Propery not set!");
1227 return OperandIdxWithNoActiveLanes;
1231 assert(!hasOperandWithNoActiveLanes() &&
"Cannot set property twice!");
1232 OperandIdxWithNoActiveLanes =
Index;
1237 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1240 unsigned IROpcode = 0;
1242 enum PredicationStyle {
1244 InactiveLanesTakenFromOperand,
1245 InactiveLanesAreNotDefined,
1246 InactiveLanesAreUnused
1249 bool ResultIsZeroInitialized =
false;
1250 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1251 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1257 if (!isa<ScalableVectorType>(
II.getType()) &&
1259 return !isa<ScalableVectorType>(V->getType());
1267 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1268 case Intrinsic::aarch64_sve_fcvt_f16f32:
1269 case Intrinsic::aarch64_sve_fcvt_f16f64:
1270 case Intrinsic::aarch64_sve_fcvt_f32f16:
1271 case Intrinsic::aarch64_sve_fcvt_f32f64:
1272 case Intrinsic::aarch64_sve_fcvt_f64f16:
1273 case Intrinsic::aarch64_sve_fcvt_f64f32:
1274 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1275 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1276 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1277 case Intrinsic::aarch64_sve_fcvtzs:
1278 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1279 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1280 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1281 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1282 case Intrinsic::aarch64_sve_fcvtzu:
1283 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1284 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1285 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1286 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1287 case Intrinsic::aarch64_sve_scvtf:
1288 case Intrinsic::aarch64_sve_scvtf_f16i32:
1289 case Intrinsic::aarch64_sve_scvtf_f16i64:
1290 case Intrinsic::aarch64_sve_scvtf_f32i64:
1291 case Intrinsic::aarch64_sve_scvtf_f64i32:
1292 case Intrinsic::aarch64_sve_ucvtf:
1293 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1294 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1295 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1296 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1299 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1300 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1301 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1302 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1305 case Intrinsic::aarch64_sve_fabd:
1307 case Intrinsic::aarch64_sve_fadd:
1310 case Intrinsic::aarch64_sve_fdiv:
1313 case Intrinsic::aarch64_sve_fmax:
1315 case Intrinsic::aarch64_sve_fmaxnm:
1317 case Intrinsic::aarch64_sve_fmin:
1319 case Intrinsic::aarch64_sve_fminnm:
1321 case Intrinsic::aarch64_sve_fmla:
1323 case Intrinsic::aarch64_sve_fmls:
1325 case Intrinsic::aarch64_sve_fmul:
1328 case Intrinsic::aarch64_sve_fmulx:
1330 case Intrinsic::aarch64_sve_fnmla:
1332 case Intrinsic::aarch64_sve_fnmls:
1334 case Intrinsic::aarch64_sve_fsub:
1337 case Intrinsic::aarch64_sve_add:
1340 case Intrinsic::aarch64_sve_mla:
1342 case Intrinsic::aarch64_sve_mls:
1344 case Intrinsic::aarch64_sve_mul:
1347 case Intrinsic::aarch64_sve_sabd:
1349 case Intrinsic::aarch64_sve_sdiv:
1352 case Intrinsic::aarch64_sve_smax:
1354 case Intrinsic::aarch64_sve_smin:
1356 case Intrinsic::aarch64_sve_smulh:
1358 case Intrinsic::aarch64_sve_sub:
1361 case Intrinsic::aarch64_sve_uabd:
1363 case Intrinsic::aarch64_sve_udiv:
1366 case Intrinsic::aarch64_sve_umax:
1368 case Intrinsic::aarch64_sve_umin:
1370 case Intrinsic::aarch64_sve_umulh:
1372 case Intrinsic::aarch64_sve_asr:
1375 case Intrinsic::aarch64_sve_lsl:
1378 case Intrinsic::aarch64_sve_lsr:
1381 case Intrinsic::aarch64_sve_and:
1384 case Intrinsic::aarch64_sve_bic:
1386 case Intrinsic::aarch64_sve_eor:
1389 case Intrinsic::aarch64_sve_orr:
1392 case Intrinsic::aarch64_sve_sqsub:
1394 case Intrinsic::aarch64_sve_uqsub:
1397 case Intrinsic::aarch64_sve_add_u:
1400 case Intrinsic::aarch64_sve_and_u:
1403 case Intrinsic::aarch64_sve_asr_u:
1406 case Intrinsic::aarch64_sve_eor_u:
1409 case Intrinsic::aarch64_sve_fadd_u:
1412 case Intrinsic::aarch64_sve_fdiv_u:
1415 case Intrinsic::aarch64_sve_fmul_u:
1418 case Intrinsic::aarch64_sve_fsub_u:
1421 case Intrinsic::aarch64_sve_lsl_u:
1424 case Intrinsic::aarch64_sve_lsr_u:
1427 case Intrinsic::aarch64_sve_mul_u:
1430 case Intrinsic::aarch64_sve_orr_u:
1433 case Intrinsic::aarch64_sve_sdiv_u:
1436 case Intrinsic::aarch64_sve_sub_u:
1439 case Intrinsic::aarch64_sve_udiv_u:
1443 case Intrinsic::aarch64_sve_addqv:
1444 case Intrinsic::aarch64_sve_and_z:
1445 case Intrinsic::aarch64_sve_bic_z:
1446 case Intrinsic::aarch64_sve_brka_z:
1447 case Intrinsic::aarch64_sve_brkb_z:
1448 case Intrinsic::aarch64_sve_brkn_z:
1449 case Intrinsic::aarch64_sve_brkpa_z:
1450 case Intrinsic::aarch64_sve_brkpb_z:
1451 case Intrinsic::aarch64_sve_cntp:
1452 case Intrinsic::aarch64_sve_compact:
1453 case Intrinsic::aarch64_sve_eor_z:
1454 case Intrinsic::aarch64_sve_eorv:
1455 case Intrinsic::aarch64_sve_eorqv:
1456 case Intrinsic::aarch64_sve_nand_z:
1457 case Intrinsic::aarch64_sve_nor_z:
1458 case Intrinsic::aarch64_sve_orn_z:
1459 case Intrinsic::aarch64_sve_orr_z:
1460 case Intrinsic::aarch64_sve_orv:
1461 case Intrinsic::aarch64_sve_orqv:
1462 case Intrinsic::aarch64_sve_pnext:
1463 case Intrinsic::aarch64_sve_rdffr_z:
1464 case Intrinsic::aarch64_sve_saddv:
1465 case Intrinsic::aarch64_sve_uaddv:
1466 case Intrinsic::aarch64_sve_umaxv:
1467 case Intrinsic::aarch64_sve_umaxqv:
1468 case Intrinsic::aarch64_sve_cmpeq:
1469 case Intrinsic::aarch64_sve_cmpeq_wide:
1470 case Intrinsic::aarch64_sve_cmpge:
1471 case Intrinsic::aarch64_sve_cmpge_wide:
1472 case Intrinsic::aarch64_sve_cmpgt:
1473 case Intrinsic::aarch64_sve_cmpgt_wide:
1474 case Intrinsic::aarch64_sve_cmphi:
1475 case Intrinsic::aarch64_sve_cmphi_wide:
1476 case Intrinsic::aarch64_sve_cmphs:
1477 case Intrinsic::aarch64_sve_cmphs_wide:
1478 case Intrinsic::aarch64_sve_cmple_wide:
1479 case Intrinsic::aarch64_sve_cmplo_wide:
1480 case Intrinsic::aarch64_sve_cmpls_wide:
1481 case Intrinsic::aarch64_sve_cmplt_wide:
1482 case Intrinsic::aarch64_sve_cmpne:
1483 case Intrinsic::aarch64_sve_cmpne_wide:
1484 case Intrinsic::aarch64_sve_facge:
1485 case Intrinsic::aarch64_sve_facgt:
1486 case Intrinsic::aarch64_sve_fcmpeq:
1487 case Intrinsic::aarch64_sve_fcmpge:
1488 case Intrinsic::aarch64_sve_fcmpgt:
1489 case Intrinsic::aarch64_sve_fcmpne:
1490 case Intrinsic::aarch64_sve_fcmpuo:
1491 case Intrinsic::aarch64_sve_ld1:
1492 case Intrinsic::aarch64_sve_ld1_gather:
1493 case Intrinsic::aarch64_sve_ld1_gather_index:
1494 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1495 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1496 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1497 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1498 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1499 case Intrinsic::aarch64_sve_ld1q_gather_index:
1500 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1501 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1502 case Intrinsic::aarch64_sve_ld1ro:
1503 case Intrinsic::aarch64_sve_ld1rq:
1504 case Intrinsic::aarch64_sve_ld1udq:
1505 case Intrinsic::aarch64_sve_ld1uwq:
1506 case Intrinsic::aarch64_sve_ld2_sret:
1507 case Intrinsic::aarch64_sve_ld2q_sret:
1508 case Intrinsic::aarch64_sve_ld3_sret:
1509 case Intrinsic::aarch64_sve_ld3q_sret:
1510 case Intrinsic::aarch64_sve_ld4_sret:
1511 case Intrinsic::aarch64_sve_ld4q_sret:
1512 case Intrinsic::aarch64_sve_ldff1:
1513 case Intrinsic::aarch64_sve_ldff1_gather:
1514 case Intrinsic::aarch64_sve_ldff1_gather_index:
1515 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1516 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1517 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1518 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1519 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1520 case Intrinsic::aarch64_sve_ldnf1:
1521 case Intrinsic::aarch64_sve_ldnt1:
1522 case Intrinsic::aarch64_sve_ldnt1_gather:
1523 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1524 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1525 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1528 case Intrinsic::aarch64_sve_prf:
1529 case Intrinsic::aarch64_sve_prfb_gather_index:
1530 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1531 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1532 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1533 case Intrinsic::aarch64_sve_prfd_gather_index:
1534 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1535 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1536 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1537 case Intrinsic::aarch64_sve_prfh_gather_index:
1538 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1539 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1540 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1541 case Intrinsic::aarch64_sve_prfw_gather_index:
1542 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1543 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1544 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1547 case Intrinsic::aarch64_sve_st1_scatter:
1548 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1549 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1550 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1551 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1552 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1553 case Intrinsic::aarch64_sve_st1dq:
1554 case Intrinsic::aarch64_sve_st1q_scatter_index:
1555 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1556 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1557 case Intrinsic::aarch64_sve_st1wq:
1558 case Intrinsic::aarch64_sve_stnt1:
1559 case Intrinsic::aarch64_sve_stnt1_scatter:
1560 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1561 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1562 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1564 case Intrinsic::aarch64_sve_st2:
1565 case Intrinsic::aarch64_sve_st2q:
1567 case Intrinsic::aarch64_sve_st3:
1568 case Intrinsic::aarch64_sve_st3q:
1570 case Intrinsic::aarch64_sve_st4:
1571 case Intrinsic::aarch64_sve_st4q:
1580 Value *UncastedPred;
1581 if (
match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
1582 m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
1586 if (cast<ScalableVectorType>(Pred->
getType())->getMinNumElements() <=
1587 cast<ScalableVectorType>(UncastedPred->
getType())->getMinNumElements())
1588 Pred = UncastedPred;
1589 auto *
C = dyn_cast<Constant>(Pred);
1590 return (
C &&
C->isAllOnesValue());
1596 auto *Dup = dyn_cast<IntrinsicInst>(V);
1597 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1598 Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))
1600 cast<VectorType>(V->getType())->getElementCount(),
1601 cast<Constant>(Dup->getOperand(2)));
1606static std::optional<Instruction *>
1613 Value *Op1 =
II.getOperand(1);
1614 Value *Op2 =
II.getOperand(2);
1619 isa<Constant>(Op1) && !isa<Constant>(Op2)) {
1630 if (
auto FII = dyn_cast<FPMathOperator>(&
II))
1639 if (!SimpleII || isa<UndefValue>(SimpleII))
1640 return std::nullopt;
1648 if (SimpleII == Inactive)
1658static std::optional<Instruction *>
1662 return std::nullopt;
1684 if (!isa<UndefValue>(
II.getOperand(
OpIdx)))
1691 II.setCalledFunction(NewDecl);
1701 return std::nullopt;
1713static std::optional<Instruction *>
1715 auto BinOp = dyn_cast<IntrinsicInst>(
II.getOperand(0));
1717 return std::nullopt;
1719 auto IntrinsicID = BinOp->getIntrinsicID();
1720 switch (IntrinsicID) {
1721 case Intrinsic::aarch64_sve_and_z:
1722 case Intrinsic::aarch64_sve_bic_z:
1723 case Intrinsic::aarch64_sve_eor_z:
1724 case Intrinsic::aarch64_sve_nand_z:
1725 case Intrinsic::aarch64_sve_nor_z:
1726 case Intrinsic::aarch64_sve_orn_z:
1727 case Intrinsic::aarch64_sve_orr_z:
1730 return std::nullopt;
1733 auto BinOpPred = BinOp->getOperand(0);
1734 auto BinOpOp1 = BinOp->getOperand(1);
1735 auto BinOpOp2 = BinOp->getOperand(2);
1737 auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
1739 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1740 return std::nullopt;
1742 auto PredOp = PredIntr->getOperand(0);
1743 auto PredOpTy = cast<VectorType>(PredOp->getType());
1744 if (PredOpTy !=
II.getType())
1745 return std::nullopt;
1749 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1750 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1751 if (BinOpOp1 == BinOpOp2)
1752 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1755 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1757 auto NarrowedBinOp =
1762static std::optional<Instruction *>
1765 if (isa<PHINode>(
II.getArgOperand(0)))
1769 return BinOpCombine;
1772 if (isa<TargetExtType>(
II.getArgOperand(0)->getType()) ||
1773 isa<TargetExtType>(
II.getType()))
1774 return std::nullopt;
1777 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1779 const auto *IVTy = cast<VectorType>(
II.getType());
1785 const auto *CursorVTy = cast<VectorType>(Cursor->
getType());
1786 if (CursorVTy->getElementCount().getKnownMinValue() <
1787 IVTy->getElementCount().getKnownMinValue())
1791 if (Cursor->
getType() == IVTy)
1792 EarliestReplacement = Cursor;
1794 auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
1797 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1798 Intrinsic::aarch64_sve_convert_to_svbool ||
1799 IntrinsicCursor->getIntrinsicID() ==
1800 Intrinsic::aarch64_sve_convert_from_svbool))
1803 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1804 Cursor = IntrinsicCursor->getOperand(0);
1809 if (!EarliestReplacement)
1810 return std::nullopt;
1818 auto *OpPredicate =
II.getOperand(0);
1831 return std::nullopt;
1834 return std::nullopt;
1836 const auto PTruePattern =
1837 cast<ConstantInt>(Pg->
getOperand(0))->getZExtValue();
1838 if (PTruePattern != AArch64SVEPredPattern::vl1)
1839 return std::nullopt;
1844 II.getArgOperand(0),
II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
1845 Insert->insertBefore(
II.getIterator());
1846 Insert->takeName(&
II);
1854 auto *
RetTy = cast<ScalableVectorType>(
II.getType());
1856 II.getArgOperand(0));
1866 return std::nullopt;
1871 if (!SplatValue || !SplatValue->isZero())
1872 return std::nullopt;
1875 auto *DupQLane = dyn_cast<IntrinsicInst>(
II.getArgOperand(1));
1877 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1878 return std::nullopt;
1881 auto *DupQLaneIdx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
1882 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1883 return std::nullopt;
1885 auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
1886 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1887 return std::nullopt;
1891 if (!isa<UndefValue>(VecIns->getArgOperand(0)))
1892 return std::nullopt;
1894 if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
1895 return std::nullopt;
1897 auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
1899 return std::nullopt;
1901 auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
1902 auto *OutTy = dyn_cast<ScalableVectorType>(
II.getType());
1903 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
1904 return std::nullopt;
1906 unsigned NumElts = VecTy->getNumElements();
1907 unsigned PredicateBits = 0;
1910 for (
unsigned I = 0;
I < NumElts; ++
I) {
1911 auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(
I));
1913 return std::nullopt;
1915 PredicateBits |= 1 << (
I * (16 / NumElts));
1919 if (PredicateBits == 0) {
1921 PFalse->takeName(&
II);
1927 for (
unsigned I = 0;
I < 16; ++
I)
1928 if ((PredicateBits & (1 <<
I)) != 0)
1931 unsigned PredSize = Mask & -Mask;
1936 for (
unsigned I = 0;
I < 16;
I += PredSize)
1937 if ((PredicateBits & (1 <<
I)) == 0)
1938 return std::nullopt;
1943 {PredType}, {PTruePat});
1945 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
1946 auto *ConvertFromSVBool =
1948 {
II.getType()}, {ConvertToSVBool});
1956 Value *Pg =
II.getArgOperand(0);
1957 Value *Vec =
II.getArgOperand(1);
1958 auto IntrinsicID =
II.getIntrinsicID();
1959 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
1970 auto *OldBinOp = cast<BinaryOperator>(Vec);
1971 auto OpC = OldBinOp->getOpcode();
1977 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
1982 auto *
C = dyn_cast<Constant>(Pg);
1983 if (IsAfter &&
C &&
C->isNullValue()) {
1987 Extract->insertBefore(
II.getIterator());
1988 Extract->takeName(&
II);
1992 auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
1994 return std::nullopt;
1996 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
1997 return std::nullopt;
1999 const auto PTruePattern =
2000 cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
2005 return std::nullopt;
2007 unsigned Idx = MinNumElts - 1;
2016 auto *PgVTy = cast<ScalableVectorType>(Pg->
getType());
2017 if (
Idx >= PgVTy->getMinNumElements())
2018 return std::nullopt;
2023 Extract->insertBefore(
II.getIterator());
2024 Extract->takeName(&
II);
2037 Value *Pg =
II.getArgOperand(0);
2039 Value *Vec =
II.getArgOperand(2);
2043 return std::nullopt;
2048 return std::nullopt;
2062 FPTy, cast<VectorType>(Vec->
getType())->getElementCount());
2065 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2078 {
II.getType()}, {AllPat});
2085static std::optional<Instruction *>
2087 const auto Pattern = cast<ConstantInt>(
II.getArgOperand(0))->getZExtValue();
2089 if (
Pattern == AArch64SVEPredPattern::all) {
2098 return MinNumElts && NumElts >= MinNumElts
2100 II, ConstantInt::get(
II.getType(), MinNumElts)))
2104static std::optional<Instruction *>
2107 if (!ST->isStreaming())
2108 return std::nullopt;
2120 Value *PgVal =
II.getArgOperand(0);
2121 Value *OpVal =
II.getArgOperand(1);
2125 if (PgVal == OpVal &&
2126 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2127 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2128 Value *Ops[] = {PgVal, OpVal};
2142 return std::nullopt;
2146 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2147 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2161 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2162 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2163 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2164 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2165 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2166 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2167 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2168 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2169 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2170 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2171 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2172 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2173 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2183 return std::nullopt;
2186template <Intrinsic::ID MulOpc,
typename Intrinsic::ID FuseOpc>
2187static std::optional<Instruction *>
2189 bool MergeIntoAddendOp) {
2191 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2192 if (MergeIntoAddendOp) {
2193 AddendOp =
II.getOperand(1);
2194 Mul =
II.getOperand(2);
2196 AddendOp =
II.getOperand(2);
2197 Mul =
II.getOperand(1);
2202 return std::nullopt;
2204 if (!
Mul->hasOneUse())
2205 return std::nullopt;
2208 if (
II.getType()->isFPOrFPVectorTy()) {
2212 if (FAddFlags != cast<CallInst>(
Mul)->getFastMathFlags())
2213 return std::nullopt;
2215 return std::nullopt;
2220 if (MergeIntoAddendOp)
2230static std::optional<Instruction *>
2232 Value *Pred =
II.getOperand(0);
2233 Value *PtrOp =
II.getOperand(1);
2234 Type *VecTy =
II.getType();
2238 Load->copyMetadata(
II);
2249static std::optional<Instruction *>
2251 Value *VecOp =
II.getOperand(0);
2252 Value *Pred =
II.getOperand(1);
2253 Value *PtrOp =
II.getOperand(2);
2257 Store->copyMetadata(
II);
2268 switch (Intrinsic) {
2269 case Intrinsic::aarch64_sve_fmul_u:
2270 return Instruction::BinaryOps::FMul;
2271 case Intrinsic::aarch64_sve_fadd_u:
2272 return Instruction::BinaryOps::FAdd;
2273 case Intrinsic::aarch64_sve_fsub_u:
2274 return Instruction::BinaryOps::FSub;
2276 return Instruction::BinaryOpsEnd;
2280static std::optional<Instruction *>
2283 if (
II.isStrictFP())
2284 return std::nullopt;
2286 auto *OpPredicate =
II.getOperand(0);
2288 if (BinOpCode == Instruction::BinaryOpsEnd ||
2290 return std::nullopt;
2292 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2299 Intrinsic::aarch64_sve_mla>(
2303 Intrinsic::aarch64_sve_mad>(
2306 return std::nullopt;
2309static std::optional<Instruction *>
2313 Intrinsic::aarch64_sve_fmla>(IC,
II,
2318 Intrinsic::aarch64_sve_fmad>(IC,
II,
2323 Intrinsic::aarch64_sve_fmla>(IC,
II,
2326 return std::nullopt;
2329static std::optional<Instruction *>
2333 Intrinsic::aarch64_sve_fmla>(IC,
II,
2338 Intrinsic::aarch64_sve_fmad>(IC,
II,
2343 Intrinsic::aarch64_sve_fmla_u>(
2349static std::optional<Instruction *>
2353 Intrinsic::aarch64_sve_fmls>(IC,
II,
2358 Intrinsic::aarch64_sve_fnmsb>(
2363 Intrinsic::aarch64_sve_fmls>(IC,
II,
2366 return std::nullopt;
2369static std::optional<Instruction *>
2373 Intrinsic::aarch64_sve_fmls>(IC,
II,
2378 Intrinsic::aarch64_sve_fnmsb>(
2383 Intrinsic::aarch64_sve_fmls_u>(
2392 Intrinsic::aarch64_sve_mls>(
2395 return std::nullopt;
2400 Value *UnpackArg =
II.getArgOperand(0);
2401 auto *
RetTy = cast<ScalableVectorType>(
II.getType());
2402 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2403 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2416 return std::nullopt;
2420 auto *OpVal =
II.getOperand(0);
2421 auto *OpIndices =
II.getOperand(1);
2426 auto *SplatValue = dyn_cast_or_null<ConstantInt>(
getSplatValue(OpIndices));
2428 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2429 return std::nullopt;
2445 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2446 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2450 if ((
match(
II.getArgOperand(0),
2451 m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(
m_Value(
A)))) &&
2453 m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(
m_Value(
B))))) ||
2456 auto *TyA = cast<ScalableVectorType>(
A->getType());
2457 if (TyA ==
B->getType() &&
2462 TyA->getMinNumElements());
2468 return std::nullopt;
2476 if (
match(
II.getArgOperand(0),
2478 match(
II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
2481 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2483 return std::nullopt;
2486static std::optional<Instruction *>
2488 Value *Mask =
II.getOperand(0);
2489 Value *BasePtr =
II.getOperand(1);
2490 Value *Index =
II.getOperand(2);
2498 if (
match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
2501 BasePtr->getPointerAlignment(
II.getDataLayout());
2504 BasePtr, IndexBase);
2511 return std::nullopt;
2514static std::optional<Instruction *>
2516 Value *Val =
II.getOperand(0);
2517 Value *Mask =
II.getOperand(1);
2518 Value *BasePtr =
II.getOperand(2);
2519 Value *Index =
II.getOperand(3);
2526 if (
match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
2529 BasePtr->getPointerAlignment(
II.getDataLayout());
2532 BasePtr, IndexBase);
2538 return std::nullopt;
2544 Value *Pred =
II.getOperand(0);
2545 Value *Vec =
II.getOperand(1);
2546 Value *DivVec =
II.getOperand(2);
2549 ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
2550 if (!SplatConstantInt)
2551 return std::nullopt;
2555 if (DivisorValue == -1)
2556 return std::nullopt;
2557 if (DivisorValue == 1)
2563 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2570 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2572 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2576 return std::nullopt;
2580 size_t VecSize = Vec.
size();
2585 size_t HalfVecSize = VecSize / 2;
2589 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2597 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2612 m_Intrinsic<Intrinsic::vector_insert>(
2614 !isa<FixedVectorType>(CurrentInsertElt->
getType()))
2615 return std::nullopt;
2616 auto IIScalableTy = cast<ScalableVectorType>(
II.getType());
2620 while (
auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
2621 auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
2622 Elts[
Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2623 CurrentInsertElt = InsertElt->getOperand(0);
2627 isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(
Default);
2629 return std::nullopt;
2633 for (
size_t I = 0;
I < Elts.
size();
I++) {
2634 if (Elts[
I] ==
nullptr)
2639 if (InsertEltChain ==
nullptr)
2640 return std::nullopt;
2646 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2647 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2648 IIScalableTy->getMinNumElements() /
2653 auto *WideShuffleMaskTy =
2664 auto NarrowBitcast =
2677 return std::nullopt;
2682 Value *Pred =
II.getOperand(0);
2683 Value *Vec =
II.getOperand(1);
2684 Value *Shift =
II.getOperand(2);
2687 Value *AbsPred, *MergedValue;
2688 if (!
match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
2690 !
match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
2693 return std::nullopt;
2701 return std::nullopt;
2706 return std::nullopt;
2709 {
II.getType()}, {Pred, Vec, Shift});
2716 Value *Vec =
II.getOperand(0);
2721 return std::nullopt;
2727 auto *NI =
II.getNextNode();
2730 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2732 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2733 auto *NIBB = NI->getParent();
2734 NI = NI->getNextNode();
2736 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2737 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2742 auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
2743 if (NextII &&
II.isIdenticalTo(NextII))
2746 return std::nullopt;
2751 if (
match(
II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>()))
2753 return std::nullopt;
2759 Value *Passthru =
II.getOperand(0);
2765 auto *Ty = cast<VectorType>(
II.getType());
2767 auto *Mask = ConstantInt::get(Ty, MaskValue);
2773 return std::nullopt;
2776static std::optional<Instruction *>
2783 return std::nullopt;
2786std::optional<Instruction *>
2797 case Intrinsic::aarch64_dmb:
2799 case Intrinsic::aarch64_neon_fmaxnm:
2800 case Intrinsic::aarch64_neon_fminnm:
2802 case Intrinsic::aarch64_sve_convert_from_svbool:
2804 case Intrinsic::aarch64_sve_dup:
2806 case Intrinsic::aarch64_sve_dup_x:
2808 case Intrinsic::aarch64_sve_cmpne:
2809 case Intrinsic::aarch64_sve_cmpne_wide:
2811 case Intrinsic::aarch64_sve_rdffr:
2813 case Intrinsic::aarch64_sve_lasta:
2814 case Intrinsic::aarch64_sve_lastb:
2816 case Intrinsic::aarch64_sve_clasta_n:
2817 case Intrinsic::aarch64_sve_clastb_n:
2819 case Intrinsic::aarch64_sve_cntd:
2821 case Intrinsic::aarch64_sve_cntw:
2823 case Intrinsic::aarch64_sve_cnth:
2825 case Intrinsic::aarch64_sve_cntb:
2827 case Intrinsic::aarch64_sme_cntsd:
2829 case Intrinsic::aarch64_sme_cntsw:
2831 case Intrinsic::aarch64_sme_cntsh:
2833 case Intrinsic::aarch64_sme_cntsb:
2835 case Intrinsic::aarch64_sve_ptest_any:
2836 case Intrinsic::aarch64_sve_ptest_first:
2837 case Intrinsic::aarch64_sve_ptest_last:
2839 case Intrinsic::aarch64_sve_fadd:
2841 case Intrinsic::aarch64_sve_fadd_u:
2843 case Intrinsic::aarch64_sve_fmul_u:
2845 case Intrinsic::aarch64_sve_fsub:
2847 case Intrinsic::aarch64_sve_fsub_u:
2849 case Intrinsic::aarch64_sve_add:
2851 case Intrinsic::aarch64_sve_add_u:
2853 Intrinsic::aarch64_sve_mla_u>(
2855 case Intrinsic::aarch64_sve_sub:
2857 case Intrinsic::aarch64_sve_sub_u:
2859 Intrinsic::aarch64_sve_mls_u>(
2861 case Intrinsic::aarch64_sve_tbl:
2863 case Intrinsic::aarch64_sve_uunpkhi:
2864 case Intrinsic::aarch64_sve_uunpklo:
2865 case Intrinsic::aarch64_sve_sunpkhi:
2866 case Intrinsic::aarch64_sve_sunpklo:
2868 case Intrinsic::aarch64_sve_uzp1:
2870 case Intrinsic::aarch64_sve_zip1:
2871 case Intrinsic::aarch64_sve_zip2:
2873 case Intrinsic::aarch64_sve_ld1_gather_index:
2875 case Intrinsic::aarch64_sve_st1_scatter_index:
2877 case Intrinsic::aarch64_sve_ld1:
2879 case Intrinsic::aarch64_sve_st1:
2881 case Intrinsic::aarch64_sve_sdiv:
2883 case Intrinsic::aarch64_sve_sel:
2885 case Intrinsic::aarch64_sve_srshl:
2887 case Intrinsic::aarch64_sve_dupq_lane:
2889 case Intrinsic::aarch64_sve_insr:
2891 case Intrinsic::aarch64_sve_ptrue:
2893 case Intrinsic::aarch64_sve_uxtb:
2895 case Intrinsic::aarch64_sve_uxth:
2897 case Intrinsic::aarch64_sve_uxtw:
2899 case Intrinsic::aarch64_sme_in_streaming_mode:
2903 return std::nullopt;
2910 SimplifyAndSetOp)
const {
2911 switch (
II.getIntrinsicID()) {
2914 case Intrinsic::aarch64_neon_fcvtxn:
2915 case Intrinsic::aarch64_neon_rshrn:
2916 case Intrinsic::aarch64_neon_sqrshrn:
2917 case Intrinsic::aarch64_neon_sqrshrun:
2918 case Intrinsic::aarch64_neon_sqshrn:
2919 case Intrinsic::aarch64_neon_sqshrun:
2920 case Intrinsic::aarch64_neon_sqxtn:
2921 case Intrinsic::aarch64_neon_sqxtun:
2922 case Intrinsic::aarch64_neon_uqrshrn:
2923 case Intrinsic::aarch64_neon_uqshrn:
2924 case Intrinsic::aarch64_neon_uqxtn:
2925 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
2929 return std::nullopt;
2961bool AArch64TTIImpl::isWideningInstruction(
Type *DstTy,
unsigned Opcode,
2963 Type *SrcOverrideTy)
const {
2968 cast<VectorType>(DstTy)->getElementCount());
2978 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
2988 Type *SrcTy = SrcOverrideTy;
2990 case Instruction::Add:
2991 case Instruction::Sub:
2993 if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
3000 case Instruction::Mul: {
3002 if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
3003 (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
3007 }
else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
3036 assert(SrcTy &&
"Expected some SrcTy");
3038 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3044 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3046 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3050 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3063 (Src->isScalableTy() && !ST->hasSVE2()))
3072 dyn_cast_or_null<Instruction>(
Add->getUniqueUndroppableUser());
3073 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3076 auto *Shr = dyn_cast_or_null<Instruction>(
Add->getUniqueUndroppableUser());
3077 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3080 auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
3081 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3082 Src->getScalarSizeInBits() !=
3083 cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
3107 assert(ISD &&
"Invalid opcode");
3110 if (
I &&
I->hasOneUser()) {
3111 auto *SingleUser = cast<Instruction>(*
I->user_begin());
3113 if (isWideningInstruction(Dst, SingleUser->getOpcode(),
Operands, Src)) {
3117 if (SingleUser->getOpcode() == Instruction::Add) {
3118 if (
I == SingleUser->getOperand(1) ||
3119 (isa<CastInst>(SingleUser->getOperand(1)) &&
3120 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3127 if ((isa<ZExtInst>(
I) || isa<SExtInst>(
I)) &&
3135 return Cost == 0 ? 0 : 1;
3172 return AdjustCost(Entry->Cost);
3180 const unsigned int SVE_EXT_COST = 1;
3181 const unsigned int SVE_FCVT_COST = 1;
3182 const unsigned int SVE_UNPACK_ONCE = 4;
3183 const unsigned int SVE_UNPACK_TWICE = 16;
3312 SVE_EXT_COST + SVE_FCVT_COST},
3317 SVE_EXT_COST + SVE_FCVT_COST},
3324 SVE_EXT_COST + SVE_FCVT_COST},
3328 SVE_EXT_COST + SVE_FCVT_COST},
3334 SVE_EXT_COST + SVE_FCVT_COST},
3337 SVE_EXT_COST + SVE_FCVT_COST},
3342 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3344 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3354 SVE_EXT_COST + SVE_FCVT_COST},
3359 SVE_EXT_COST + SVE_FCVT_COST},
3372 SVE_EXT_COST + SVE_FCVT_COST},
3376 SVE_EXT_COST + SVE_FCVT_COST},
3388 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3390 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3392 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3394 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3398 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3400 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3416 SVE_EXT_COST + SVE_FCVT_COST},
3421 SVE_EXT_COST + SVE_FCVT_COST},
3432 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3434 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3436 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3438 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3440 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3442 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3446 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3448 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3450 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3452 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3651 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3655 std::pair<InstructionCost, MVT> LT =
3657 unsigned NumElements =
3669 return AdjustCost(Entry->Cost);
3696 if (ST->hasFullFP16())
3699 return AdjustCost(Entry->Cost);
3705 isa<FixedVectorType>(Dst) && isa<FixedVectorType>(Src))
3729 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3732 return Part1 + Part2;
3752 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3760 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) &&
"Invalid type");
3765 CostKind, Index,
nullptr,
nullptr);
3775 if (!VecLT.second.isVector() || !TLI->
isTypeLegal(DstVT))
3781 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3791 case Instruction::SExt:
3796 case Instruction::ZExt:
3797 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3810 return Opcode == Instruction::PHI ? 0 : 1;
3819 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
3827 if (!LT.second.isVector())
3832 if (LT.second.isFixedLengthVector()) {
3833 unsigned Width = LT.second.getVectorNumElements();
3834 Index = Index % Width;
3848 if (
I && dyn_cast<LoadInst>(
I->getOperand(1)))
3882 auto ExtractCanFuseWithFmul = [&]() {
3889 auto IsAllowedScalarTy = [&](
const Type *
T) {
3890 return T->isFloatTy() ||
T->isDoubleTy() ||
3891 (
T->isHalfTy() && ST->hasFullFP16());
3895 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
3897 const auto *BO = dyn_cast<BinaryOperator>(EEUser);
3898 return BO && BO->getOpcode() == BinaryOperator::FMul &&
3899 !BO->getType()->isVectorTy();
3904 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
3908 return Idx == 0 || (RegWidth != 0 && (
Idx * EltSz) % RegWidth == 0);
3913 if (!isa<FixedVectorType>(Val) || !IsAllowedScalarTy(Val->
getScalarType()))
3918 for (
auto *U : Scalar->users()) {
3919 if (!IsUserFMulScalarTy(U))
3923 UserToExtractIdx[
U];
3925 if (UserToExtractIdx.
empty())
3927 for (
auto &[S, U, L] : ScalarUserAndIdx) {
3928 for (
auto *U : S->users()) {
3929 if (UserToExtractIdx.
contains(U)) {
3930 auto *
FMul = cast<BinaryOperator>(U);
3931 auto *Op0 =
FMul->getOperand(0);
3932 auto *Op1 =
FMul->getOperand(1);
3933 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
3934 UserToExtractIdx[
U] =
L;
3940 for (
auto &[U, L] : UserToExtractIdx) {
3946 const auto *EE = cast<ExtractElementInst>(
I);
3948 const auto *IdxOp = dyn_cast<ConstantInt>(EE->getIndexOperand());
3952 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
3953 if (!IsUserFMulScalarTy(U))
3958 const auto *BO = cast<BinaryOperator>(U);
3959 const auto *OtherEE = dyn_cast<ExtractElementInst>(
3960 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
3962 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
3965 return IsExtractLaneEquivalentToZero(
3966 cast<ConstantInt>(OtherEE->getIndexOperand())
3969 OtherEE->getType()->getScalarSizeInBits());
3977 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
3978 ExtractCanFuseWithFmul())
3983 :
ST->getVectorInsertExtractBaseCost();
3990 const Value *Op1)
const {
3994 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
3995 isa<PoisonValue>(Op0))
3997 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index);
4003 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx)
const {
4004 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4011 unsigned Index)
const {
4012 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I);
4018 unsigned Index)
const {
4019 if (isa<FixedVectorType>(Val))
4037 if (isa<ScalableVectorType>(Ty))
4042 unsigned VecInstCost =
4044 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4052 return std::nullopt;
4054 return std::nullopt;
4061 Cost += InstCost(PromotedTy);
4077 if (
auto *VTy = dyn_cast<ScalableVectorType>(Ty))
4084 Op2Info, Args, CxtI);
4095 Ty,
CostKind, Op1Info, Op2Info,
true,
4096 [&](
Type *PromotedTy) {
4100 return *PromotedCost;
4162 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4165 return ISD ==
ISD::SDIV ? (3 * AddCost + AsrCost)
4166 : (3 * AsrCost + AddCost);
4168 return MulCost + AsrCost + 2 * AddCost;
4170 }
else if (VT.isVector()) {
4181 Cost += 2 * AsrCost;
4186 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4190 }
else if (LT.second == MVT::v2i64) {
4191 return VT.getVectorNumElements() *
4199 return MulCost + 2 * AddCost + 2 * AsrCost;
4200 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4205 LT.second.isFixedLengthVector()) {
4214 unsigned NElts = cast<FixedVectorType>(Ty)->getNumElements();
4215 return ExtractCost + InsertCost +
4239 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4240 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4241 LT.second == MVT::nxv16i8;
4242 bool Is128bit = LT.second.is128BitVector();
4254 (HasMULH ? 0 : ShrCost) +
4255 AddCost * 2 + ShrCost;
4256 return DivCost + (ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4263 if (!VT.isVector() && VT.getSizeInBits() > 64)
4267 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4272 if (VT.isSimple() && isa<FixedVectorType>(Ty) &&
4283 if (
nullptr != Entry)
4288 if (LT.second.getScalarType() == MVT::i8)
4290 else if (LT.second.getScalarType() == MVT::i16)
4300 if (
auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
4303 return (4 + DivCost) * VTy->getNumElements();
4309 -1,
nullptr,
nullptr);
4323 if (LT.second == MVT::v2i64 && ST->hasSVE())
4338 if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
4340 return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *
4361 (Ty->
isHalfTy() && ST->hasFullFP16())) &&
4378 return 2 * LT.first;
4401 int MaxMergeDistance = 64;
4405 return NumVectorInstToHideOverhead;
4418 if (isa<FixedVectorType>(ValTy) && Opcode == Instruction::Select) {
4420 const int AmortizationCost = 20;
4428 VecPred = CurrentPred;
4436 static const auto ValidMinMaxTys = {
4437 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4438 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4439 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4442 if (
any_of(ValidMinMaxTys, [<](
MVT M) {
return M == LT.second; }) ||
4443 (ST->hasFullFP16() &&
4444 any_of(ValidFP16MinMaxTys, [<](
MVT M) {
return M == LT.second; })))
4449 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4450 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4451 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4452 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4453 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4454 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4455 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4456 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4457 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4458 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4459 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4471 if (Opcode == Instruction::FCmp) {
4473 ValTy,
CostKind, Op1Info, Op2Info,
false,
4474 [&](
Type *PromotedTy) {
4478 if (isa<VectorType>(PromotedTy))
4486 return *PromotedCost;
4490 if (LT.second.getScalarType() != MVT::f64 &&
4491 LT.second.getScalarType() != MVT::f32 &&
4492 LT.second.getScalarType() != MVT::f16)
4497 unsigned Factor = 1;
4501 else if (isa<FixedVectorType>(ValTy) &&
4505 else if (isa<ScalableVectorType>(ValTy) &&
4537 Op1Info, Op2Info,
I);
4543 if (ST->requiresStrictAlign()) {
4548 Options.AllowOverlappingLoads =
true;
4554 Options.LoadSizes = {8, 4, 2, 1};
4555 Options.AllowedTailExpansions = {3, 5, 6};
4560 return ST->hasSVE();
4571 if (!LT.first.isValid())
4575 auto *VT = cast<VectorType>(Src);
4576 if (VT->getElementType()->isIntegerTy(1))
4593 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4594 "Should be called on only load or stores.");
4596 case Instruction::Load:
4599 return ST->getGatherOverhead();
4601 case Instruction::Store:
4604 return ST->getScatterOverhead();
4612 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
4617 auto *VT = cast<VectorType>(DataTy);
4619 if (!LT.first.isValid())
4623 if (!LT.second.isVector() ||
4625 VT->getElementType()->isIntegerTy(1))
4635 ElementCount LegalVF = LT.second.getVectorElementCount();
4638 {TTI::OK_AnyValue, TTI::OP_None},
I);
4656 if (VT == MVT::Other)
4661 if (!LT.first.isValid())
4669 if (
auto *VTy = dyn_cast<ScalableVectorType>(Ty))
4671 (VTy->getElementType()->isIntegerTy(1) &&
4672 !VTy->getElementCount().isKnownMultipleOf(
4683 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4684 LT.second.is128BitVector() && Alignment <
Align(16)) {
4690 const int AmortizationCost = 6;
4692 return LT.first * 2 * AmortizationCost;
4703 if (VT == MVT::v4i8)
4706 return cast<FixedVectorType>(Ty)->getNumElements() * 2;
4710 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4725 while (!TypeWorklist.
empty()) {
4747 bool UseMaskForCond,
bool UseMaskForGaps)
const {
4748 assert(Factor >= 2 &&
"Invalid interleave factor");
4749 auto *VecVTy = cast<VectorType>(VecTy);
4763 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
4766 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
4767 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
4770 VecVTy->getElementCount().divideCoefficientBy(Factor));
4776 if (MinElts % Factor == 0 &&
4783 UseMaskForCond, UseMaskForGaps);
4790 for (
auto *
I : Tys) {
4791 if (!
I->isVectorTy())
4793 if (
I->getScalarSizeInBits() * cast<FixedVectorType>(
I)->getNumElements() ==
4812 enum { MaxStridedLoads = 7 };
4814 int StridedLoads = 0;
4817 for (
const auto BB : L->blocks()) {
4818 for (
auto &
I : *BB) {
4819 LoadInst *LMemI = dyn_cast<LoadInst>(&
I);
4824 if (L->isLoopInvariant(PtrValue))
4828 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
4829 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
4838 if (StridedLoads > MaxStridedLoads / 2)
4839 return StridedLoads;
4842 return StridedLoads;
4845 int StridedLoads = countStridedLoads(L, SE);
4847 <<
" strided loads\n");
4863 unsigned *FinalSize) {
4867 for (
auto *BB : L->getBlocks()) {
4868 for (
auto &
I : *BB) {
4878 if (LoopCost > Budget)
4894 if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
4900 if (MaxTC > 0 && MaxTC <= 32)
4933 if (!L->isInnermost() || L->getNumBlocks() > 8)
4937 if (!L->getExitBlock())
4941 if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
4960 if (Header == Latch) {
4963 unsigned Width = 10;
4969 unsigned MaxInstsPerLine = 16;
4971 unsigned BestUC = 1;
4972 unsigned SizeWithBestUC = BestUC *
Size;
4974 unsigned SizeWithUC = UC *
Size;
4975 if (SizeWithUC > 48)
4977 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
4978 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
4980 SizeWithBestUC = BestUC *
Size;
4990 for (
auto *BB : L->blocks()) {
4991 for (
auto &
I : *BB) {
4998 if (isa<LoadInst>(&
I)) {
5001 for (
auto *U :
I.users())
5002 if (L->contains(cast<Instruction>(U)))
5003 LoadedValuesPlus.
insert(U);
5010 return LoadedValuesPlus.
contains(SI->getOperand(0));
5021 auto *Term = dyn_cast<BranchInst>(Header->getTerminator());
5023 if (!Term || !Term->isConditional() || Preds.
size() == 1 ||
5030 if (isa<PHINode>(
I) || L->isLoopInvariant(
I) ||
Depth > 8)
5033 if (isa<LoadInst>(
I))
5037 auto *I = dyn_cast<Instruction>(V);
5038 return I && DependsOnLoopLoad(I, Depth + 1);
5045 DependsOnLoopLoad(
I, 0)) {
5061 if (L->getLoopDepth() > 1)
5071 for (
auto *BB : L->getBlocks()) {
5072 for (
auto &
I : *BB) {
5076 if (IsVectorized &&
I.getType()->isVectorTy())
5078 if (isa<CallBase>(
I)) {
5079 if (isa<CallInst>(
I) || isa<InvokeInst>(
I))
5090 case AArch64Subtarget::AppleA14:
5091 case AArch64Subtarget::AppleA15:
5092 case AArch64Subtarget::AppleA16:
5093 case AArch64Subtarget::AppleM4:
5096 case AArch64Subtarget::Falkor:
5122 !ST->getSchedModel().isOutOfOrder()) {
5140 bool CanCreate)
const {
5144 case Intrinsic::aarch64_neon_st2:
5145 case Intrinsic::aarch64_neon_st3:
5146 case Intrinsic::aarch64_neon_st4: {
5148 StructType *ST = dyn_cast<StructType>(ExpectedType);
5149 if (!CanCreate || !ST)
5151 unsigned NumElts = Inst->
arg_size() - 1;
5152 if (ST->getNumElements() != NumElts)
5154 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5160 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5166 case Intrinsic::aarch64_neon_ld2:
5167 case Intrinsic::aarch64_neon_ld3:
5168 case Intrinsic::aarch64_neon_ld4:
5169 if (Inst->
getType() == ExpectedType)
5180 case Intrinsic::aarch64_neon_ld2:
5181 case Intrinsic::aarch64_neon_ld3:
5182 case Intrinsic::aarch64_neon_ld4:
5183 Info.ReadMem =
true;
5184 Info.WriteMem =
false;
5187 case Intrinsic::aarch64_neon_st2:
5188 case Intrinsic::aarch64_neon_st3:
5189 case Intrinsic::aarch64_neon_st4:
5190 Info.ReadMem =
false;
5191 Info.WriteMem =
true;
5199 case Intrinsic::aarch64_neon_ld2:
5200 case Intrinsic::aarch64_neon_st2:
5201 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5203 case Intrinsic::aarch64_neon_ld3:
5204 case Intrinsic::aarch64_neon_st3:
5205 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5207 case Intrinsic::aarch64_neon_ld4:
5208 case Intrinsic::aarch64_neon_st4:
5209 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5221 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5222 bool Considerable =
false;
5223 AllowPromotionWithoutCommonHeader =
false;
5224 if (!isa<SExtInst>(&
I))
5226 Type *ConsideredSExtType =
5228 if (
I.getType() != ConsideredSExtType)
5232 for (
const User *U :
I.users()) {
5234 Considerable =
true;
5238 if (GEPInst->getNumOperands() > 2) {
5239 AllowPromotionWithoutCommonHeader =
true;
5244 return Considerable;
5286 if (
auto *VTy = dyn_cast<ScalableVectorType>(Ty))
5292 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5302 return LegalizationCost + 2;
5312 LegalizationCost *= LT.first - 1;
5316 assert(ISD &&
"Invalid opcode");
5324 return LegalizationCost + 2;
5332 std::optional<FastMathFlags> FMF,
5338 if (
auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
5343 if (
auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
5348 return BaseCost + FixedVTy->getNumElements();
5351 if (Opcode != Instruction::FAdd)
5354 auto *VTy = cast<ScalableVectorType>(ValTy);
5361 if (isa<ScalableVectorType>(ValTy))
5365 MVT MTy = LT.second;
5367 assert(ISD &&
"Invalid opcode");
5414 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5415 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5427 return (LT.first - 1) +
Log2_32(NElts);
5432 return (LT.first - 1) + Entry->Cost;
5440 auto *ValVTy = cast<FixedVectorType>(ValTy);
5444 if (LT.first != 1) {
5450 ExtraCost *= LT.first - 1;
5453 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5454 return Cost + ExtraCost;
5462 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5475 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5477 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5479 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5481 return (LT.first - 1) * 2 + 2;
5501 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5503 return LT.first + 2;
5537 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5552 if (LT.second.getScalarType() == MVT::i1) {
5561 assert(Entry &&
"Illegal Type for Splice");
5562 LegalizationCost += Entry->Cost;
5563 return LegalizationCost * LT.first;
5567 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
5579 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5587 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB ||
5589 (OpAExtend != OpBExtend && !ST->hasMatMulInt8() &&
5593 "Unexpected values for OpBExtend or InputTypeB");
5608 if (VFMinValue == Scale)
5612 (!ST->
isNeonAvailable() || !ST->hasDotProd() || AccumEVT == MVT::i64))
5615 if (InputEVT == MVT::i8) {
5616 switch (VFMinValue) {
5620 if (AccumEVT == MVT::i32)
5622 else if (AccumEVT != MVT::i64)
5626 if (AccumEVT == MVT::i64)
5628 else if (AccumEVT != MVT::i32)
5632 }
else if (InputEVT == MVT::i16) {
5635 if (VFMinValue != 8 || AccumEVT != MVT::i64)
5651 "Expected the Mask to match the return size if given");
5653 "Expected the same scalar types");
5658 if (!Mask.empty() && isa<FixedVectorType>(SrcTy) && LT.second.isVector() &&
5659 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5661 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5666 if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
5669 return std::max<InstructionCost>(1, LT.first / 4);
5682 unsigned TpNumElts = Mask.size();
5683 unsigned LTNumElts = LT.second.getVectorNumElements();
5684 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5686 LT.second.getVectorElementCount());
5688 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
5690 for (
unsigned N = 0;
N < NumVecs;
N++) {
5694 unsigned Source1 = -1U, Source2 = -1U;
5695 unsigned NumSources = 0;
5696 for (
unsigned E = 0; E < LTNumElts; E++) {
5697 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
5706 unsigned Source = MaskElt / LTNumElts;
5707 if (NumSources == 0) {
5710 }
else if (NumSources == 1 && Source != Source1) {
5713 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5719 if (Source == Source1)
5721 else if (Source == Source2)
5722 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
5731 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5742 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
5745 Result.first->second = NCost;
5756 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
5757 if (LT.second.is128BitVector() &&
5758 cast<FixedVectorType>(SubTp)->getNumElements() ==
5759 LT.second.getVectorNumElements() / 2) {
5762 if (Index == (
int)LT.second.getVectorNumElements() / 2)
5786 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
5805 bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
5806 if (IsLoad && LT.second.isVector() &&
5808 LT.second.getVectorElementCount()))
5814 if (Mask.size() == 4 &&
5818 all_of(Mask, [](
int E) {
return E < 8; }))
5822 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
5825 return M.value() < 0 || M.value() == (
int)M.index();
5832 if (LT.second.isFixedLengthVector() &&
5833 LT.second.getVectorNumElements() == Mask.size() &&
5835 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
5836 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
5837 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5838 LT.second.getVectorNumElements(), 16) ||
5839 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5840 LT.second.getVectorNumElements(), 32) ||
5841 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5842 LT.second.getVectorNumElements(), 64) ||
5845 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
5974 return LT.first * Entry->Cost;
5983 LT.second.getSizeInBits() <= 128 && SubTp) {
5985 if (SubLT.second.isVector()) {
5986 int NumElts = LT.second.getVectorNumElements();
5987 int NumSubElts = SubLT.second.getVectorNumElements();
5988 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
5994 if (IsExtractSubvector)
6007 if (isa<LoadInst>(&
I) || isa<StoreInst>(&
I)) {
6023 return ST->useFixedOverScalableIfEqualCost();
6061 unsigned NumInsns = 0;
6063 NumInsns += BB->sizeWithoutDebug();
6073 int64_t Scale,
unsigned AddrSpace)
const {
6101 if (
I->getOpcode() == Instruction::Or &&
6102 isa<BranchInst>(
I->getNextNode()) &&
6103 cast<BranchInst>(
I->getNextNode())->isUnconditional())
6106 if (
I->getOpcode() == Instruction::Add ||
6107 I->getOpcode() == Instruction::Sub)
6131 if (
auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
6132 return all_equal(Shuf->getShuffleMask());
6139 bool AllowSplat =
false) {
6144 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6145 auto *FullTy = FullV->
getType();
6146 auto *HalfTy = HalfV->getType();
6148 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6151 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6152 auto *FullVT = cast<FixedVectorType>(FullV->
getType());
6153 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
6154 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6158 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6172 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6173 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6180 int NumElements = cast<FixedVectorType>(Op1->
getType())->getNumElements() * 2;
6187 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6188 (M2Start != 0 && M2Start != (NumElements / 2)))
6190 if (S1Op1 && S2Op1 && M1Start != M2Start)
6200 return Ext->getType()->getScalarSizeInBits() ==
6201 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6206 !areExtDoubled(cast<Instruction>(Ext1)) ||
6207 !areExtDoubled(cast<Instruction>(Ext2)))
6215 Value *VectorOperand =
nullptr;
6220 isa<FixedVectorType>(VectorOperand->
getType()) &&
6221 cast<FixedVectorType>(VectorOperand->
getType())->getNumElements() == 2;
6231 auto *
GEP = dyn_cast<GetElementPtrInst>(Ptrs);
6232 if (!
GEP ||
GEP->getNumOperands() != 2)
6236 Value *Offsets =
GEP->getOperand(1);
6239 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6243 if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
6244 auto *OffsetsInst = cast<Instruction>(Offsets);
6245 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6246 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6262 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
6267 Value *ZExtOp = cast<Instruction>(
Op)->getOperand(0);
6268 Ops.
push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
6269 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
6281 switch (
II->getIntrinsicID()) {
6282 case Intrinsic::aarch64_neon_smull:
6283 case Intrinsic::aarch64_neon_umull:
6292 case Intrinsic::fma:
6293 case Intrinsic::fmuladd:
6294 if (isa<VectorType>(
I->getType()) &&
6295 cast<VectorType>(
I->getType())->getElementType()->isHalfTy() &&
6299 case Intrinsic::aarch64_neon_sqdmull:
6300 case Intrinsic::aarch64_neon_sqdmulh:
6301 case Intrinsic::aarch64_neon_sqrdmulh:
6307 return !Ops.
empty();
6308 case Intrinsic::aarch64_neon_fmlal:
6309 case Intrinsic::aarch64_neon_fmlal2:
6310 case Intrinsic::aarch64_neon_fmlsl:
6311 case Intrinsic::aarch64_neon_fmlsl2:
6317 return !Ops.
empty();
6318 case Intrinsic::aarch64_sve_ptest_first:
6319 case Intrinsic::aarch64_sve_ptest_last:
6320 if (
auto *IIOp = dyn_cast<IntrinsicInst>(
II->getOperand(0)))
6321 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6323 return !Ops.
empty();
6324 case Intrinsic::aarch64_sme_write_horiz:
6325 case Intrinsic::aarch64_sme_write_vert:
6326 case Intrinsic::aarch64_sme_writeq_horiz:
6327 case Intrinsic::aarch64_sme_writeq_vert: {
6328 auto *
Idx = dyn_cast<Instruction>(
II->getOperand(1));
6329 if (!
Idx ||
Idx->getOpcode() != Instruction::Add)
6334 case Intrinsic::aarch64_sme_read_horiz:
6335 case Intrinsic::aarch64_sme_read_vert:
6336 case Intrinsic::aarch64_sme_readq_horiz:
6337 case Intrinsic::aarch64_sme_readq_vert:
6338 case Intrinsic::aarch64_sme_ld1b_vert:
6339 case Intrinsic::aarch64_sme_ld1h_vert:
6340 case Intrinsic::aarch64_sme_ld1w_vert:
6341 case Intrinsic::aarch64_sme_ld1d_vert:
6342 case Intrinsic::aarch64_sme_ld1q_vert:
6343 case Intrinsic::aarch64_sme_st1b_vert:
6344 case Intrinsic::aarch64_sme_st1h_vert:
6345 case Intrinsic::aarch64_sme_st1w_vert:
6346 case Intrinsic::aarch64_sme_st1d_vert:
6347 case Intrinsic::aarch64_sme_st1q_vert:
6348 case Intrinsic::aarch64_sme_ld1b_horiz:
6349 case Intrinsic::aarch64_sme_ld1h_horiz:
6350 case Intrinsic::aarch64_sme_ld1w_horiz:
6351 case Intrinsic::aarch64_sme_ld1d_horiz:
6352 case Intrinsic::aarch64_sme_ld1q_horiz:
6353 case Intrinsic::aarch64_sme_st1b_horiz:
6354 case Intrinsic::aarch64_sme_st1h_horiz:
6355 case Intrinsic::aarch64_sme_st1w_horiz:
6356 case Intrinsic::aarch64_sme_st1d_horiz:
6357 case Intrinsic::aarch64_sme_st1q_horiz: {
6358 auto *
Idx = dyn_cast<Instruction>(
II->getOperand(3));
6359 if (!
Idx ||
Idx->getOpcode() != Instruction::Add)
6364 case Intrinsic::aarch64_neon_pmull:
6370 case Intrinsic::aarch64_neon_pmull64:
6372 II->getArgOperand(1)))
6377 case Intrinsic::masked_gather:
6382 case Intrinsic::masked_scatter:
6392 auto ShouldSinkCondition = [](
Value *
Cond,
6394 if (!isa<IntrinsicInst>(
Cond))
6396 auto *
II = dyn_cast<IntrinsicInst>(
Cond);
6397 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6398 !isa<ScalableVectorType>(
II->getOperand(0)->getType()))
6400 if (isa<CmpInst>(
II->getOperand(0)))
6405 switch (
I->getOpcode()) {
6406 case Instruction::GetElementPtr:
6407 case Instruction::Add:
6408 case Instruction::Sub:
6410 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6417 case Instruction::Select: {
6418 if (!ShouldSinkCondition(
I->getOperand(0), Ops))
6424 case Instruction::Br: {
6425 if (cast<BranchInst>(
I)->isUnconditional())
6428 if (!ShouldSinkCondition(cast<BranchInst>(
I)->getCondition(), Ops))
6438 if (!
I->getType()->isVectorTy())
6441 switch (
I->getOpcode()) {
6442 case Instruction::Sub:
6443 case Instruction::Add: {
6449 auto Ext1 = cast<Instruction>(
I->getOperand(0));
6450 auto Ext2 = cast<Instruction>(
I->getOperand(1));
6461 case Instruction::Or: {
6464 if (ST->hasNEON()) {
6474 ? cast<Instruction>(
I->getOperand(1))
6475 : cast<Instruction>(
I->getOperand(0));
6478 if (
I->getParent() != MainAnd->
getParent() ||
6483 if (
I->getParent() != IA->getParent() ||
6484 I->getParent() != IB->getParent())
6499 case Instruction::Mul: {
6500 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
6501 auto *Ty = cast<VectorType>(V->getType());
6510 int NumZExts = 0, NumSExts = 0;
6511 for (
auto &
Op :
I->operands()) {
6513 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
6517 auto *Ext = cast<Instruction>(
Op);
6518 auto *ExtOp = Ext->getOperand(0);
6519 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6523 if (isa<SExtInst>(Ext))
6554 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
6559 dyn_cast<ConstantInt>(Insert->getOperand(2));
6561 if (!ElementConstant || !ElementConstant->
isZero())
6564 unsigned Opcode = OperandInstr->
getOpcode();
6565 if (Opcode == Instruction::SExt)
6567 else if (Opcode == Instruction::ZExt)
6572 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
6582 Ops.
push_back(&Insert->getOperandUse(1));
6588 if (!Ops.
empty() && (NumSExts == 2 || NumZExts == 2))
6592 if (!ShouldSinkSplatForIndexedVariant(
I))
6601 return !Ops.
empty();
6603 case Instruction::FMul: {
6605 if (
I->getType()->isScalableTy())
6608 if (cast<VectorType>(
I->getType())->getElementType()->isHalfTy() &&
6617 return !Ops.
empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getVectorInsertExtractBaseCost() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
bool isStreamingSVEAvailable() const
Returns true if the target has access to the streaming-compatible subset of SVE instructions.
unsigned getMaxInterleaveFactor() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
TailFoldingOpts getSVETailFoldingDefaultOpts() const
bool useSVEForFixedLengthVectors() const
unsigned getEpilogueVectorizationMinVF() const
unsigned getMinSVEVectorSizeInBits() const
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
bool prefersVectorizedAddressing() const override
bool preferFixedOverScalableIfEqualCost() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool enableScalableVectorization() const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
EVT getPromotedVTForPredicate(EVT VT) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
Compute a cost of the given call instruction.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isIntPredicate() const
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isFP128Ty() const
Return true if this is 'fp128'.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
int getNumOccurrences() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
constexpr bool isNonZero() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
@ C
The default llvm calling convention, compatible with C.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.