#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

        "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
      errs() << "invalid argument '" << Opt
             << "' to -sve-tail-folding=; the option should be of the form\n"
                "  (disabled|all|default|simple)[+(reductions|recurrences"
                "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
192 "Control the use of vectorisation using tail-folding for SVE where the"
193 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
194 "\ndisabled (Initial) No loop types will vectorize using "
196 "\ndefault (Initial) Uses the default tail-folding settings for "
198 "\nall (Initial) All legal loop types will vectorize using "
200 "\nsimple (Initial) Use tail-folding for simple loops (not "
201 "reductions or recurrences)"
202 "\nreductions Use tail-folding for loops containing reductions"
203 "\nnoreductions Inverse of above"
204 "\nrecurrences Use tail-folding for loops containing fixed order "
206 "\nnorecurrences Inverse of above"
207 "\nreverse Use tail-folding for loops requiring reversed "
209 "\nnoreverse Inverse of above"),
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");
    AArch64::FeatureExecuteOnly,

      TM.getSubtargetImpl(*Caller)->getFeatureBits();
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
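  // The subset check above requires every (effective) callee feature to also
  // be present in the caller. XOR-ing both sides with InlineInverseFeatures
  // flips the sense for features such as FeatureExecuteOnly: for those, a
  // caller built with the feature may only inline callees that were also
  // built with it.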
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                         unsigned DefaultCallPenalty) const {
  if (F == Call.getCaller())
    return DefaultCallPenalty;

                      ST->isNeonAvailable());
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  unsigned ImmIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    int NumConstants = (BitSize + 63) / 64;
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  if (TyWidth == 32 || TyWidth == 64)
  unsigned TotalHistCnts = 1;

    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;
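    // A request wider than the natural HISTCNT width (two 64-bit or four
    // 32-bit elements) is modelled as EC / NaturalVectorWidth consecutive
    // histogram counts.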
  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add: {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                        MVT::v8i16, MVT::v2i32, MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }))
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;
    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
      return LT.first * Instrs;
  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }))
  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
  case Intrinsic::fmuladd: {
        (EltTy->isHalfTy() && ST->hasFullFP16()))
  case Intrinsic::stepvector: {
      Cost += AddCost * (LT.first - 1);
  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
    EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)
        getTLI()->getTypeConversion(C, SubVecVT);
        getTLI()->getTypeConversion(C, VecVT);
  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
      if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
          TLI->getValueType(DL, RetTy, true) == MVT::i16)
        return LegalisationCost.first * Entry->Cost + 1;

      return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
                         RetTy->getScalarSizeInBits()
    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
    EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    EVT MTy = TLI->getValueType(DL, RetTy);
    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
           (LT.second == MVT::f64 && MTy == MVT::i32) ||
           (LT.second == MVT::f32 && MTy == MVT::i64)))
    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
      if (LT.second.isVector())
          LegalTy, {LegalTy, LegalTy});
          LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    RetTy = RetTy->getScalarType();
    if (LT.second.isVector()) {
    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
        {Intrinsic::fshl, MVT::v4i32, 2},
        {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
        {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
        {Intrinsic::fshl, MVT::v8i8, 2},  {Intrinsic::fshl, MVT::v4i16, 2}};
      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)
    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    EVT RetVT = getTLI()->getValueType(DL, RetTy);
    if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
        return RetTy->getNumElements() * 2;
  case Intrinsic::experimental_vector_match: {
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
  auto RequiredType = II.getType();

  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
    return GoverningPredicateIdx;
    GoverningPredicateIdx = Index;

    return UndefIntrinsic;
    UndefIntrinsic = IID;

    return ResultLanes == InactiveLanesTakenFromOperand;
    return OperandIdxForInactiveLanes;
    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesTakenFromOperand;
    OperandIdxForInactiveLanes = Index;

    return ResultLanes == InactiveLanesAreNotDefined;
    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreNotDefined;

    return ResultLanes == InactiveLanesAreUnused;
    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
    ResultLanes = InactiveLanesAreUnused;

    ResultIsZeroInitialized = true;

    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
    return OperandIdxWithNoActiveLanes;
    OperandIdxWithNoActiveLanes = Index;

  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();

  unsigned IROpcode = 0;

  enum PredicationStyle {
    InactiveLanesTakenFromOperand,
    InactiveLanesAreNotDefined,
    InactiveLanesAreUnused

  bool ResultIsZeroInitialized = false;
  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
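  // These properties drive the generic SVE intrinsic combines: an intrinsic
  // whose inactive lanes are never read is a candidate for the recorded
  // UndefIntrinsic variant, while one whose inactive lanes come from an
  // operand records which operand supplies them (OperandIdxForInactiveLanes).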
  return !isa<ScalableVectorType>(V->getType());
1268 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1269 case Intrinsic::aarch64_sve_fcvt_f16f32:
1270 case Intrinsic::aarch64_sve_fcvt_f16f64:
1271 case Intrinsic::aarch64_sve_fcvt_f32f16:
1272 case Intrinsic::aarch64_sve_fcvt_f32f64:
1273 case Intrinsic::aarch64_sve_fcvt_f64f16:
1274 case Intrinsic::aarch64_sve_fcvt_f64f32:
1275 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1276 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1277 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1278 case Intrinsic::aarch64_sve_fcvtzs:
1279 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1280 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1281 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1282 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1283 case Intrinsic::aarch64_sve_fcvtzu:
1284 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1285 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1286 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1287 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1288 case Intrinsic::aarch64_sve_scvtf:
1289 case Intrinsic::aarch64_sve_scvtf_f16i32:
1290 case Intrinsic::aarch64_sve_scvtf_f16i64:
1291 case Intrinsic::aarch64_sve_scvtf_f32i64:
1292 case Intrinsic::aarch64_sve_scvtf_f64i32:
1293 case Intrinsic::aarch64_sve_ucvtf:
1294 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1295 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1296 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1297 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1300 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1301 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1302 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1303 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1306 case Intrinsic::aarch64_sve_fabd:
1308 case Intrinsic::aarch64_sve_fadd:
1311 case Intrinsic::aarch64_sve_fdiv:
1314 case Intrinsic::aarch64_sve_fmax:
1316 case Intrinsic::aarch64_sve_fmaxnm:
1318 case Intrinsic::aarch64_sve_fmin:
1320 case Intrinsic::aarch64_sve_fminnm:
1322 case Intrinsic::aarch64_sve_fmla:
1324 case Intrinsic::aarch64_sve_fmls:
1326 case Intrinsic::aarch64_sve_fmul:
1329 case Intrinsic::aarch64_sve_fmulx:
1331 case Intrinsic::aarch64_sve_fnmla:
1333 case Intrinsic::aarch64_sve_fnmls:
1335 case Intrinsic::aarch64_sve_fsub:
1338 case Intrinsic::aarch64_sve_add:
1341 case Intrinsic::aarch64_sve_mla:
1343 case Intrinsic::aarch64_sve_mls:
1345 case Intrinsic::aarch64_sve_mul:
1348 case Intrinsic::aarch64_sve_sabd:
1350 case Intrinsic::aarch64_sve_sdiv:
1353 case Intrinsic::aarch64_sve_smax:
1355 case Intrinsic::aarch64_sve_smin:
1357 case Intrinsic::aarch64_sve_smulh:
1359 case Intrinsic::aarch64_sve_sub:
1362 case Intrinsic::aarch64_sve_uabd:
1364 case Intrinsic::aarch64_sve_udiv:
1367 case Intrinsic::aarch64_sve_umax:
1369 case Intrinsic::aarch64_sve_umin:
1371 case Intrinsic::aarch64_sve_umulh:
1373 case Intrinsic::aarch64_sve_asr:
1376 case Intrinsic::aarch64_sve_lsl:
1379 case Intrinsic::aarch64_sve_lsr:
1382 case Intrinsic::aarch64_sve_and:
1385 case Intrinsic::aarch64_sve_bic:
1387 case Intrinsic::aarch64_sve_eor:
1390 case Intrinsic::aarch64_sve_orr:
1393 case Intrinsic::aarch64_sve_sqsub:
1395 case Intrinsic::aarch64_sve_uqsub:
1398 case Intrinsic::aarch64_sve_add_u:
1401 case Intrinsic::aarch64_sve_and_u:
1404 case Intrinsic::aarch64_sve_asr_u:
1407 case Intrinsic::aarch64_sve_eor_u:
1410 case Intrinsic::aarch64_sve_fadd_u:
1413 case Intrinsic::aarch64_sve_fdiv_u:
1416 case Intrinsic::aarch64_sve_fmul_u:
1419 case Intrinsic::aarch64_sve_fsub_u:
1422 case Intrinsic::aarch64_sve_lsl_u:
1425 case Intrinsic::aarch64_sve_lsr_u:
1428 case Intrinsic::aarch64_sve_mul_u:
1431 case Intrinsic::aarch64_sve_orr_u:
1434 case Intrinsic::aarch64_sve_sdiv_u:
1437 case Intrinsic::aarch64_sve_sub_u:
1440 case Intrinsic::aarch64_sve_udiv_u:
1444 case Intrinsic::aarch64_sve_addqv:
1445 case Intrinsic::aarch64_sve_and_z:
1446 case Intrinsic::aarch64_sve_bic_z:
1447 case Intrinsic::aarch64_sve_brka_z:
1448 case Intrinsic::aarch64_sve_brkb_z:
1449 case Intrinsic::aarch64_sve_brkn_z:
1450 case Intrinsic::aarch64_sve_brkpa_z:
1451 case Intrinsic::aarch64_sve_brkpb_z:
1452 case Intrinsic::aarch64_sve_cntp:
1453 case Intrinsic::aarch64_sve_compact:
1454 case Intrinsic::aarch64_sve_eor_z:
1455 case Intrinsic::aarch64_sve_eorv:
1456 case Intrinsic::aarch64_sve_eorqv:
1457 case Intrinsic::aarch64_sve_nand_z:
1458 case Intrinsic::aarch64_sve_nor_z:
1459 case Intrinsic::aarch64_sve_orn_z:
1460 case Intrinsic::aarch64_sve_orr_z:
1461 case Intrinsic::aarch64_sve_orv:
1462 case Intrinsic::aarch64_sve_orqv:
1463 case Intrinsic::aarch64_sve_pnext:
1464 case Intrinsic::aarch64_sve_rdffr_z:
1465 case Intrinsic::aarch64_sve_saddv:
1466 case Intrinsic::aarch64_sve_uaddv:
1467 case Intrinsic::aarch64_sve_umaxv:
1468 case Intrinsic::aarch64_sve_umaxqv:
1469 case Intrinsic::aarch64_sve_cmpeq:
1470 case Intrinsic::aarch64_sve_cmpeq_wide:
1471 case Intrinsic::aarch64_sve_cmpge:
1472 case Intrinsic::aarch64_sve_cmpge_wide:
1473 case Intrinsic::aarch64_sve_cmpgt:
1474 case Intrinsic::aarch64_sve_cmpgt_wide:
1475 case Intrinsic::aarch64_sve_cmphi:
1476 case Intrinsic::aarch64_sve_cmphi_wide:
1477 case Intrinsic::aarch64_sve_cmphs:
1478 case Intrinsic::aarch64_sve_cmphs_wide:
1479 case Intrinsic::aarch64_sve_cmple_wide:
1480 case Intrinsic::aarch64_sve_cmplo_wide:
1481 case Intrinsic::aarch64_sve_cmpls_wide:
1482 case Intrinsic::aarch64_sve_cmplt_wide:
1483 case Intrinsic::aarch64_sve_cmpne:
1484 case Intrinsic::aarch64_sve_cmpne_wide:
1485 case Intrinsic::aarch64_sve_facge:
1486 case Intrinsic::aarch64_sve_facgt:
1487 case Intrinsic::aarch64_sve_fcmpeq:
1488 case Intrinsic::aarch64_sve_fcmpge:
1489 case Intrinsic::aarch64_sve_fcmpgt:
1490 case Intrinsic::aarch64_sve_fcmpne:
1491 case Intrinsic::aarch64_sve_fcmpuo:
1492 case Intrinsic::aarch64_sve_ld1:
1493 case Intrinsic::aarch64_sve_ld1_gather:
1494 case Intrinsic::aarch64_sve_ld1_gather_index:
1495 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1496 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1497 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1498 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1499 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1500 case Intrinsic::aarch64_sve_ld1q_gather_index:
1501 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1502 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1503 case Intrinsic::aarch64_sve_ld1ro:
1504 case Intrinsic::aarch64_sve_ld1rq:
1505 case Intrinsic::aarch64_sve_ld1udq:
1506 case Intrinsic::aarch64_sve_ld1uwq:
1507 case Intrinsic::aarch64_sve_ld2_sret:
1508 case Intrinsic::aarch64_sve_ld2q_sret:
1509 case Intrinsic::aarch64_sve_ld3_sret:
1510 case Intrinsic::aarch64_sve_ld3q_sret:
1511 case Intrinsic::aarch64_sve_ld4_sret:
1512 case Intrinsic::aarch64_sve_ld4q_sret:
1513 case Intrinsic::aarch64_sve_ldff1:
1514 case Intrinsic::aarch64_sve_ldff1_gather:
1515 case Intrinsic::aarch64_sve_ldff1_gather_index:
1516 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1517 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1518 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1519 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1520 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1521 case Intrinsic::aarch64_sve_ldnf1:
1522 case Intrinsic::aarch64_sve_ldnt1:
1523 case Intrinsic::aarch64_sve_ldnt1_gather:
1524 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1525 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1526 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1529 case Intrinsic::aarch64_sve_prf:
1530 case Intrinsic::aarch64_sve_prfb_gather_index:
1531 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1532 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1533 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1534 case Intrinsic::aarch64_sve_prfd_gather_index:
1535 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1536 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1537 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1538 case Intrinsic::aarch64_sve_prfh_gather_index:
1539 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1540 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1541 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1542 case Intrinsic::aarch64_sve_prfw_gather_index:
1543 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1544 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1545 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1548 case Intrinsic::aarch64_sve_st1_scatter:
1549 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1550 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1551 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1552 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1553 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1554 case Intrinsic::aarch64_sve_st1dq:
1555 case Intrinsic::aarch64_sve_st1q_scatter_index:
1556 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1557 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1558 case Intrinsic::aarch64_sve_st1wq:
1559 case Intrinsic::aarch64_sve_stnt1:
1560 case Intrinsic::aarch64_sve_stnt1_scatter:
1561 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1562 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1563 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1565 case Intrinsic::aarch64_sve_st2:
1566 case Intrinsic::aarch64_sve_st2q:
1568 case Intrinsic::aarch64_sve_st3:
1569 case Intrinsic::aarch64_sve_st3q:
1571 case Intrinsic::aarch64_sve_st4:
1572 case Intrinsic::aarch64_sve_st4q:
  Value *UncastedPred;
    Pred = UncastedPred;

  return (C && C->isAllOnesValue());

  if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
      Dup->getOperand(1) == Pg && isa<Constant>(Dup->getOperand(2)))

static std::optional<Instruction *>
  Value *Op1 = II.getOperand(1);
  Value *Op2 = II.getOperand(2);

    return std::nullopt;

  if (SimpleII == Inactive)

static std::optional<Instruction *>
    return std::nullopt;

  II.setCalledFunction(NewDecl);

  return std::nullopt;
static std::optional<Instruction *>
    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
        Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>
    return BinOpCombine;

    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
    return std::nullopt;
    return std::nullopt;

  const auto PTruePattern =
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(II.getIterator());
  Insert->takeName(&II);

      II.getArgOperand(0));
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
      return std::nullopt;
      PredicateBits |= 1 << (I * (16 / NumElts));

  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;

      {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
      {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(2);

  if (!Ty->isIntegerTy())
    return std::nullopt;

    return std::nullopt;

      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

      {II.getType()}, {AllPat});

static std::optional<Instruction *>
  if (Pattern == AArch64SVEPredPattern::all) {

  return MinNumElts && NumElts >= MinNumElts
                 II, ConstantInt::get(II.getType(), MinNumElts)))

static std::optional<Instruction *>
  if (!ST->isStreaming())
    return std::nullopt;
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                 bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);
    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
      return std::nullopt;
      return std::nullopt;

  if (MergeIntoAddendOp)
static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);

  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
    return Instruction::BinaryOpsEnd;
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
    return std::nullopt;
      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

                                                  Intrinsic::aarch64_sve_mla>(
                                                  Intrinsic::aarch64_sve_mad>(
  return std::nullopt;

static std::optional<Instruction *>
                                                 Intrinsic::aarch64_sve_fmla>(IC, II,
                                                 Intrinsic::aarch64_sve_fmad>(IC, II,
                                                 Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                                 Intrinsic::aarch64_sve_fmla>(IC, II,
                                                 Intrinsic::aarch64_sve_fmad>(IC, II,
                                                Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                                 Intrinsic::aarch64_sve_fmls>(IC, II,
                                                Intrinsic::aarch64_sve_fnmsb>(
                                                 Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                                 Intrinsic::aarch64_sve_fmls>(IC, II,
                                                Intrinsic::aarch64_sve_fnmsb>(
                                                Intrinsic::aarch64_sve_fmls_u>(
                                                  Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  Value *UnpackArg = II.getArgOperand(0);
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;

  Type *RetTy = II.getType();
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
    if (TyA == B->getType() &&
                         TyA->getMinNumElements());

  return std::nullopt;

  if (match(II.getArgOperand(0),
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

        BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

        BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
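// An sve.sdiv whose divisor is a splat of a power of two is rewritten above to
// sve.asrd (arithmetic shift right for divide) by log2 of the divisor; for a
// negative power of two the shifted result is additionally negated with
// sve.neg under the same predicate.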
  size_t VecSize = Vec.size();
  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

    return std::nullopt;

    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;

    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                     {II.getType()}, {Pred, Vec, Shift});

  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNode();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNode();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
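// The lookahead above walks forward over instructions that neither touch
// memory nor have other side effects (following a unique successor block if
// needed, up to the configured lookahead limit); if it reaches a barrier
// identical to this one, the current dmb is redundant and can be removed.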
                            {II.getType(), II.getOperand(0)->getType()},
                            {II.getOperand(0), II.getOperand(1)}));

  return std::nullopt;

  Value *Passthru = II.getOperand(0);
  auto *Mask = ConstantInt::get(Ty, MaskValue);

  return std::nullopt;
static std::optional<Instruction *>
  return std::nullopt;

std::optional<Instruction *>
2807 case Intrinsic::aarch64_dmb:
2809 case Intrinsic::aarch64_neon_fmaxnm:
2810 case Intrinsic::aarch64_neon_fminnm:
2812 case Intrinsic::aarch64_sve_convert_from_svbool:
2814 case Intrinsic::aarch64_sve_dup:
2816 case Intrinsic::aarch64_sve_dup_x:
2818 case Intrinsic::aarch64_sve_cmpne:
2819 case Intrinsic::aarch64_sve_cmpne_wide:
2821 case Intrinsic::aarch64_sve_rdffr:
2823 case Intrinsic::aarch64_sve_lasta:
2824 case Intrinsic::aarch64_sve_lastb:
2826 case Intrinsic::aarch64_sve_clasta_n:
2827 case Intrinsic::aarch64_sve_clastb_n:
2829 case Intrinsic::aarch64_sve_cntd:
2831 case Intrinsic::aarch64_sve_cntw:
2833 case Intrinsic::aarch64_sve_cnth:
2835 case Intrinsic::aarch64_sve_cntb:
2837 case Intrinsic::aarch64_sme_cntsd:
2839 case Intrinsic::aarch64_sve_ptest_any:
2840 case Intrinsic::aarch64_sve_ptest_first:
2841 case Intrinsic::aarch64_sve_ptest_last:
2843 case Intrinsic::aarch64_sve_fadd:
2845 case Intrinsic::aarch64_sve_fadd_u:
2847 case Intrinsic::aarch64_sve_fmul_u:
2849 case Intrinsic::aarch64_sve_fsub:
2851 case Intrinsic::aarch64_sve_fsub_u:
2853 case Intrinsic::aarch64_sve_add:
2855 case Intrinsic::aarch64_sve_add_u:
2857 Intrinsic::aarch64_sve_mla_u>(
2859 case Intrinsic::aarch64_sve_sub:
2861 case Intrinsic::aarch64_sve_sub_u:
2863 Intrinsic::aarch64_sve_mls_u>(
2865 case Intrinsic::aarch64_sve_tbl:
2867 case Intrinsic::aarch64_sve_uunpkhi:
2868 case Intrinsic::aarch64_sve_uunpklo:
2869 case Intrinsic::aarch64_sve_sunpkhi:
2870 case Intrinsic::aarch64_sve_sunpklo:
2872 case Intrinsic::aarch64_sve_uzp1:
2874 case Intrinsic::aarch64_sve_zip1:
2875 case Intrinsic::aarch64_sve_zip2:
2877 case Intrinsic::aarch64_sve_ld1_gather_index:
2879 case Intrinsic::aarch64_sve_st1_scatter_index:
2881 case Intrinsic::aarch64_sve_ld1:
2883 case Intrinsic::aarch64_sve_st1:
2885 case Intrinsic::aarch64_sve_sdiv:
2887 case Intrinsic::aarch64_sve_sel:
2889 case Intrinsic::aarch64_sve_srshl:
2891 case Intrinsic::aarch64_sve_dupq_lane:
2893 case Intrinsic::aarch64_sve_insr:
2895 case Intrinsic::aarch64_sve_whilelo:
2897 case Intrinsic::aarch64_sve_ptrue:
2899 case Intrinsic::aarch64_sve_uxtb:
2901 case Intrinsic::aarch64_sve_uxth:
2903 case Intrinsic::aarch64_sve_uxtw:
2905 case Intrinsic::aarch64_sme_in_streaming_mode:
2909 return std::nullopt;
    SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
  return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

  if (ST->useSVEForFixedLengthVectors() &&
          std::max(ST->getMinSVEVectorSizeInBits(), 128u));
  else if (ST->isNeonAvailable())

  if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           Type *SrcOverrideTy) const {
      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {

  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
  if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
      (Src->isScalableTy() && !ST->hasSVE2()))

  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (I && I->hasOneUser()) {
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

    return Cost == 0 ? 0 : 1;

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())

  if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
      return AdjustCost(Entry->Cost);

  const unsigned int SVE_EXT_COST = 1;
  const unsigned int SVE_FCVT_COST = 1;
  const unsigned int SVE_UNPACK_ONCE = 4;
  const unsigned int SVE_UNPACK_TWICE = 16;
3267 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3268 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3269 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3271 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3272 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3273 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3274 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3275 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3276 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3277 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3279 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3280 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3281 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3282 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3283 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3284 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3285 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3318 SVE_EXT_COST + SVE_FCVT_COST},
3323 SVE_EXT_COST + SVE_FCVT_COST},
3330 SVE_EXT_COST + SVE_FCVT_COST},
3334 SVE_EXT_COST + SVE_FCVT_COST},
3340 SVE_EXT_COST + SVE_FCVT_COST},
3343 SVE_EXT_COST + SVE_FCVT_COST},
3348 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3350 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3360 SVE_EXT_COST + SVE_FCVT_COST},
3365 SVE_EXT_COST + SVE_FCVT_COST},
3378 SVE_EXT_COST + SVE_FCVT_COST},
3382 SVE_EXT_COST + SVE_FCVT_COST},
3394 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3396 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3398 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3400 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3404 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3406 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3422 SVE_EXT_COST + SVE_FCVT_COST},
3427 SVE_EXT_COST + SVE_FCVT_COST},
3438 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3440 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3442 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3444 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3446 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3448 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3452 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3454 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3456 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3458 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3602 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3603 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3604 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3607 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3608 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3609 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3612 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3613 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3614 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3617 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3618 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3619 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3622 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3623 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3624 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3627 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3628 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3629 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3632 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3633 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3634 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
      ST->useSVEForFixedLengthVectors(WiderTy)) {
    std::pair<InstructionCost, MVT> LT =
    unsigned NumElements =

    return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())
      return AdjustCost(Entry->Cost);

      ST->isSVEorStreamingSVEAvailable() &&
      TLI->getTypeAction(Src->getContext(), SrcTy) ==
      TLI->getTypeAction(Dst->getContext(), DstTy) ==
        Opcode, LegalTy, Src, CCH, CostKind, I);
    return Part1 + Part2;

      ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
                                 CostKind, Index, nullptr, nullptr);

  auto DstVT = TLI->getValueType(DL, Dst);
  auto SrcVT = TLI->getValueType(DL, Src);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

  auto ExtractCanFuseWithFmul = [&]() {
    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

      DenseMap<User *, unsigned> UserToExtractIdx;
      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.contains(U)) {
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {

      return !EE->users().empty() &&
             all_of(EE->users(), [&](const User *U) {
               if (!IsUserFMulScalarTy(U))

               const auto *BO = cast<BinaryOperator>(U);
               const auto *OtherEE = dyn_cast<ExtractElementInst>(
                   BO->getOperand(0) == EE ? BO->getOperand(1)
                                           : BO->getOperand(0));
               const auto *IdxOp =
                   dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
               return IsExtractLaneEquivalentToZero(
                   cast<ConstantInt>(OtherEE->getIndexOperand())
                   OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

             : ST->getVectorInsertExtractBaseCost();
                                                   const Value *Op1) const {
  if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&

  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
  return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,

                                                   unsigned Index) const {
  return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);

                                                 unsigned Index) const {
             : ST->getVectorInsertExtractBaseCost() + 1;

  if (Ty->getElementType()->isFloatingPointTy())

  unsigned VecInstCost =
  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;

  if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
    return std::nullopt;
  if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
    return std::nullopt;

  Cost += InstCost(PromotedTy);

                                         Op2Info, Args, CxtI);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

        Ty, CostKind, Op1Info, Op2Info, true, [&](Type *PromotedTy) {
    return *PromotedCost;

    auto VT = TLI->getValueType(DL, Ty);
    if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
                 : (3 * AsrCost + AddCost);
        return MulCost + AsrCost + 2 * AddCost;
    } else if (VT.isVector()) {
        if (Ty->isScalableTy() && ST->hasSVE())
          Cost += 2 * AsrCost;
                   ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
      } else if (LT.second == MVT::v2i64) {
        return VT.getVectorNumElements() *
        if (Ty->isScalableTy() && ST->hasSVE())
          return MulCost + 2 * AddCost + 2 * AsrCost;
        return 2 * MulCost + AddCost + AsrCost + UsraCost;
        LT.second.isFixedLengthVector()) {
      return ExtractCost + InsertCost +

    auto VT = TLI->getValueType(DL, Ty);
    bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
                   LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
                   LT.second == MVT::nxv16i8;
    bool Is128bit = LT.second.is128BitVector();
                              (HasMULH ? 0 : ShrCost) + AddCost * 2 + ShrCost;
    return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);

    if (!VT.isVector() && VT.getSizeInBits() > 64)
          Opcode, Ty, CostKind, Op1Info, Op2Info);

    if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
          Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
        if (nullptr != Entry)
      if (LT.second.getScalarType() == MVT::i8)
      else if (LT.second.getScalarType() == MVT::i16)
            Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
        return (4 + DivCost) * VTy->getNumElements();
                                    -1, nullptr, nullptr);

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))

    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST->hasFullFP16())) &&

    if (!Ty->getScalarType()->isFP128Ty())

    if (!Ty->getScalarType()->isFP128Ty())
      return 2 * LT.first;

  if (!Ty->isVectorTy())
    int MaxMergeDistance = 64;
      return NumVectorInstToHideOverhead;
                                            unsigned Opcode1,
                                            unsigned Opcode2) const {
  if (!Sched.hasInstrSchedModel())

      Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
      Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());
         "Cannot handle variant scheduling classes without an MI");

  const int AmortizationCost = 20;
      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&](MVT M) { return M == LT.second; })))
4481 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4482 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4483 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4484 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4485 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4486 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4487 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4488 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4489 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4490 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4491 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);

  if (Opcode == Instruction::FCmp) {
            ValTy, CostKind, Op1Info, Op2Info, false, [&](Type *PromotedTy) {
      return *PromotedCost;

    if (LT.second.getScalarType() != MVT::f64 &&
        LT.second.getScalarType() != MVT::f32 &&
        LT.second.getScalarType() != MVT::f16)

    unsigned Factor = 1;
                                      AArch64::FCMEQv4f32))

      TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

                                          Op1Info, Op2Info, I);
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
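  // With these options a memcmp can be expanded inline using 8/4/2/1-byte
  // loads, overlapping loads are permitted, and residual tails of 3, 5 or 6
  // bytes remain eligible for expansion.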
  return ST->hasSVE();

  if (!LT.first.isValid())

    if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();

    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
      {TTI::OK_AnyValue, TTI::OP_None}, I);

  EVT VT = TLI->getValueType(DL, Ty, true);
  if (VT == MVT::Other)

  if (!LT.first.isValid())

      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && Alignment < Align(16)) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;

  if (Ty->isPtrOrPtrVectorTy())

  if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
    if (VT == MVT::v4i8)

  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

  while (!TypeWorklist.empty()) {
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);

                                           UseMaskForCond, UseMaskForGaps);

  for (auto *I : Tys) {
    if (!I->isVectorTy())

  return ST->getMaxInterleaveFactor();
  enum { MaxStridedLoads = 7 };
    int StridedLoads = 0;
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        if (L->isLoopInvariant(PtrValue))
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);
                    << " strided loads\n");
                             unsigned *FinalSize) {
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (!Cost.isValid())
      if (LoopCost > Budget)

  if (MaxTC > 0 && MaxTC <= 32)

  if (Blocks.size() != 2)

  if (!L->isInnermost() || L->getNumBlocks() > 8)

  if (!L->getExitBlock())

  bool HasParellelizableReductions =
      L->getNumBlocks() == 1 &&
      any_of(L->getHeader()->phis(),
        return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
  if (HasParellelizableReductions &&

  if (HasParellelizableReductions) {

  if (Header == Latch) {
    unsigned Width = 10;

    unsigned MaxInstsPerLine = 16;
    unsigned BestUC = 1;
    unsigned SizeWithBestUC = BestUC * Size;
      unsigned SizeWithUC = UC * Size;
      if (SizeWithUC > 48)
      if ((SizeWithUC % MaxInstsPerLine) == 0 ||
          (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
        SizeWithBestUC = BestUC * Size;
  for (auto *BB : L->blocks()) {
    for (auto &I : *BB) {
        for (auto *U : I.users())
            LoadedValuesPlus.insert(U);

    return LoadedValuesPlus.contains(SI->getOperand(0));

  if (!Term || !Term->isConditional() || Preds.size() == 1 ||
        auto *I = dyn_cast<Instruction>(V);
        return I && DependsOnLoopLoad(I, Depth + 1);
      DependsOnLoopLoad(I, 0)) {

  if (L->getLoopDepth() > 1)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (IsVectorized && I.getType()->isVectorTy())

  switch (ST->getProcFamily()) {
  case AArch64Subtarget::AppleA14:
  case AArch64Subtarget::AppleA15:
  case AArch64Subtarget::AppleA16:
  case AArch64Subtarget::AppleM4:
  case AArch64Subtarget::Falkor:

      !ST->getSchedModel().isOutOfOrder()) {
5201 bool CanCreate) const {
5205 case Intrinsic::aarch64_neon_st2:
5206 case Intrinsic::aarch64_neon_st3:
5207 case Intrinsic::aarch64_neon_st4: {
5210 if (!CanCreate || !ST)
5212 unsigned NumElts = Inst->arg_size() - 1;
5213 if (ST->getNumElements() != NumElts)
5215 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5221 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5223 Res = Builder.CreateInsertValue(Res, L, i);
5227 case Intrinsic::aarch64_neon_ld2:
5228 case Intrinsic::aarch64_neon_ld3:
5229 case Intrinsic::aarch64_neon_ld4:
5230 if (Inst->getType() == ExpectedType)
5241 case Intrinsic::aarch64_neon_ld2:
5242 case Intrinsic::aarch64_neon_ld3:
5243 case Intrinsic::aarch64_neon_ld4:
5244 Info.ReadMem = true;
5245 Info.WriteMem = false;
5248 case Intrinsic::aarch64_neon_st2:
5249 case Intrinsic::aarch64_neon_st3:
5250 case Intrinsic::aarch64_neon_st4:
5251 Info.ReadMem = false;
5252 Info.WriteMem = true;
5260 case Intrinsic::aarch64_neon_ld2:
5261 case Intrinsic::aarch64_neon_st2:
5262 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5264 case Intrinsic::aarch64_neon_ld3:
5265 case Intrinsic::aarch64_neon_st3:
5266 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5268 case Intrinsic::aarch64_neon_ld4:
5269 case Intrinsic::aarch64_neon_st4:
5270 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5282 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5283 bool Considerable = false;
5284 AllowPromotionWithoutCommonHeader = false;
5287 Type *ConsideredSExtType =
5289 if (I.getType() != ConsideredSExtType)
5293 for (const User *U : I.users()) {
5295 Considerable = true;
5299 if (GEPInst->getNumOperands() > 2) {
5300 AllowPromotionWithoutCommonHeader = true;
5305 return Considerable;
5353 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5363 return LegalizationCost + 2;
5373 LegalizationCost *= LT.first - 1;
5376 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5385 return LegalizationCost + 2;
5393 std::optional<FastMathFlags> FMF,
5409 return BaseCost + FixedVTy->getNumElements();
5412 if (Opcode != Instruction::FAdd)
5426 MVT MTy = LT.second;
5427 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5475 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5476 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5478 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5488 return (LT.first - 1) + Log2_32(NElts);
5493 return (LT.first - 1) + Entry->Cost;
5505 if (LT.first != 1) {
5511 ExtraCost *= LT.first - 1;
5514 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5515 return Cost + ExtraCost;
5523 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5525 EVT VecVT = TLI->getValueType(DL, VecTy);
5526 EVT ResVT = TLI->getValueType(DL, ResTy);
5536 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5538 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5540 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5542 return (LT.first - 1) * 2 + 2;
5553 EVT VecVT = TLI->getValueType(DL, VecTy);
5554 EVT ResVT = TLI->getValueType(DL, ResTy);
5557 RedOpcode == Instruction::Add) {
5563 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5565 return LT.first + 2;
5600 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5601 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5615 if (LT.second.getScalarType() == MVT::i1) {
5624 assert(Entry && "Illegal Type for Splice");
5625 LegalizationCost += Entry->Cost;
5626 return LegalizationCost * LT.first;
5630 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5642 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5650 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB ||
5652 (OpAExtend != OpBExtend && !ST->hasMatMulInt8() &&
5653 !ST->isSVEorStreamingSVEAvailable())))
5656 "Unexpected values for OpBExtend or InputTypeB");
5664 if (!ST->isSVEorStreamingSVEAvailable())
5671 if (VFMinValue == Scale)
5675 (!ST->isNeonAvailable() || !ST->hasDotProd() || AccumEVT == MVT::i64))
5678 if (InputEVT == MVT::i8) {
5679 switch (VFMinValue) {
5683 if (AccumEVT == MVT::i32)
5685 else if (AccumEVT != MVT::i64)
5689 if (AccumEVT == MVT::i64)
5691 else if (AccumEVT != MVT::i32)
5695 } else if (InputEVT == MVT::i16) {
5698 if (VFMinValue != 8 || AccumEVT != MVT::i64)
5714 "Expected the Mask to match the return size if given");
5716 "Expected the same scalar types");
5722 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5723 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5724 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5732 return std::max<InstructionCost>(1, LT.first / 4);
5740 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5742 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5745 unsigned TpNumElts = Mask.size();
5746 unsigned LTNumElts = LT.second.getVectorNumElements();
5747 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5749 LT.second.getVectorElementCount());
5751 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost>
5753 for (unsigned N = 0; N < NumVecs; N++) {
5757 unsigned Source1 = -1U, Source2 = -1U;
5758 unsigned NumSources = 0;
5759 for (unsigned E = 0; E < LTNumElts; E++) {
5760 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5769 unsigned Source = MaskElt / LTNumElts;
5770 if (NumSources == 0) {
5773 } else if (NumSources == 1 && Source != Source1) {
5776 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5782 if (Source == Source1)
5784 else if (Source == Source2)
5785 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5794 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5805 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
5808 Result.first->second = NCost;
5822 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
5823 if (LT.second.getFixedSizeInBits() >= 128 &&
5825 LT.second.getVectorNumElements() / 2) {
5828 if (Index == (int)LT.second.getVectorNumElements() / 2)
5842 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
5843 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
5852 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
5853 ST->isSVEorStreamingSVEAvailable() &&
5858 if (ST->isSVEorStreamingSVEAvailable() &&
5872 if (IsLoad && LT.second.isVector() &&
5874 LT.second.getVectorElementCount()))
5880 if (Mask.size() == 4 &&
5882 (SrcTy->getScalarSizeInBits() == 16 ||
5883 SrcTy->getScalarSizeInBits() == 32) &&
5884 all_of(Mask, [](int E) { return E < 8; }))
5888 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
5891 return M.value() < 0 || M.value() == (int)M.index();
5898 if (LT.second.isFixedLengthVector() &&
5899 LT.second.getVectorNumElements() == Mask.size() &&
5901 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
5902 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
5903 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5904 LT.second.getVectorNumElements(), 16) ||
5905 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5906 LT.second.getVectorNumElements(), 32) ||
5907 isREVMask(Mask, LT.second.getScalarSizeInBits(),
5908 LT.second.getVectorNumElements(), 64) ||
5911 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6040 return LT.first * Entry->Cost;
6049 LT.second.getSizeInBits() <= 128 && SubTp) {
6051 if (SubLT.second.isVector()) {
6052 int NumElts = LT.second.getVectorNumElements();
6053 int NumSubElts = SubLT.second.getVectorNumElements();
6054 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6060 if (IsExtractSubvector)
6095 return ST->useFixedOverScalableIfEqualCost();
6099 return ST->getEpilogueVectorizationMinVF();
6133 unsigned NumInsns = 0;
6135 NumInsns += BB->sizeWithoutDebug();
6145 int64_t Scale, unsigned AddrSpace) const {
6173 if (I->getOpcode() == Instruction::Or &&
6178 if (I->getOpcode() == Instruction::Add ||
6179 I->getOpcode() == Instruction::Sub)
6204 return all_equal(Shuf->getShuffleMask());
6211 bool AllowSplat = false) {
6216 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6217 auto *FullTy = FullV->getType();
6218 auto *HalfTy = HalfV->getType();
6220 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6223 auto extractHalf = [](Value *FullV, Value *HalfV) {
6226 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6230 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6244 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6245 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6259 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6260 (M2Start != 0 && M2Start != (NumElements / 2)))
6262 if (S1Op1 && S2Op1 && M1Start != M2Start)
6272 return Ext->getType()->getScalarSizeInBits() ==
6273 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6287 Value *VectorOperand = nullptr;
6304 if (!GEP || GEP->getNumOperands() != 2)
6308 Value *Offsets = GEP->getOperand(1);
6311 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6317 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6318 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6319 Ops.push_back(&GEP->getOperandUse(1));
6353 switch (II->getIntrinsicID()) {
6354 case Intrinsic::aarch64_neon_smull:
6355 case Intrinsic::aarch64_neon_umull:
6358 Ops.push_back(&II->getOperandUse(0));
6359 Ops.push_back(&II->getOperandUse(1));
6364 case Intrinsic::fma:
6365 case Intrinsic::fmuladd:
6371 case Intrinsic::aarch64_neon_sqdmull:
6372 case Intrinsic::aarch64_neon_sqdmulh:
6373 case Intrinsic::aarch64_neon_sqrdmulh:
6376 Ops.push_back(&II->getOperandUse(0));
6378 Ops.push_back(&II->getOperandUse(1));
6379 return !Ops.empty();
6380 case Intrinsic::aarch64_neon_fmlal:
6381 case Intrinsic::aarch64_neon_fmlal2:
6382 case Intrinsic::aarch64_neon_fmlsl:
6383 case Intrinsic::aarch64_neon_fmlsl2:
6386 Ops.push_back(&II->getOperandUse(1));
6388 Ops.push_back(&II->getOperandUse(2));
6389 return !Ops.empty();
6390 case Intrinsic::aarch64_sve_ptest_first:
6391 case Intrinsic::aarch64_sve_ptest_last:
6393 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6394 Ops.push_back(&II->getOperandUse(0));
6395 return !Ops.empty();
6396 case Intrinsic::aarch64_sme_write_horiz:
6397 case Intrinsic::aarch64_sme_write_vert:
6398 case Intrinsic::aarch64_sme_writeq_horiz:
6399 case Intrinsic::aarch64_sme_writeq_vert: {
6401 if (!Idx || Idx->getOpcode() != Instruction::Add)
6403 Ops.push_back(&II->getOperandUse(1));
6406 case Intrinsic::aarch64_sme_read_horiz:
6407 case Intrinsic::aarch64_sme_read_vert:
6408 case Intrinsic::aarch64_sme_readq_horiz:
6409 case Intrinsic::aarch64_sme_readq_vert:
6410 case Intrinsic::aarch64_sme_ld1b_vert:
6411 case Intrinsic::aarch64_sme_ld1h_vert:
6412 case Intrinsic::aarch64_sme_ld1w_vert:
6413 case Intrinsic::aarch64_sme_ld1d_vert:
6414 case Intrinsic::aarch64_sme_ld1q_vert:
6415 case Intrinsic::aarch64_sme_st1b_vert:
6416 case Intrinsic::aarch64_sme_st1h_vert:
6417 case Intrinsic::aarch64_sme_st1w_vert:
6418 case Intrinsic::aarch64_sme_st1d_vert:
6419 case Intrinsic::aarch64_sme_st1q_vert:
6420 case Intrinsic::aarch64_sme_ld1b_horiz:
6421 case Intrinsic::aarch64_sme_ld1h_horiz:
6422 case Intrinsic::aarch64_sme_ld1w_horiz:
6423 case Intrinsic::aarch64_sme_ld1d_horiz:
6424 case Intrinsic::aarch64_sme_ld1q_horiz:
6425 case Intrinsic::aarch64_sme_st1b_horiz:
6426 case Intrinsic::aarch64_sme_st1h_horiz:
6427 case Intrinsic::aarch64_sme_st1w_horiz:
6428 case Intrinsic::aarch64_sme_st1d_horiz:
6429 case Intrinsic::aarch64_sme_st1q_horiz: {
6431 if (!Idx || Idx->getOpcode() != Instruction::Add)
6433 Ops.push_back(&II->getOperandUse(3));
6436 case Intrinsic::aarch64_neon_pmull:
6439 Ops.push_back(&II->getOperandUse(0));
6440 Ops.push_back(&II->getOperandUse(1));
6442 case Intrinsic::aarch64_neon_pmull64:
6444 II->getArgOperand(1)))
6446 Ops.push_back(&II->getArgOperandUse(0));
6447 Ops.push_back(&II->getArgOperandUse(1));
6449 case Intrinsic::masked_gather:
6452 Ops.push_back(&II->getArgOperandUse(0));
6454 case Intrinsic::masked_scatter:
6457 Ops.push_back(&II->getArgOperandUse(1));
6464 auto ShouldSinkCondition = [](Value *Cond,
6469 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6473 Ops.push_back(&II->getOperandUse(0));
6477 switch (I->getOpcode()) {
6478 case Instruction::GetElementPtr:
6479 case Instruction::Add:
6480 case Instruction::Sub:
6482 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6484 Ops.push_back(&I->getOperandUse(Op));
6489 case Instruction::Select: {
6490 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6493 Ops.push_back(&I->getOperandUse(0));
6496 case Instruction::Br: {
6503 Ops.push_back(&I->getOperandUse(0));
6510 if (!I->getType()->isVectorTy())
6513 switch (I->getOpcode()) {
6514 case Instruction::Sub:
6515 case Instruction::Add: {
6524 Ops.push_back(&Ext1->getOperandUse(0));
6525 Ops.push_back(&Ext2->getOperandUse(0));
6528 Ops.push_back(&I->getOperandUse(0));
6529 Ops.push_back(&I->getOperandUse(1));
6533 case Instruction::Or: {
6536 if (ST->hasNEON()) {
6550 if (I->getParent() != MainAnd->getParent() ||
6555 if (I->getParent() != IA->getParent() ||
6556 I->getParent() != IB->getParent())
6561 Ops.push_back(&I->getOperandUse(0));
6562 Ops.push_back(&I->getOperandUse(1));
6571 case Instruction::Mul: {
6572 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6575 if (Ty->isScalableTy())
6579 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6582 int NumZExts = 0, NumSExts = 0;
6583 for (auto &Op : I->operands()) {
6590 auto *ExtOp = Ext->getOperand(0);
6591 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6592 Ops.push_back(&Ext->getOperandUse(0));
6633 if (!ElementConstant || !ElementConstant->isZero())
6636 unsigned Opcode = OperandInstr->getOpcode();
6637 if (Opcode == Instruction::SExt)
6639 else if (Opcode == Instruction::ZExt)
6644 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6654 Ops.push_back(&Insert->getOperandUse(1));
6660 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6664 if (!ShouldSinkSplatForIndexedVariant(I))
6669 Ops.push_back(&I->getOperandUse(0));
6671 Ops.push_back(&I->getOperandUse(1));
6673 return !Ops.empty();
6675 case Instruction::FMul: {
6677 if (I->getType()->isScalableTy())
6686 Ops.push_back(&I->getOperandUse(0));
6688 Ops.push_back(&I->getOperandUse(1));
6689 return !Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const override
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool enableScalableVectorization() const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
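As an illustrative aside (a minimal sketch, not part of the original file), the two constructors above build contiguous low/high bit masks:
  // Sketch only: low/high contiguous masks on a 16-bit APInt.
  #include "llvm/ADT/APInt.h"
  #include <cassert>
  void apintMaskExamples() {
    llvm::APInt Lo = llvm::APInt::getLowBitsSet(16, 4);  // 0x000F
    llvm::APInt Hi = llvm::APInt::getHighBitsSet(16, 4); // 0xF000
    assert(Lo.getZExtValue() == 0x000F && Hi.getZExtValue() == 0xF000);
  }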
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
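A minimal sketch of the helper above (the module and function scaffolding below is invented purely for illustration): for a scalable count of 4, the builder emits an i64 expression equivalent to vscale * 4.
  // Sketch only; "example"/"f" are made-up names.
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  void emitElementCount() {
    llvm::LLVMContext Ctx;
    llvm::Module M("example", Ctx);
    llvm::Function *F = llvm::Function::Create(
        llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false),
        llvm::Function::ExternalLinkage, "f", &M);
    llvm::IRBuilder<> Builder(llvm::BasicBlock::Create(Ctx, "entry", F));
    // Evaluates at runtime to vscale * 4 for a scalable element count of 4.
    llvm::Value *N = Builder.CreateElementCount(
        Builder.getInt64Ty(), llvm::ElementCount::getScalable(4));
    (void)N;
    Builder.CreateRetVoid();
  }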
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
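For example (a sketch, not from the original file), the mask <1, 3, 5, 7> de-interleaves two interleaved vectors with Factor = 2, starting at Index = 1:
  // Sketch only.
  #include "llvm/IR/Instructions.h"
  bool deinterleaveExample() {
    int Mask[] = {1, 3, 5, 7};  // every second lane, starting at 1
    unsigned Index = 0;
    return llvm::ShuffleVectorInst::isDeInterleaveMaskOfFactor(
        Mask, /*Factor=*/2, Index);  // expected: true, with Index == 1
  }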
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
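These matchers drive most of the operand-sinking and widening checks in this file. A hedged sketch of the idiom (the helper name is invented): recognise mul(sext(a), sext(b)), the shape that maps onto a widening multiply.
  // Sketch only.
  #include "llvm/IR/PatternMatch.h"
  static bool isSExtSExtMul(llvm::Value *V) {
    using namespace llvm::PatternMatch;
    llvm::Value *A = nullptr, *B = nullptr;
    return match(V, m_Mul(m_SExt(m_Value(A)), m_SExt(m_Value(B))));
  }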
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
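A hedged sketch of the kind of mask this matches (the declaration is copied from this page; reading the result as the splatted within-segment lane index is an assumption): with two segments of four elements, <1, 1, 1, 1, 5, 5, 5, 5> splats lane 1 of each segment.
  // Sketch only; the defining AArch64 backend header is not named here.
  #include "llvm/ADT/ArrayRef.h"
  #include <optional>
  namespace llvm {
  std::optional<unsigned> isDUPQMask(ArrayRef<int> Mask, unsigned Segments,
                                     unsigned SegmentSize);
  }
  std::optional<unsigned> dupqExample() {
    int Mask[] = {1, 1, 1, 1, 5, 5, 5, 5};
    return llvm::isDUPQMask(Mask, /*Segments=*/2, /*SegmentSize=*/4);
  }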
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
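The cost tables in this file all follow the same lookup pattern; a minimal sketch with invented entries (the numbers below are illustrative, not the real AArch64 costs):
  // Sketch only; table contents are made up.
  #include "llvm/CodeGen/CostTable.h"
  #include "llvm/CodeGen/ISDOpcodes.h"
  static unsigned exampleCost(int ISDOpc, llvm::MVT Ty) {
    using namespace llvm;
    static const CostTblEntry Tbl[] = {
        {ISD::ADD, MVT::v4i32, 1},
        {ISD::MUL, MVT::v2i64, 4},
    };
    if (const auto *Entry = CostTableLookup(Tbl, ISDOpc, Ty))
      return Entry->Cost;
    return 0;  // no entry: caller falls back to the generic cost
  }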
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
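Both helpers just above back the power-of-two and log checks in the cost code; a trivial sketch:
  // Sketch only.
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  void mathExtrasExamples() {
    assert(llvm::isPowerOf2_32(64));   // 2^6
    assert(!llvm::isPowerOf2_32(48));
    assert(llvm::Log2_32(64) == 6);    // floor log2
    assert(llvm::Log2_32(100) == 6);   // still floor, not rounded up
  }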
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
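For instance (a sketch; the declaration is copied from this page and its defining AArch64 backend header is not named here), reversing eight 16-bit lanes within each 64-bit block is a REV64-style mask:
  // Sketch only.
  #include "llvm/ADT/ArrayRef.h"
  namespace llvm {
  bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
                 unsigned BlockSize);
  }
  bool rev64Example() {
    int Mask[] = {3, 2, 1, 0, 7, 6, 5, 4};  // reverse within each 64-bit block
    return llvm::isREVMask(Mask, /*EltSize=*/16, /*NumElts=*/8, /*BlockSize=*/64);
  }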
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...
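Using the zip1/uzp1 mask shapes quoted in this entry and in the isUZPMask entry above (a sketch; declarations copied from this page, backend header not named here):
  // Sketch only.
  #include "llvm/ADT/ArrayRef.h"
  namespace llvm {
  bool isZIPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResultOut);
  bool isUZPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResultOut);
  }
  void permuteMaskExamples() {
    unsigned Which = 0;
    int Zip1[] = {0, 8, 1, 9, 2, 10, 3, 11};   // interleave low halves
    int Uzp1[] = {0, 2, 4, 6, 8, 10, 12, 14};  // keep even lanes
    bool IsZip = llvm::isZIPMask(Zip1, /*NumElts=*/8, Which);  // expected: true
    bool IsUzp = llvm::isUZPMask(Uzp1, /*NumElts=*/8, Which);  // expected: true
    (void)IsZip;
    (void)IsUzp;
  }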
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
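A hypothetical sketch (table contents invented for illustration, in the style the TTI code uses for such tables) of the conversion-cost lookup:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

static unsigned lookupExtendCost() {
  // Entry layout: ISD opcode, destination MVT, source MVT, cost.
  static const llvm::TypeConversionCostTblEntry Table[] = {
      {llvm::ISD::ZERO_EXTEND, llvm::MVT::v4i32, llvm::MVT::v4i16, 1},
      {llvm::ISD::SIGN_EXTEND, llvm::MVT::v4i32, llvm::MVT::v4i16, 1},
  };
  if (const auto *Entry = llvm::ConvertCostTableLookup(
          Table, llvm::ISD::ZERO_EXTEND, llvm::MVT::v4i32, llvm::MVT::v4i16))
    return Entry->Cost;
  return 0; // No matching entry.
}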
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
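Because NextPowerOf2 is constexpr, its "strictly greater" behaviour can be pinned down with static_asserts (standalone sketch, values chosen for illustration):

#include "llvm/Support/MathExtras.h"

static_assert(llvm::NextPowerOf2(0) == 1, "zero maps to one");
static_assert(llvm::NextPowerOf2(5) == 8, "rounds up to the next power of two");
static_assert(llvm::NextPowerOf2(8) == 16, "a power of two maps to the next one up");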
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
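A hypothetical sketch (SVEIntrinsicInfo is local to this translation unit, and the intrinsic/opcode pairing is chosen purely for illustration) of how the chained setters above are meant to compose:

// Describe a merge-form intrinsic: defaultMergingOp establishes the merging
// convention, setMatchingUndefIntrinsic names an equivalent "_u" form whose
// inactive lanes are undefined, and setMatchingIROpcode ties the operation
// to a plain IR opcode for later simplification.
static SVEIntrinsicInfo describeMergingAdd() {
  return SVEIntrinsicInfo::defaultMergingOp()
      .setMatchingUndefIntrinsic(Intrinsic::aarch64_sve_add_u)
      .setMatchingIROpcode(Instruction::Add);
}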
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
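A small standalone sketch (types chosen for illustration) of the EVT queries listed above, building one fixed-length and one scalable vector type:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static void evtExamples(llvm::LLVMContext &Ctx) {
  llvm::EVT EltVT = llvm::EVT::getEVT(llvm::Type::getInt32Ty(Ctx));
  llvm::EVT FixedVT = llvm::EVT::getVectorVT(Ctx, EltVT, 4);
  llvm::EVT ScalableVT =
      llvm::EVT::getVectorVT(Ctx, EltVT, 4, /*IsScalable=*/true);
  // v4i32 is a simple, fixed-length type; <vscale x 4 x i32> is scalable.
  bool Fixed = FixedVT.isSimple() && FixedVT.isFixedLengthVector();
  bool Scalable = ScalableVT.isScalableVector();
  (void)Fixed;
  (void)Scalable;
}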
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.