#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

    "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc(
        "Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));
class TailFoldingOption {
  bool NeedsDefault = true;

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits &= ~DisableBits;
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  void operator=(const std::string &Val) {
    setNeedsDefault(false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);

      setInitialBits(TailFoldingOpts::Disabled);
    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
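    // Worked example of the parsing above (the option value is split on '+'
    // before this loop runs): "-sve-tail-folding=default+reductions+noreverse"
    // keeps the target's default bits, ORs in the Reductions enable bit, and
    // records Reverse in the disable mask, which is applied after the initial
    // bits are chosen (see "Bits &= ~DisableBits" above). So "all+noreverse"
    // reads as "everything except reversed memory accesses".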
191 "Control the use of vectorisation using tail-folding for SVE where the"
192 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
193 "\ndisabled (Initial) No loop types will vectorize using "
195 "\ndefault (Initial) Uses the default tail-folding settings for "
197 "\nall (Initial) All legal loop types will vectorize using "
199 "\nsimple (Initial) Use tail-folding for simple loops (not "
200 "reductions or recurrences)"
201 "\nreductions Use tail-folding for loops containing reductions"
202 "\nnoreductions Inverse of above"
203 "\nrecurrences Use tail-folding for loops containing fixed order "
205 "\nnorecurrences Inverse of above"
206 "\nreverse Use tail-folding for loops requiring reversed "
208 "\nnoreverse Inverse of above"),
      .Case("__arm_sme_state", true)
      .Case("__arm_tpidr2_save", true)
      .Case("__arm_tpidr2_restore", true)
      .Case("__arm_za_disable", true)
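      // These names are the SME ABI support routines from the AArch64 SME ABI
      // (streaming-mode state queries and TPIDR2/ZA save-restore helpers);
      // returning true for them presumably marks such calls as having special
      // streaming/ZA side effects rather than being ordinary call boundaries.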
    if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
        (cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");

  return F.hasFnAttribute("fmv-features");
  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);

  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresPreservingZT0(CalleeAttrs) ||
      CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) {
  auto FVTy = dyn_cast<FixedVectorType>(Ty);
         FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;

                                            unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())

  return DefaultCallPenalty;
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {

  return std::max<InstructionCost>(1, Cost);
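  // Worked example for the immediate cost above: (BitSize + 63) & ~0x3fU
  // rounds the width up to a multiple of 64, so a 32-bit immediate is
  // sign-extended to one 64-bit slice while an i128 immediate gets two. The
  // ShiftVal loop then costs each 64-bit slice separately (one MOVZ/MOVK-style
  // chunk sequence per slice), and the final std::max enforces a minimum cost
  // of 1.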
  unsigned ImmIdx = ~0U;

  case Instruction::GetElementPtr:
  case Instruction::Store:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:

    int NumConstants = (BitSize + 63) / 64;
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    int NumConstants = (BitSize + 63) / 64;

  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))

  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
  if (TyWidth == 32 || TyWidth == 64)

  unsigned TotalHistCnts = 1;

  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;
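    // Worked example, assuming NaturalVectorWidth is the number of elements a
    // single legal histcnt covers: with <vscale x 8 x i32>-shaped buckets,
    // EC == 8 and LegalEltSize == 32, so a 128-bit granule holds 4 elements
    // and TotalHistCnts == 8 / 4 == 2, i.e. the histogram is costed as two
    // legal histcnt operations.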
  if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))

  switch (ICA.getID()) {
  case Intrinsic::experimental_vector_histogram_add:

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    static const auto ValidMinMaxTys = {MVT::v8i8,    MVT::v16i8,   MVT::v4i16,
                                        MVT::v8i16,   MVT::v2i32,   MVT::v4i32,
                                        MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

    if (LT.second == MVT::v2i64)
    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    static const auto ValidSatTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
    if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
      return LT.first * Instrs;

  case Intrinsic::abs: {
    static const auto ValidAbsTys = {MVT::v8i8,  MVT::v16i8, MVT::v4i16,
                                     MVT::v8i16, MVT::v2i32, MVT::v4i32,
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))

  case Intrinsic::bswap: {
    static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
                                     MVT::v4i32, MVT::v2i64};
    if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
        LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

  case Intrinsic::stepvector: {
      Cost += AddCost * (LT.first - 1);

  case Intrinsic::vector_extract:
  case Intrinsic::vector_insert: {
    bool IsExtract = ICA.getID() == Intrinsic::vector_extract;

  case Intrinsic::bitreverse: {
        {Intrinsic::bitreverse, MVT::i32, 1},
        {Intrinsic::bitreverse, MVT::i64, 1},
        {Intrinsic::bitreverse, MVT::v8i8, 1},
        {Intrinsic::bitreverse, MVT::v16i8, 1},
        {Intrinsic::bitreverse, MVT::v4i16, 2},
        {Intrinsic::bitreverse, MVT::v8i16, 2},
        {Intrinsic::bitreverse, MVT::v2i32, 2},
        {Intrinsic::bitreverse, MVT::v4i32, 2},
        {Intrinsic::bitreverse, MVT::v1i64, 2},
        {Intrinsic::bitreverse, MVT::v2i64, 2},
      return LegalisationCost.first * Entry->Cost + 1;
    return LegalisationCost.first * Entry->Cost;
  case Intrinsic::ctpop: {
    if (!ST->hasNEON()) {
                        RetTy->getScalarSizeInBits()
    return LT.first * Entry->Cost + ExtraCost;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
        {Intrinsic::sadd_with_overflow, MVT::i8, 3},
        {Intrinsic::uadd_with_overflow, MVT::i8, 3},
        {Intrinsic::sadd_with_overflow, MVT::i16, 3},
        {Intrinsic::uadd_with_overflow, MVT::i16, 3},
        {Intrinsic::sadd_with_overflow, MVT::i32, 1},
        {Intrinsic::uadd_with_overflow, MVT::i32, 1},
        {Intrinsic::sadd_with_overflow, MVT::i64, 1},
        {Intrinsic::uadd_with_overflow, MVT::i64, 1},
        {Intrinsic::ssub_with_overflow, MVT::i8, 3},
        {Intrinsic::usub_with_overflow, MVT::i8, 3},
        {Intrinsic::ssub_with_overflow, MVT::i16, 3},
        {Intrinsic::usub_with_overflow, MVT::i16, 3},
        {Intrinsic::ssub_with_overflow, MVT::i32, 1},
        {Intrinsic::usub_with_overflow, MVT::i32, 1},
        {Intrinsic::ssub_with_overflow, MVT::i64, 1},
        {Intrinsic::usub_with_overflow, MVT::i64, 1},
        {Intrinsic::smul_with_overflow, MVT::i8, 5},
        {Intrinsic::umul_with_overflow, MVT::i8, 4},
        {Intrinsic::smul_with_overflow, MVT::i16, 5},
        {Intrinsic::umul_with_overflow, MVT::i16, 4},
        {Intrinsic::smul_with_overflow, MVT::i32, 2},
        {Intrinsic::umul_with_overflow, MVT::i32, 2},
        {Intrinsic::smul_with_overflow, MVT::i64, 3},
        {Intrinsic::umul_with_overflow, MVT::i64, 3},
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

    if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
         LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
         LT.second == MVT::v2f64)) {
          (LT.second == MVT::f64 && MTy == MVT::i32) ||
          (LT.second == MVT::f32 && MTy == MVT::i64)))

    if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

    if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
        (LT.second == MVT::f16 && MTy == MVT::i64) ||
        ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

    if ((LT.second.getScalarType() == MVT::f32 ||
         LT.second.getScalarType() == MVT::f64 ||
         LT.second.getScalarType() == MVT::f16) &&
      if (LT.second.isVector())
          LegalTy, {LegalTy, LegalTy});
          LegalTy, {LegalTy, LegalTy});
      return LT.first * Cost +
             ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

    if (LT.second.isVector()) {
    Type *CondTy = RetTy->getWithNewBitWidth(1);
    return LT.first * Cost;
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
        {Intrinsic::fshl, MVT::v4i32, 3}, {Intrinsic::fshl, MVT::v2i64, 3},
        {Intrinsic::fshl, MVT::v16i8, 4}, {Intrinsic::fshl, MVT::v8i16, 4},
        {Intrinsic::fshl, MVT::v2i32, 3}, {Intrinsic::fshl, MVT::v8i8, 4},
        {Intrinsic::fshl, MVT::v4i16, 4}};
      return LegalisationCost.first * Entry->Cost;

    if (!RetTy->isIntegerTy())

    bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
                       RetTy->getScalarSizeInBits() < 64) ||
                      (RetTy->getScalarSizeInBits() % 64 != 0);
    unsigned ExtraCost = HigherCost ? 1 : 0;
    if (RetTy->getScalarSizeInBits() == 32 ||
        RetTy->getScalarSizeInBits() == 64)

    return TyL.first + ExtraCost;
  case Intrinsic::get_active_lane_mask: {
    if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
        return RetTy->getNumElements() * 2;

  case Intrinsic::experimental_vector_match: {
    auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
    unsigned SearchSize = NeedleTy->getNumElements();
    if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
      if (isa<FixedVectorType>(RetTy))
  case Intrinsic::experimental_cttz_elts: {
    if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
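// In IR terms the rewrite above does roughly this (names illustrative):
//
//   %phi = phi <vscale x 16 x i1> [ %a.svbool, %bb1 ], [ %b.svbool, %bb2 ]
//   ; where %x.svbool = @llvm.aarch64.sve.convert.to.svbool(%x)
//
// becomes a new phi (NPN) over the original %a and %b, letting the
// convert.from.svbool user consume the un-widened predicate directly.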
static std::optional<Instruction *>
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
    return std::nullopt;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
    return std::nullopt;

  auto BinOpPred = BinOp->getOperand(0);
  auto BinOpOp1 = BinOp->getOperand(1);
  auto BinOpOp2 = BinOp->getOperand(2);

  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
    return std::nullopt;

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
  if (PredOpTy != II.getType())
    return std::nullopt;

      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
  if (BinOpOp1 == BinOpOp2)
    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

  auto NarrowedBinOp =
static std::optional<Instruction *>
  if (isa<PHINode>(II.getArgOperand(0)))
    return BinOpCombine;

  if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
      isa<TargetExtType>(II.getType()))
    return std::nullopt;

  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

  const auto *IVTy = cast<VectorType>(II.getType());

    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
    if (CursorVTy->getElementCount().getKnownMinValue() <
        IVTy->getElementCount().getKnownMinValue())

    if (Cursor->getType() == IVTy)
      EarliestReplacement = Cursor;

    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);

    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_to_svbool ||
                              IntrinsicCursor->getIntrinsicID() ==
                                  Intrinsic::aarch64_sve_convert_from_svbool))

    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
    Cursor = IntrinsicCursor->getOperand(0);

  if (!EarliestReplacement)
    return std::nullopt;
  Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
      Pred = UncastedPred;

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
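// In IR terms (names illustrative), this returns true for
//   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
// (31 == AArch64SVEPredPattern::all), and will also look through a
// from.svbool(to.svbool(%pg)) round-trip, but only when the outer type is no
// wider than the inner one, so no implicitly zeroed svbool lanes can be
// observed.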
static std::optional<Instruction *>
                           bool hasInactiveVector) {
  int PredOperand = hasInactiveVector ? 1 : 0;
  int ReplaceOperand = hasInactiveVector ? 0 : 1;
    return std::nullopt;

static std::optional<Instruction *>
      !isa<llvm::UndefValue>(II.getOperand(0)) &&
      !isa<llvm::PoisonValue>(II.getOperand(0))) {

static std::optional<Instruction *>
    return std::nullopt;
static std::optional<Instruction *>
  if (RetTy->isStructTy()) {
    auto StructT = cast<StructType>(RetTy);
    auto VecT = StructT->getElementType(0);
    for (unsigned i = 0; i < StructT->getNumElements(); i++) {
      ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
                                                : ConstantInt::get(VecT, 0));

                    : ConstantInt::get(II.getType(), 0);

  return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
    return std::nullopt;
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
    return std::nullopt;

      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
  Insert->insertBefore(II.getIterator());
  Insert->takeName(&II);

  auto *RetTy = cast<ScalableVectorType>(II.getType());
                                          II.getArgOperand(0));
  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
    return std::nullopt;

  if (!SplatValue || !SplatValue->isZero())
    return std::nullopt;

  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
    return std::nullopt;

  auto *DupQLaneIdx = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
  if (!DupQLaneIdx || !DupQLaneIdx->isZero())
    return std::nullopt;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
    return std::nullopt;

  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
    return std::nullopt;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
    return std::nullopt;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
    return std::nullopt;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
    return std::nullopt;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;

  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
      return std::nullopt;
    PredicateBits |= 1 << (I * (16 / NumElts));
  if (PredicateBits == 0) {
    PFalse->takeName(&II);

  for (unsigned I = 0; I < 16; ++I)
    if ((PredicateBits & (1 << I)) != 0)

  unsigned PredSize = Mask & -Mask;

  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
      return std::nullopt;
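  // Worked example of the bit layout above: a <4 x i1>-shaped constant
  // <1, 0, 1, 0> spaces its lanes 16/4 == 4 bits apart, giving
  // PredicateBits == 0x101. "Mask & -Mask" is the usual lowest-set-bit trick,
  // and the trailing loop insists a lane bit is present at every PredSize
  // step, i.e. that the constant encodes a pattern an SVE ptrue can actually
  // represent.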
                                    {PredType}, {PTruePat});
      Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
  auto *ConvertFromSVBool =
                          {II.getType()}, {ConvertToSVBool});
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(1);
  auto IntrinsicID = II.getIntrinsicID();
  bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

    auto *OldBinOp = cast<BinaryOperator>(Vec);
    auto OpC = OldBinOp->getOpcode();
        OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

  auto *C = dyn_cast<Constant>(Pg);
  if (IsAfter && C && C->isNullValue()) {
    Extract->insertBefore(II.getIterator());
    Extract->takeName(&II);

  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
    return std::nullopt;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
    return std::nullopt;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
    return std::nullopt;

  unsigned Idx = MinNumElts - 1;

  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
    return std::nullopt;

  Extract->insertBefore(II.getIterator());
  Extract->takeName(&II);
  Value *Pg = II.getArgOperand(0);
  Value *Vec = II.getArgOperand(2);

    return std::nullopt;

    return std::nullopt;

      FPTy, cast<VectorType>(Vec->getType())->getElementCount());
      II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

                          {II.getType()}, {AllPat});
static std::optional<Instruction *>
  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();

  if (Pattern == AArch64SVEPredPattern::all) {
    Constant *StepVal = ConstantInt::get(II.getType(), NumElts);

  return MinNumElts && NumElts >= MinNumElts
                   II, ConstantInt::get(II.getType(), MinNumElts)))
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

  if (PgVal == OpVal &&
      (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
       II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
    Value *Ops[] = {PgVal, OpVal};

    return std::nullopt;

  if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
      OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

  if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
      ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
       (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
       (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
       (OpIID == Intrinsic::aarch64_sve_and_z) ||
       (OpIID == Intrinsic::aarch64_sve_bic_z) ||
       (OpIID == Intrinsic::aarch64_sve_eor_z) ||
       (OpIID == Intrinsic::aarch64_sve_nand_z) ||
       (OpIID == Intrinsic::aarch64_sve_nor_z) ||
       (OpIID == Intrinsic::aarch64_sve_orn_z) ||
       (OpIID == Intrinsic::aarch64_sve_orr_z))) {

  return std::nullopt;
template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>
static std::optional<Instruction *>
                                 bool MergeIntoAddendOp) {
  Value *MulOp0, *MulOp1, *AddendOp, *Mul;
  if (MergeIntoAddendOp) {
    AddendOp = II.getOperand(1);
    Mul = II.getOperand(2);
    AddendOp = II.getOperand(2);
    Mul = II.getOperand(1);

    return std::nullopt;

  if (!Mul->hasOneUse())
    return std::nullopt;

  if (II.getType()->isFPOrFPVectorTy()) {
    if (FAddFlags != cast<CallInst>(Mul)->getFastMathFlags())
      return std::nullopt;
    return std::nullopt;
  if (MergeIntoAddendOp)

static std::optional<Instruction *>
  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();

  Load->copyMetadata(II);

static std::optional<Instruction *>
  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);

  Store->copyMetadata(II);
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
    return Instruction::BinaryOpsEnd;
static std::optional<Instruction *>
  if (II.isStrictFP())
    return std::nullopt;

  auto *OpPredicate = II.getOperand(0);
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

      BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

  auto *OpPredicate = II.getOperand(0);
  if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return std::nullopt;

  auto *Mod = II.getModule();
  II.setCalledFunction(NewDecl);
static std::optional<Instruction *>
                                           Intrinsic::aarch64_sve_mla>(
                                           Intrinsic::aarch64_sve_mad>(
  return std::nullopt;

static std::optional<Instruction *>
                                           Intrinsic::aarch64_sve_fmla>(IC, II,
                                           Intrinsic::aarch64_sve_fmad>(IC, II,
                                           Intrinsic::aarch64_sve_fmla>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                           Intrinsic::aarch64_sve_fmla>(IC, II,
                                           Intrinsic::aarch64_sve_fmad>(IC, II,
                                           Intrinsic::aarch64_sve_fmla_u>(

static std::optional<Instruction *>
                                           Intrinsic::aarch64_sve_fmls>(IC, II,
                                           Intrinsic::aarch64_sve_fnmsb>(
                                           Intrinsic::aarch64_sve_fmls>(IC, II,
  return std::nullopt;

static std::optional<Instruction *>
                                           Intrinsic::aarch64_sve_fmls>(IC, II,
                                           Intrinsic::aarch64_sve_fnmsb>(
                                           Intrinsic::aarch64_sve_fmls_u>(
                                           Intrinsic::aarch64_sve_mls>(
  return std::nullopt;
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);

  auto IsUnitSplat = [](auto *I) {

  auto IsUnitDup = [](auto *I) {
    auto *IntrI = dyn_cast<IntrinsicInst>(I);
    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)

    auto *SplatValue = IntrI->getOperand(2);

  if (IsUnitSplat(OpMultiplier)) {
    OpMultiplicand->takeName(&II);
  } else if (IsUnitDup(OpMultiplier)) {
    auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
    auto *DupPg = DupInst->getOperand(1);
    if (OpPredicate == DupPg) {
      OpMultiplicand->takeName(&II);
  Value *UnpackArg = II.getArgOperand(0);
  auto *RetTy = cast<ScalableVectorType>(II.getType());
  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

  return std::nullopt;

  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);

  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
    return std::nullopt;
  constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
  constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

  if ((match(II.getArgOperand(0),
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&
             m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(B))))) ||
    auto *TyA = cast<ScalableVectorType>(A->getType());
    if (TyA == B->getType() &&

  return std::nullopt;

  if (match(II.getArgOperand(0),
      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return std::nullopt;
static std::optional<Instruction *>
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
        BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;

static std::optional<Instruction *>
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
  Value *Index = II.getOperand(3);

  if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
        BasePtr->getPointerAlignment(II.getDataLayout());
                                      BasePtr, IndexBase);

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *DivVec = II.getOperand(2);

  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
    return std::nullopt;

  if (DivisorValue == -1)
    return std::nullopt;
  if (DivisorValue == 1)

        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
        Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

  return std::nullopt;
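// Sketch of the combine above in pseudo-IR (names illustrative):
//   sdiv(pg, x, splat(8))   -->  asrd(pg, x, log2(8) = 3)
//   sdiv(pg, x, splat(-8))  -->  neg(pg, asrd(pg, x, 3))
// A divisor of -1 is rejected and a divisor of 1 is handled trivially above;
// non-splat or non-power-of-two divisors fall through to std::nullopt.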
  size_t VecSize = Vec.size();
  size_t HalfVecSize = VecSize / 2;

    if (*LHS != nullptr && *RHS != nullptr) {

    if (*LHS == nullptr && *RHS != nullptr)

          m_Intrinsic<Intrinsic::vector_insert>(
      !isa<FixedVectorType>(CurrentInsertElt->getType()))
    return std::nullopt;
  auto IIScalableTy = cast<ScalableVectorType>(II.getType());

  while (auto InsertElt = dyn_cast<InsertElementInst>(CurrentInsertElt)) {
    auto Idx = cast<ConstantInt>(InsertElt->getOperand(2));
    Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
    CurrentInsertElt = InsertElt->getOperand(0);

      isa<PoisonValue>(CurrentInsertElt) && isa<PoisonValue>(Default);
    return std::nullopt;

  for (size_t I = 0; I < Elts.size(); I++) {
    if (Elts[I] == nullptr)

  if (InsertEltChain == nullptr)
    return std::nullopt;

  unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
  unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
                                 IIScalableTy->getMinNumElements() /

  auto *WideShuffleMaskTy =

  auto NarrowBitcast =

  return std::nullopt;
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);

  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
    return std::nullopt;

    return std::nullopt;

    return std::nullopt;

                                   {II.getType()}, {Pred, Vec, Shift});
  Value *Vec = II.getOperand(0);

    return std::nullopt;

  auto *NI = II.getNextNonDebugInstruction();
    return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects();
  while (LookaheadThreshold-- && CanSkipOver(NI)) {
    auto *NIBB = NI->getParent();
    NI = NI->getNextNonDebugInstruction();
    if (auto *SuccBB = NIBB->getUniqueSuccessor())
      NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

  auto *NextII = dyn_cast_or_null<IntrinsicInst>(NI);
  if (NextII && II.isIdenticalTo(NextII))

  return std::nullopt;
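// Bounded lookahead, as a sketch: starting at the next non-debug instruction,
// the loop may hop over anything that neither touches memory nor has side
// effects (following unique successors across blocks) for at most
// LookaheadThreshold steps. If it lands on an intrinsic identical to this
// one, e.g. two calls to @llvm.aarch64.dmb(i32 11) (dmb ish) with only
// arithmetic in between, this barrier is reported as redundant.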
std::optional<Instruction *>
  case Intrinsic::aarch64_dmb:
  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvt_f16f32:
  case Intrinsic::aarch64_sve_fcvt_f16f64:
  case Intrinsic::aarch64_sve_fcvt_f32f16:
  case Intrinsic::aarch64_sve_fcvt_f32f64:
  case Intrinsic::aarch64_sve_fcvt_f64f16:
  case Intrinsic::aarch64_sve_fcvt_f64f32:
  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
  case Intrinsic::aarch64_sve_fcvtx_f32f64:
  case Intrinsic::aarch64_sve_fcvtzs:
  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
  case Intrinsic::aarch64_sve_fcvtzu:
  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
  case Intrinsic::aarch64_sve_scvtf:
  case Intrinsic::aarch64_sve_scvtf_f16i32:
  case Intrinsic::aarch64_sve_scvtf_f16i64:
  case Intrinsic::aarch64_sve_scvtf_f32i64:
  case Intrinsic::aarch64_sve_scvtf_f64i32:
  case Intrinsic::aarch64_sve_ucvtf:
  case Intrinsic::aarch64_sve_ucvtf_f16i32:
  case Intrinsic::aarch64_sve_ucvtf_f16i64:
  case Intrinsic::aarch64_sve_ucvtf_f32i64:
  case Intrinsic::aarch64_sve_ucvtf_f64i32:
  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
  case Intrinsic::aarch64_sve_st1_scatter:
  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
  case Intrinsic::aarch64_sve_st1dq:
  case Intrinsic::aarch64_sve_st1q_scatter_index:
  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
  case Intrinsic::aarch64_sve_st1wq:
  case Intrinsic::aarch64_sve_stnt1:
  case Intrinsic::aarch64_sve_stnt1_scatter:
  case Intrinsic::aarch64_sve_stnt1_scatter_index:
  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
  case Intrinsic::aarch64_sve_st2:
  case Intrinsic::aarch64_sve_st2q:
  case Intrinsic::aarch64_sve_st3:
  case Intrinsic::aarch64_sve_st3q:
  case Intrinsic::aarch64_sve_st4:
  case Intrinsic::aarch64_sve_st4q:
  case Intrinsic::aarch64_sve_addqv:
  case Intrinsic::aarch64_sve_and_z:
  case Intrinsic::aarch64_sve_bic_z:
  case Intrinsic::aarch64_sve_brka_z:
  case Intrinsic::aarch64_sve_brkb_z:
  case Intrinsic::aarch64_sve_brkn_z:
  case Intrinsic::aarch64_sve_brkpa_z:
  case Intrinsic::aarch64_sve_brkpb_z:
  case Intrinsic::aarch64_sve_cntp:
  case Intrinsic::aarch64_sve_compact:
  case Intrinsic::aarch64_sve_eor_z:
  case Intrinsic::aarch64_sve_eorv:
  case Intrinsic::aarch64_sve_eorqv:
  case Intrinsic::aarch64_sve_nand_z:
  case Intrinsic::aarch64_sve_nor_z:
  case Intrinsic::aarch64_sve_orn_z:
  case Intrinsic::aarch64_sve_orr_z:
  case Intrinsic::aarch64_sve_orv:
  case Intrinsic::aarch64_sve_orqv:
  case Intrinsic::aarch64_sve_pnext:
  case Intrinsic::aarch64_sve_rdffr_z:
  case Intrinsic::aarch64_sve_saddv:
  case Intrinsic::aarch64_sve_uaddv:
  case Intrinsic::aarch64_sve_umaxv:
  case Intrinsic::aarch64_sve_umaxqv:
  case Intrinsic::aarch64_sve_cmpeq:
  case Intrinsic::aarch64_sve_cmpeq_wide:
  case Intrinsic::aarch64_sve_cmpge:
  case Intrinsic::aarch64_sve_cmpge_wide:
  case Intrinsic::aarch64_sve_cmpgt:
  case Intrinsic::aarch64_sve_cmpgt_wide:
  case Intrinsic::aarch64_sve_cmphi:
  case Intrinsic::aarch64_sve_cmphi_wide:
  case Intrinsic::aarch64_sve_cmphs:
  case Intrinsic::aarch64_sve_cmphs_wide:
  case Intrinsic::aarch64_sve_cmple_wide:
  case Intrinsic::aarch64_sve_cmplo_wide:
  case Intrinsic::aarch64_sve_cmpls_wide:
  case Intrinsic::aarch64_sve_cmplt_wide:
  case Intrinsic::aarch64_sve_facge:
  case Intrinsic::aarch64_sve_facgt:
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_fcmpuo:
  case Intrinsic::aarch64_sve_ld1_gather:
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ld1q_gather_index:
  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
  case Intrinsic::aarch64_sve_ld1ro:
  case Intrinsic::aarch64_sve_ld1rq:
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
  case Intrinsic::aarch64_sve_ldff1:
  case Intrinsic::aarch64_sve_ldff1_gather:
  case Intrinsic::aarch64_sve_ldff1_gather_index:
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
  case Intrinsic::aarch64_sve_ldnf1:
  case Intrinsic::aarch64_sve_ldnt1:
  case Intrinsic::aarch64_sve_ldnt1_gather:
  case Intrinsic::aarch64_sve_ldnt1_gather_index:
  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
  case Intrinsic::aarch64_sve_prf:
  case Intrinsic::aarch64_sve_prfb_gather_index:
  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_index:
  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_index:
  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_index:
  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
  case Intrinsic::aarch64_neon_fmaxnm:
  case Intrinsic::aarch64_neon_fminnm:
  case Intrinsic::aarch64_sve_convert_from_svbool:
  case Intrinsic::aarch64_sve_dup:
  case Intrinsic::aarch64_sve_dup_x:
  case Intrinsic::aarch64_sve_cmpne:
  case Intrinsic::aarch64_sve_cmpne_wide:
  case Intrinsic::aarch64_sve_rdffr:
  case Intrinsic::aarch64_sve_lasta:
  case Intrinsic::aarch64_sve_lastb:
  case Intrinsic::aarch64_sve_clasta_n:
  case Intrinsic::aarch64_sve_clastb_n:
  case Intrinsic::aarch64_sve_cntd:
  case Intrinsic::aarch64_sve_cntw:
  case Intrinsic::aarch64_sve_cnth:
  case Intrinsic::aarch64_sve_cntb:
  case Intrinsic::aarch64_sve_ptest_any:
  case Intrinsic::aarch64_sve_ptest_first:
  case Intrinsic::aarch64_sve_ptest_last:
  case Intrinsic::aarch64_sve_fabd:
  case Intrinsic::aarch64_sve_fadd:
  case Intrinsic::aarch64_sve_fadd_u:
  case Intrinsic::aarch64_sve_fdiv:
  case Intrinsic::aarch64_sve_fmax:
  case Intrinsic::aarch64_sve_fmaxnm:
  case Intrinsic::aarch64_sve_fmin:
  case Intrinsic::aarch64_sve_fminnm:
  case Intrinsic::aarch64_sve_fmla:
  case Intrinsic::aarch64_sve_fmls:
  case Intrinsic::aarch64_sve_fmul:
  case Intrinsic::aarch64_sve_fmul_u:
  case Intrinsic::aarch64_sve_fmulx:
  case Intrinsic::aarch64_sve_fnmla:
  case Intrinsic::aarch64_sve_fnmls:
  case Intrinsic::aarch64_sve_fsub:
  case Intrinsic::aarch64_sve_fsub_u:
  case Intrinsic::aarch64_sve_add:
  case Intrinsic::aarch64_sve_add_u:
                                           Intrinsic::aarch64_sve_mla_u>(
  case Intrinsic::aarch64_sve_mla:
  case Intrinsic::aarch64_sve_mls:
  case Intrinsic::aarch64_sve_mul:
  case Intrinsic::aarch64_sve_mul_u:
  case Intrinsic::aarch64_sve_sabd:
  case Intrinsic::aarch64_sve_smax:
  case Intrinsic::aarch64_sve_smin:
  case Intrinsic::aarch64_sve_smulh:
  case Intrinsic::aarch64_sve_sub:
  case Intrinsic::aarch64_sve_sub_u:
                                           Intrinsic::aarch64_sve_mls_u>(
  case Intrinsic::aarch64_sve_uabd:
  case Intrinsic::aarch64_sve_umax:
  case Intrinsic::aarch64_sve_umin:
  case Intrinsic::aarch64_sve_umulh:
  case Intrinsic::aarch64_sve_asr:
  case Intrinsic::aarch64_sve_lsl:
  case Intrinsic::aarch64_sve_lsr:
  case Intrinsic::aarch64_sve_and:
  case Intrinsic::aarch64_sve_bic:
  case Intrinsic::aarch64_sve_eor:
  case Intrinsic::aarch64_sve_orr:
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_uqsub:
  case Intrinsic::aarch64_sve_tbl:
  case Intrinsic::aarch64_sve_uunpkhi:
  case Intrinsic::aarch64_sve_uunpklo:
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
  case Intrinsic::aarch64_sve_uzp1:
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
  case Intrinsic::aarch64_sve_ld1_gather_index:
  case Intrinsic::aarch64_sve_st1_scatter_index:
  case Intrinsic::aarch64_sve_ld1:
  case Intrinsic::aarch64_sve_st1:
  case Intrinsic::aarch64_sve_sdiv:
  case Intrinsic::aarch64_sve_sel:
  case Intrinsic::aarch64_sve_srshl:
  case Intrinsic::aarch64_sve_dupq_lane:
  case Intrinsic::aarch64_sve_insr:

  return std::nullopt;
                                         SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

  return std::nullopt;
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
                                           Type *SrcOverrideTy) {
                           cast<VectorType>(DstTy)->getElementCount());

      (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

  Type *SrcTy = SrcOverrideTy;
  case Instruction::Add:
  case Instruction::Sub:
    if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {

  case Instruction::Mul: {
    if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
        (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
    } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {

  assert(SrcTy && "Expected some SrcTy");
  unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

      DstTyL.first * DstTyL.second.getVectorMinNumElements();
      SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
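// Worked example of the final check: for add(sext <8 x i8> %a to <8 x i16>,
// sext <8 x i8> %b to <8 x i16>), SrcElTySize == 8 and DstEltSize == 16, and
// the legalized source/destination element counts match, so the operation
// maps onto a single widening instruction (SADDL/SADDW style) and the
// extension is effectively free.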
      (Src->isScalableTy() && !ST->hasSVE2()))

      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (AddUser && AddUser->getOpcode() == Instruction::Add)

  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
  if (!Shr || Shr->getOpcode() != Instruction::LShr)

  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
      Src->getScalarSizeInBits() !=
          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
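// The chain checked above (ext -> add [-> add] -> lshr -> trunc back to the
// source width) is the usual vector averaging idiom, which AArch64 can lower
// to the halving-add family (e.g. URHADD for the rounding unsigned form);
// the surrounding cost code presumably uses this to treat the extends as
// free. The caller's context is elided here.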
  assert(ISD && "Invalid opcode");

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
      if (SingleUser->getOpcode() == Instruction::Add) {
        if (I == SingleUser->getOperand(1) ||
            (isa<CastInst>(SingleUser->getOperand(1)) &&
             cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))

  if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&

    return Cost == 0 ? 0 : 1;
    return AdjustCost(Entry->Cost);

  EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
  std::pair<InstructionCost, MVT> LT =
  unsigned NumElements =
    return AdjustCost(Entry->Cost);

  if (ST->hasFullFP16())
    return AdjustCost(Entry->Cost);

      Opcode, LegalTy, Src, CCH, CostKind, I);
  return Part1 + Part2;
  assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
  assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

                             CostKind, Index, nullptr, nullptr);

  if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

  if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

  case Instruction::SExt:
  case Instruction::ZExt:
    if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

  return Opcode == Instruction::PHI ? 0 : 1;
    unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,
    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
  if (!LT.second.isVector())

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    if (I && dyn_cast<LoadInst>(I->getOperand(1)))
  auto ExtractCanFuseWithFmul = [&]() {
    auto IsAllowedScalarTy = [&](const Type *T) {
      return T->isFloatTy() || T->isDoubleTy() ||
             (T->isHalfTy() && ST->hasFullFP16());

    auto IsUserFMulScalarTy = [](const Value *EEUser) {
      const auto *BO = dyn_cast<BinaryOperator>(EEUser);
      return BO && BO->getOpcode() == BinaryOperator::FMul &&
             !BO->getType()->isVectorTy();

    auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
      return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

    if (!isa<FixedVectorType>(Val) || !IsAllowedScalarTy(Val->getScalarType()))

      for (auto *U : Scalar->users()) {
        if (!IsUserFMulScalarTy(U))
        UserToExtractIdx[U];
      if (UserToExtractIdx.empty())
      for (auto &[S, U, L] : ScalarUserAndIdx) {
        for (auto *U : S->users()) {
          if (UserToExtractIdx.find(U) != UserToExtractIdx.end()) {
            auto *FMul = cast<BinaryOperator>(U);
            auto *Op0 = FMul->getOperand(0);
            auto *Op1 = FMul->getOperand(1);
            if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
              UserToExtractIdx[U] = L;
      for (auto &[U, L] : UserToExtractIdx) {

      const auto *EE = cast<ExtractElementInst>(I);
      const auto *IdxOp = dyn_cast<ConstantInt>(EE->getIndexOperand());

      return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {
        if (!IsUserFMulScalarTy(U))

        const auto *BO = cast<BinaryOperator>(U);
        const auto *OtherEE = dyn_cast<ExtractElementInst>(
            BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
        const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
        return IsExtractLaneEquivalentToZero(
            cast<ConstantInt>(OtherEE->getIndexOperand())
            OtherEE->getType()->getScalarSizeInBits());

  if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
      ExtractCanFuseWithFmul())

  return ST->getVectorInsertExtractBaseCost();
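  // Design note on ExtractCanFuseWithFmul: an extractelement whose only users
  // are scalar fmuls can usually fold into the by-element (indexed) FMUL
  // form, so lane 0, and any lane sitting on a 128-bit register boundary
  // ((Idx * EltSz) % RegWidth == 0), is treated as free; all other cases pay
  // the insert/extract base cost returned above.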
                                                   unsigned Index, Value *Op0,
      Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
  return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);

    ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
  return getVectorInstrCostHelper(Opcode, Val, Index, false, nullptr, Scalar,

  return getVectorInstrCostHelper(I.getOpcode(), Val, Index,
  if (isa<ScalableVectorType>(Ty))
  return DemandedElts.popcount() * (Insert + Extract) *

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
                                        Op2Info, Args, CxtI);

    return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
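    // The surrounding context is elided, but by its shape this prices an
    // expanded wide multiply: two narrower multiplies, two adds and two
    // shifts for the partial products, plus one final combining instruction,
    // using the MulCost/AddCost/ShrCost values computed earlier.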
  if (!VT.isVector() && VT.getSizeInBits() > 64)
        Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (isa<FixedVectorType>(Ty) && cast<FixedVectorType>(Ty)
                                          ->getPrimitiveSizeInBits()
                                          .getFixedValue() < 128) {

    if (nullptr != Entry)

    if (LT.second.getScalarType() == MVT::i8)
    else if (LT.second.getScalarType() == MVT::i16)

    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      return (4 + DivCost) * VTy->getNumElements();

    if (LT.second == MVT::v2i64 && ST->hasSVE())

    if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))
    return cast<VectorType>(Ty)->getElementCount().getKnownMinValue() *

        (Ty->isHalfTy() && ST->hasFullFP16())) &&
      return 2 * LT.first;
      return 2 * LT.first;

  int MaxMergeDistance = 64;
    return NumVectorInstToHideOverhead;

                                   Op1Info, Op2Info, I);

  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SELECT) {
    const int AmortizationCost = 20;
      VecPred = CurrentPred;

    static const auto ValidMinMaxTys = {
        MVT::v8i8,  MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
        MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
    static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

    if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
        (ST->hasFullFP16() &&
         any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))

    VectorSelectTbl[] = {
        {ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
        {ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
        {ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}
  if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
    if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
      return LT.first * 4;

                                   Op1Info, Op2Info, I);
  if (ST->requiresStrictAlign()) {

  Options.AllowOverlappingLoads = true;

  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
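  // Sketch of what these knobs allow the inline memcmp expansion to do: loads
  // of 8/4/2/1 bytes, possibly overlapping, with odd tails of 3, 5 or 6 bytes
  // formed from two overlapping loads (3 as 2+1, etc.). A call like
  // memcmp(p, q, 11) can then become one 8-byte compare plus a 3-byte tail
  // instead of a libc call.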
  return ST->hasSVE();
  if (!LT.first.isValid())

  auto *VT = cast<VectorType>(Src);
  if (VT->getElementType()->isIntegerTy(1))

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Should be called on only load or stores.");
  case Instruction::Load:
      return ST->getGatherOverhead();
  case Instruction::Store:
      return ST->getScatterOverhead();
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  auto *VT = cast<VectorType>(DataTy);
  if (!LT.first.isValid())

  if (!LT.second.isVector() ||
      VT->getElementType()->isIntegerTy(1))

  ElementCount LegalVF = LT.second.getVectorElementCount();
                                 {TTI::OK_AnyValue, TTI::OP_None}, I);
  if (VT == MVT::Other)

  if (!LT.first.isValid())

  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
      (VTy->getElementType()->isIntegerTy(1) &&
       !VTy->getElementCount().isKnownMultipleOf(

  if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
    const int AmortizationCost = 6;
    return LT.first * 2 * AmortizationCost;

  if (VT == MVT::v4i8)
    return cast<FixedVectorType>(Ty)->getNumElements() * 2;
  if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
      *Alignment != Align(1))

  while (!TypeWorklist.empty()) {
                                          bool UseMaskForCond,
                                          bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  auto *VecVTy = cast<VectorType>(VecTy);

  if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
        VecVTy->getElementCount().divideCoefficientBy(Factor));

    if (MinElts % Factor == 0 &&

                                           UseMaskForCond, UseMaskForGaps);
  for (auto *I : Tys) {
    if (!I->isVectorTy())
    if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
  enum { MaxStridedLoads = 7 };
    int StridedLoads = 0;
    for (const auto BB : L->blocks()) {
      for (auto &I : *BB) {
        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
        if (L->isLoopInvariant(PtrValue))
        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
        if (StridedLoads > MaxStridedLoads / 2)
          return StridedLoads;
    return StridedLoads;

  int StridedLoads = countStridedLoads(L, SE);
                    << " strided loads\n");
  if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)

  if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (!isa<IntrinsicInst>(&I) && isa<CallBase>(&I))

  if (Header == L->getLoopLatch()) {
    for (auto *BB : L->blocks()) {
      for (auto &I : *BB) {
        if (isa<LoadInst>(&I))

    unsigned MaxInstsPerLine = 16;
    unsigned BestUC = 1;
    unsigned SizeWithBestUC = BestUC * Size;
      unsigned SizeWithUC = UC * Size;
      if (SizeWithUC > 48)
      if ((SizeWithUC % MaxInstsPerLine) == 0 ||
          (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
        SizeWithBestUC = BestUC * Size;

      return LoadedValues.contains(SI->getOperand(0));
  auto *Term = dyn_cast<BranchInst>(Header->getTerminator());
  auto *Latch = L->getLoopLatch();
  if (!Term || !Term->isConditional() || Preds.size() == 1 ||

    if (isa<PHINode>(I) || L->isLoopInvariant(I) || Depth > 8)
    if (isa<LoadInst>(I))
      auto *I = dyn_cast<Instruction>(V);
      return I && DependsOnLoopLoad(I, Depth + 1);
      DependsOnLoopLoad(I, 0)) {
  if (L->getLoopDepth() > 1)

  case AArch64Subtarget::AppleA14:
  case AArch64Subtarget::AppleA15:
  case AArch64Subtarget::AppleA16:
  case AArch64Subtarget::AppleM4:

  case AArch64Subtarget::Falkor:

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {

      !ST->getSchedModel().isOutOfOrder()) {
                                                           Type *ExpectedType) {
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4: {
    StructType *ST = dyn_cast<StructType>(ExpectedType);
    unsigned NumElts = Inst->arg_size() - 1;
    if (ST->getNumElements() != NumElts)
    for (unsigned i = 0, e = NumElts; i != e; ++i) {
    for (unsigned i = 0, e = NumElts; i != e; ++i) {

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    if (Inst->getType() == ExpectedType)
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
    Info.ReadMem = true;
    Info.WriteMem = false;
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
    Info.ReadMem = false;
    Info.WriteMem = true;

  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_st2:
    Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_st3:
    Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_st4:
    Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;

  return Considerable;
  if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))

  if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
    return LegalizationCost + 2;

  LegalizationCost *= LT.first - 1;

  assert(ISD && "Invalid opcode");
    return LegalizationCost + 2;
                                           std::optional<FastMathFlags> FMF,
  if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
  if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
      return BaseCost + FixedVTy->getNumElements();

  if (Opcode != Instruction::FAdd)

  auto *VTy = cast<ScalableVectorType>(ValTy);

  if (isa<ScalableVectorType>(ValTy))

  MVT MTy = LT.second;
  assert(ISD && "Invalid opcode");

      MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
                         (EltTy->isHalfTy() && ST->hasFullFP16()))) {

    return (LT.first - 1) + Log2_32(NElts);

    return (LT.first - 1) + Entry->Cost;

  auto *ValVTy = cast<FixedVectorType>(ValTy);
  if (LT.first != 1) {
    ExtraCost *= LT.first - 1;
  auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
  return Cost + ExtraCost;
  EVT PromotedVT = LT.second.getScalarType() == MVT::i1
  if (LT.second.getScalarType() == MVT::i1) {
  assert(Entry && "Illegal Type for Splice");
  LegalizationCost += Entry->Cost;
  return LegalizationCost * LT.first;
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    std::optional<unsigned> BinOp) const {

  if (Opcode != Instruction::Add)

  if (InputTypeA != InputTypeB)

  if (VFMinValue == Scale)

      (!ST->isNeonAvailable() || !ST->hasDotProd() || AccumEVT == MVT::i64))

  if (InputEVT == MVT::i8) {
    switch (VFMinValue) {
      if (AccumEVT == MVT::i32)
      else if (AccumEVT != MVT::i64)
      if (AccumEVT == MVT::i64)
      else if (AccumEVT != MVT::i32)
  } else if (InputEVT == MVT::i16) {
    if (VFMinValue != 8 || AccumEVT != MVT::i64)

      (OpAExtend != OpBExtend && !ST->hasMatMulInt8() &&

  if (!BinOp || *BinOp != Instruction::Mul)
  if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
      Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

    if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
      return std::max<InstructionCost>(1, LT.first / 4);
4782 unsigned TpNumElts = Mask.size();
4783 unsigned LTNumElts = LT.second.getVectorNumElements();
4784 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
4788 for (
unsigned N = 0;
N < NumVecs;
N++) {
4792 unsigned Source1, Source2;
4793 unsigned NumSources = 0;
4794 for (
unsigned E = 0; E < LTNumElts; E++) {
4795 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
4804 unsigned Source = MaskElt / LTNumElts;
4805 if (NumSources == 0) {
4808 }
else if (NumSources == 1 && Source != Source1) {
4811 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
4817 if (Source == Source1)
4819 else if (Source == Source2)
4820 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
4827 if (NumSources <= 2)
4830 NTp, NMask,
CostKind, 0,
nullptr, Args, CxtI);
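// Standalone sketch of the decomposition above (hypothetical helper): a mask
// wider than the legal vector is split into ceil(TpNumElts / LTNumElts)
// chunks; each chunk's elements are mapped to their source register
// (MaskElt / LTNumElts) and re-indexed modulo LTNumElts, so any chunk that
// touches at most two sources can be costed as one ordinary shuffle.
#include <vector>

inline std::vector<std::vector<int>>
splitMaskToLegalChunksSketch(const std::vector<int> &Mask, unsigned LTNumElts) {
  unsigned TpNumElts = Mask.size();
  unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts; // ceil division
  std::vector<std::vector<int>> Chunks(NumVecs);
  for (unsigned N = 0; N < NumVecs; ++N) {
    unsigned Source1 = 0, Source2 = 0, NumSources = 0;
    for (unsigned E = 0; E < LTNumElts; ++E) {
      unsigned Idx = N * LTNumElts + E;
      int MaskElt = Idx < TpNumElts ? Mask[Idx] : -1; // pad with undef
      if (MaskElt < 0) {
        Chunks[N].push_back(-1);
        continue;
      }
      unsigned Source = MaskElt / LTNumElts; // source register of this lane
      if (NumSources == 0) {
        Source1 = Source;
        NumSources = 1;
      } else if (NumSources == 1 && Source != Source1) {
        Source2 = Source;
        NumSources = 2;
      } else if (Source != Source1 && Source != Source2) {
        ++NumSources; // > 2 sources: the real code falls back to scalarizing
      }
      // Re-index into the two-register space of the new, narrower shuffle.
      Chunks[N].push_back(Source == Source1
                              ? int(MaskElt % LTNumElts)
                              : int(MaskElt % LTNumElts + LTNumElts));
    }
  }
  return Chunks;
}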
4842   if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
4843     if (LT.second.is128BitVector() &&
4844         cast<FixedVectorType>(SubTp)->getNumElements() ==
4845             LT.second.getVectorNumElements() / 2) {
4848       if (Index == (int)LT.second.getVectorNumElements() / 2)
4862   bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
4863   if (IsLoad && LT.second.isVector() &&
4865       LT.second.getVectorElementCount()))
4873       all_of(Mask, [](int E) { return E < 8; }))
4877   if (!Mask.empty() && LT.second.isFixedLengthVector() &&
4880         return M.value() < 0 || M.value() == (int)M.index();
4887   if (LT.second.isFixedLengthVector() &&
4888       LT.second.getVectorNumElements() == Mask.size() &&
4890       (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
4891        isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
4894        [&Mask](int M) { return M < 0 || M == Mask[0]; })))
5017     return LT.first * Entry->Cost;
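// Sketch of the cheap-mask classifiers consulted above (standalone
// reimplementations of the documented shapes, not the LLVM helpers): an
// identity mask keeps every lane in place, a splat mask repeats lane Mask[0],
// and uzp1/uzp2 select the even or odd lanes of the two concatenated inputs.
#include <vector>

inline bool isIdentityMaskSketch(const std::vector<int> &Mask) {
  for (unsigned I = 0; I < Mask.size(); ++I)
    if (Mask[I] >= 0 && Mask[I] != int(I)) // mirrors M < 0 || M == index
      return false;
  return true;
}

inline bool isSplatMaskSketch(const std::vector<int> &Mask) {
  for (int M : Mask)
    if (M >= 0 && M != Mask[0]) // mirrors M < 0 || M == Mask[0]
      return false;
  return !Mask.empty();
}

// WhichResult = 0 for uzp1 (<0, 2, 4, ...>), 1 for uzp2 (<1, 3, 5, ...>).
inline bool isUZPMaskSketch(const std::vector<int> &Mask,
                            unsigned WhichResult) {
  for (unsigned I = 0; I < Mask.size(); ++I)
    if (Mask[I] >= 0 && Mask[I] != int(2 * I + WhichResult))
      return false;
  return true;
}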
5026       LT.second.getSizeInBits() <= 128 && SubTp) {
5028     if (SubLT.second.isVector()) {
5029       int NumElts = LT.second.getVectorNumElements();
5030       int NumSubElts = SubLT.second.getVectorNumElements();
5031       if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
5037   if (IsExtractSubvector)
5050   if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
5066   return ST->useFixedOverScalableIfEqualCost();
5104   unsigned NumInsns = 0;
5106     NumInsns += BB->sizeWithoutDebug();
5116     int64_t Scale, unsigned AddrSpace) const {
5143   if (I->getOpcode() == Instruction::Or &&
5144       isa<BranchInst>(I->getNextNode()) &&
5145       cast<BranchInst>(I->getNextNode())->isUnconditional())
5148   if (I->getOpcode() == Instruction::Add ||
5149       I->getOpcode() == Instruction::Sub)
5172   if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
5173     return all_equal(Shuf->getShuffleMask());
5180                           bool AllowSplat = false) {
5185   auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
5186     auto *FullTy = FullV->getType();
5187     auto *HalfTy = HalfV->getType();
5189            2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
5192   auto extractHalf = [](Value *FullV, Value *HalfV) {
5193     auto *FullVT = cast<FixedVectorType>(FullV->getType());
5194     auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
5195     return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
5199   Value *S1Op1 = nullptr, *S2Op1 = nullptr;
5213   if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
5214       (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
5221   int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
5228   if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
5229       (M2Start != 0 && M2Start != (NumElements / 2)))
5231   if (S1Op1 && S2Op1 && M1Start != M2Start)
5241     return Ext->getType()->getScalarSizeInBits() ==
5242            2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
5247       !areExtDoubled(cast<Instruction>(Ext1)) ||
5248       !areExtDoubled(cast<Instruction>(Ext2)))
5256   Value *VectorOperand = nullptr;
5261          isa<FixedVectorType>(VectorOperand->getType()) &&
5262          cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
5272   auto *GEP = dyn_cast<GetElementPtrInst>(Ptrs);
5273   if (!GEP || GEP->getNumOperands() != 2)
5277   Value *Offsets = GEP->getOperand(1);
5280   if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
5284   if (isa<SExtInst>(Offsets) || isa<ZExtInst>(Offsets)) {
5285     auto *OffsetsInst = cast<Instruction>(Offsets);
5286     if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
5287         OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
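// Sketch of the offset rule above (hypothetical standalone predicate): SVE
// gather/scatter addressing can extend 32-bit vector offsets itself, so a
// sext/zext in IR from offsets of at most 32 bits up to a wider type is free
// once it is sunk next to the memory operation.
inline bool extendFoldsIntoGatherSketch(unsigned ExtendedBits,
                                        unsigned SourceBits) {
  return ExtendedBits > 32 && SourceBits <= 32; // mirrors the check above
}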
5303   Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5308   Value *ZExtOp = cast<Instruction>(Op)->getOperand(0);
5309   Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0));
5310   Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
5322   switch (II->getIntrinsicID()) {
5323   case Intrinsic::aarch64_neon_smull:
5324   case Intrinsic::aarch64_neon_umull:
5333   case Intrinsic::fma:
5334   case Intrinsic::fmuladd:
5335     if (isa<VectorType>(I->getType()) &&
5336         cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5340   case Intrinsic::aarch64_neon_sqdmull:
5341   case Intrinsic::aarch64_neon_sqdmulh:
5342   case Intrinsic::aarch64_neon_sqrdmulh:
5348     return !Ops.empty();
5349   case Intrinsic::aarch64_neon_fmlal:
5350   case Intrinsic::aarch64_neon_fmlal2:
5351   case Intrinsic::aarch64_neon_fmlsl:
5352   case Intrinsic::aarch64_neon_fmlsl2:
5358     return !Ops.empty();
5359   case Intrinsic::aarch64_sve_ptest_first:
5360   case Intrinsic::aarch64_sve_ptest_last:
5361     if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
5362       if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
5364     return !Ops.empty();
5365   case Intrinsic::aarch64_sme_write_horiz:
5366   case Intrinsic::aarch64_sme_write_vert:
5367   case Intrinsic::aarch64_sme_writeq_horiz:
5368   case Intrinsic::aarch64_sme_writeq_vert: {
5369     auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
5370     if (!Idx || Idx->getOpcode() != Instruction::Add)
5375   case Intrinsic::aarch64_sme_read_horiz:
5376   case Intrinsic::aarch64_sme_read_vert:
5377   case Intrinsic::aarch64_sme_readq_horiz:
5378   case Intrinsic::aarch64_sme_readq_vert:
5379   case Intrinsic::aarch64_sme_ld1b_vert:
5380   case Intrinsic::aarch64_sme_ld1h_vert:
5381   case Intrinsic::aarch64_sme_ld1w_vert:
5382   case Intrinsic::aarch64_sme_ld1d_vert:
5383   case Intrinsic::aarch64_sme_ld1q_vert:
5384   case Intrinsic::aarch64_sme_st1b_vert:
5385   case Intrinsic::aarch64_sme_st1h_vert:
5386   case Intrinsic::aarch64_sme_st1w_vert:
5387   case Intrinsic::aarch64_sme_st1d_vert:
5388   case Intrinsic::aarch64_sme_st1q_vert:
5389   case Intrinsic::aarch64_sme_ld1b_horiz:
5390   case Intrinsic::aarch64_sme_ld1h_horiz:
5391   case Intrinsic::aarch64_sme_ld1w_horiz:
5392   case Intrinsic::aarch64_sme_ld1d_horiz:
5393   case Intrinsic::aarch64_sme_ld1q_horiz:
5394   case Intrinsic::aarch64_sme_st1b_horiz:
5395   case Intrinsic::aarch64_sme_st1h_horiz:
5396   case Intrinsic::aarch64_sme_st1w_horiz:
5397   case Intrinsic::aarch64_sme_st1d_horiz:
5398   case Intrinsic::aarch64_sme_st1q_horiz: {
5399     auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
5400     if (!Idx || Idx->getOpcode() != Instruction::Add)
5405   case Intrinsic::aarch64_neon_pmull:
5411   case Intrinsic::aarch64_neon_pmull64:
5413                                        II->getArgOperand(1)))
5418   case Intrinsic::masked_gather:
5423   case Intrinsic::masked_scatter:
5433   auto ShouldSinkCondition = [](Value *Cond) -> bool {
5434     auto *II = dyn_cast<IntrinsicInst>(Cond);
5435     return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&
5436            isa<ScalableVectorType>(II->getOperand(0)->getType());
5439   switch (I->getOpcode()) {
5440   case Instruction::GetElementPtr:
5441   case Instruction::Add:
5442   case Instruction::Sub:
5444     for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
5451   case Instruction::Select: {
5452     if (!ShouldSinkCondition(I->getOperand(0)))
5458   case Instruction::Br: {
5459     if (cast<BranchInst>(I)->isUnconditional())
5462     if (!ShouldSinkCondition(cast<BranchInst>(I)->getCondition()))
5472   if (!I->getType()->isVectorTy())
5475   switch (I->getOpcode()) {
5476   case Instruction::Sub:
5477   case Instruction::Add: {
5483     auto Ext1 = cast<Instruction>(I->getOperand(0));
5484     auto Ext2 = cast<Instruction>(I->getOperand(1));
5495   case Instruction::Or: {
5498     if (ST->hasNEON()) {
5508                            ? cast<Instruction>(I->getOperand(1))
5509                            : cast<Instruction>(I->getOperand(0));
5512     if (I->getParent() != MainAnd->getParent() ||
5517     if (I->getParent() != IA->getParent() ||
5518         I->getParent() != IB->getParent())
5533   case Instruction::Mul: {
5534     auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
5535       auto *Ty = cast<VectorType>(V->getType());
5537       if (Ty->isScalableTy())
5541       return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
5544     int NumZExts = 0, NumSExts = 0;
5545     for (auto &Op : I->operands()) {
5547       if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
5551       auto *Ext = cast<Instruction>(Op);
5552       auto *ExtOp = Ext->getOperand(0);
5553       if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
5557       if (isa<SExtInst>(Ext))
5588     Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
5593         dyn_cast<ConstantInt>(Insert->getOperand(2));
5595     if (!ElementConstant || !ElementConstant->isZero())
5598     unsigned Opcode = OperandInstr->getOpcode();
5599     if (Opcode == Instruction::SExt)
5601     else if (Opcode == Instruction::ZExt)
5606     unsigned Bitwidth = I->getType()->getScalarSizeInBits();
5617     Ops.push_back(&Insert->getOperandUse(1));
5623     if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
5627     if (!ShouldSinkSplatForIndexedVariant(I))
5636     return !Ops.empty();
5638   case Instruction::FMul: {
5640     if (I->getType()->isScalableTy())
5643     if (cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
5652     return !Ops.empty();
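// Sketch of the sinking rule in the Mul case above (plain restatement of the
// visible condition): splat operands are only worth sinking when both
// multiplicands extend the same way, since a widening multiply needs two
// sign-extends (smull-style) or two zero-extends (umull-style); mixed extends
// have no single instruction to fold into.
inline bool shouldSinkMulExtendsSketch(int NumSExts, int NumZExts) {
  return NumSExts == 2 || NumZExts == 2; // mirrors the check above
}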