74#include "llvm/IR/IntrinsicsAArch64.h"
109#define DEBUG_TYPE "aarch64-lower"
112STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
119 cl::desc(
"Allow AArch64 Local Dynamic TLS code generation"),
124 cl::desc(
"Enable AArch64 logical imm instruction "
134 cl::desc(
"Combine extends of AArch64 masked "
135 "gather intrinsics"),
139 cl::desc(
"Combine ext and trunc to TBL"),
154 cl::desc(
"Enable / disable SVE scalable vectors in Global ISel"),
161 cl::desc(
"Generate ISD::PTRADD nodes for pointer arithmetic in "
162 "SelectionDAG for FEAT_CPA"),
172 AArch64::X3, AArch64::X4, AArch64::X5,
173 AArch64::X6, AArch64::X7};
175 AArch64::Q3, AArch64::Q4, AArch64::Q5,
176 AArch64::Q6, AArch64::Q7};
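// Note (added): under AAPCS64 the first eight integer arguments are passed in
// X0-X7 and the first eight FP/SIMD arguments in Q0-Q7; the argument-register
// tables above (only partially visible in this excerpt) are what the
// calling-convention lowering indexes into.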
    return MVT::nxv8bf16;

  switch (EC.getKnownMinValue()) {

         "Expected scalable predicate vector type!");
         "Expected legal vector type!");

  case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
  case AArch64ISD::BSWAP_MERGE_PASSTHRU:
  case AArch64ISD::REVH_MERGE_PASSTHRU:
  case AArch64ISD::REVW_MERGE_PASSTHRU:
  case AArch64ISD::REVD_MERGE_PASSTHRU:
  case AArch64ISD::CTLZ_MERGE_PASSTHRU:
  case AArch64ISD::CTPOP_MERGE_PASSTHRU:
  case AArch64ISD::DUP_MERGE_PASSTHRU:
  case AArch64ISD::ABS_MERGE_PASSTHRU:
  case AArch64ISD::NEG_MERGE_PASSTHRU:
  case AArch64ISD::FNEG_MERGE_PASSTHRU:
  case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::FCEIL_MERGE_PASSTHRU:
  case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
  case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT_MERGE_PASSTHRU:
  case AArch64ISD::FROUND_MERGE_PASSTHRU:
  case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
  case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
  case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
  case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::FCVTX_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
  case AArch64ISD::FSQRT_MERGE_PASSTHRU:
  case AArch64ISD::FRECPX_MERGE_PASSTHRU:
  case AArch64ISD::FABS_MERGE_PASSTHRU:
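// Note (added): the *_MERGE_PASSTHRU opcodes listed above are the predicated
// SVE node forms that take (predicate, source operand(s), passthru); lanes
// whose predicate bit is clear take the passthru value, which appears to be
// the property this helper groups them by.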
  switch (Op.getOpcode()) {

  case ISD::GET_ACTIVE_LANE_MASK:
  case AArch64ISD::PTRUE:
  case AArch64ISD::SETCC_MERGE_ZERO:

    switch (Op.getConstantOperandVal(0)) {

    case Intrinsic::aarch64_sve_ptrue:
    case Intrinsic::aarch64_sve_pnext:
    case Intrinsic::aarch64_sve_cmpeq:
    case Intrinsic::aarch64_sve_cmpne:
    case Intrinsic::aarch64_sve_cmpge:
    case Intrinsic::aarch64_sve_cmpgt:
    case Intrinsic::aarch64_sve_cmphs:
    case Intrinsic::aarch64_sve_cmphi:
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide:
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide:
    case Intrinsic::aarch64_sve_fcmpeq:
    case Intrinsic::aarch64_sve_fcmpne:
    case Intrinsic::aarch64_sve_fcmpge:
    case Intrinsic::aarch64_sve_fcmpgt:
    case Intrinsic::aarch64_sve_fcmpuo:
    case Intrinsic::aarch64_sve_facgt:
    case Intrinsic::aarch64_sve_facge:
    case Intrinsic::aarch64_sve_whilege:
    case Intrinsic::aarch64_sve_whilegt:
    case Intrinsic::aarch64_sve_whilehi:
    case Intrinsic::aarch64_sve_whilehs:
    case Intrinsic::aarch64_sve_whilele:
    case Intrinsic::aarch64_sve_whilelo:
    case Intrinsic::aarch64_sve_whilels:
    case Intrinsic::aarch64_sve_whilelt:
    case Intrinsic::aarch64_sve_match:
    case Intrinsic::aarch64_sve_nmatch:
    case Intrinsic::aarch64_sve_whilege_x2:
    case Intrinsic::aarch64_sve_whilegt_x2:
    case Intrinsic::aarch64_sve_whilehi_x2:
    case Intrinsic::aarch64_sve_whilehs_x2:
    case Intrinsic::aarch64_sve_whilele_x2:
    case Intrinsic::aarch64_sve_whilelo_x2:
    case Intrinsic::aarch64_sve_whilels_x2:
    case Intrinsic::aarch64_sve_whilelt_x2:
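// Note (added): every opcode and intrinsic listed above produces an SVE
// predicate whose inactive lanes are guaranteed to be zero (PTRUE, PNEXT, the
// predicated compares and the WHILE family all have zeroing semantics), which
// seems to be the property this query reports.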
static std::tuple<SDValue, SDValue>

  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))

    AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64);

  return std::make_tuple(

  if (Subtarget->hasLS64()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasNEON()) {

    addDRType(MVT::v2f32);
    addDRType(MVT::v8i8);
    addDRType(MVT::v4i16);
    addDRType(MVT::v2i32);
    addDRType(MVT::v1i64);
    addDRType(MVT::v1f64);
    addDRType(MVT::v4f16);
    addDRType(MVT::v4bf16);

    addQRType(MVT::v4f32);
    addQRType(MVT::v2f64);
    addQRType(MVT::v16i8);
    addQRType(MVT::v8i16);
    addQRType(MVT::v4i32);
    addQRType(MVT::v2i64);
    addQRType(MVT::v8f16);
    addQRType(MVT::v8bf16);
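// Note (added): addDRType/addQRType (defined further down in this file)
// register these vector types with the 64-bit D-register and 128-bit
// Q-register NEON register classes respectively, and only when NEON is
// actually available.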
  if (Subtarget->isSVEorStreamingSVEAvailable()) {

  if (Subtarget->useSVEForFixedLengthVectors()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasFPARMv8()) {

  if (Subtarget->hasCSSC()) {

  if (Subtarget->hasFullFP16()) {

                      ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
                      ISD::FSINCOSPI, ISD::FMODF, ISD::FACOS,
                      ISD::FASIN, ISD::FATAN, ISD::FATAN2,
                      ISD::FCOSH, ISD::FSINH, ISD::FTANH,
                      ISD::FTAN, ISD::FEXP, ISD::FEXP2,
                      ISD::FEXP10, ISD::FLOG, ISD::FLOG2,

  if (Subtarget->hasFullFP16()) {

  auto LegalizeNarrowFP = [this](MVT ScalarVT) {

    for (auto Op : {ISD::FNEG, ISD::FABS})

    for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,

  if (!Subtarget->hasFullFP16()) {
    LegalizeNarrowFP(MVT::f16);
  LegalizeNarrowFP(MVT::bf16);

                     {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
                      ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
                      ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
                      ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
                      ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,

  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())

  for (MVT Ty : {MVT::f32, MVT::f64})
  if (Subtarget->hasFullFP16())

  if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {

  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {

  if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {

  if (Subtarget->hasLSE128()) {

  if (Subtarget->hasLSE2()) {

  if (WideVT.getScalarSizeInBits() > NarrowVT.getScalarSizeInBits()) {

  if (Subtarget->hasFPARMv8()) {

      {ISD::MGATHER, ISD::MSCATTER, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM});

  if (!Subtarget->isTargetWindows())

  if (Subtarget->hasSME())

  if (Subtarget->isNeonAvailable()) {

                      ISD::FNEG, ISD::FABS, ISD::FCEIL,
                      ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
                      ISD::FSIN, ISD::FCOS, ISD::FTAN,
                      ISD::FASIN, ISD::FACOS, ISD::FATAN,
                      ISD::FSINH, ISD::FCOSH, ISD::FTANH,
                      ISD::FPOW, ISD::FLOG, ISD::FLOG2,
                      ISD::FLOG10, ISD::FEXP, ISD::FEXP2,
                      ISD::FEXP10, ISD::FRINT, ISD::FROUND,
                      ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM,
                      ISD::FMAXNUM, ISD::FMINIMUM, ISD::FMAXIMUM,
                      ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,

    for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
    if (Subtarget->hasFullFP16()) {

    for (auto VT : {MVT::v1i64, MVT::v2i64}) {

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,

    for (MVT VT : {MVT::v4f16, MVT::v2f32,
                   MVT::v8f16, MVT::v4f32, MVT::v2f64}) {
      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {

    if (Subtarget->hasFullFP16())

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32,
                   MVT::v16i8, MVT::v8i16, MVT::v4i32}) {

      if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {

        {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
         ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,

    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
    if (Subtarget->hasFullFP16())
      for (MVT Ty : {MVT::v4f16, MVT::v8f16})

    for (auto Op : {ISD::LRINT, ISD::LLRINT}) {
      for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
      if (Subtarget->hasFullFP16())
        for (MVT Ty : {MVT::v4f16, MVT::v8f16})

    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64})

    for (MVT VT : {MVT::v16f16, MVT::v8f32, MVT::v4f64})

    if (Subtarget->hasDotProd()) {
      static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
                                        ISD::PARTIAL_REDUCE_UMLA};
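// Note (added): with FEAT_DotProd the PARTIAL_REDUCE_SMLA/UMLA nodes for
// i8-to-i32 partial reductions can be selected to sdot/udot, so the operation
// actions for those vector combinations are presumably relaxed here instead
// of always being expanded.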
    if (Subtarget->hasMatMulInt8()) {

    if (VT.is128BitVector() || VT.is64BitVector()) {

    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {

  if (Subtarget->hasSME()) {

  if (Subtarget->isSVEorStreamingSVEAvailable()) {
         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {

    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {

      if (Subtarget->hasSVE2p1() ||
          (Subtarget->hasSME2() && Subtarget->isStreaming()))

    for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})

    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v2f64})

  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {

      if (!Subtarget->isLittleEndian())

      if (Subtarget->hasSVE2() ||
          (Subtarget->hasSME() && Subtarget->isStreaming()))

    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {

    for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32})

         {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
          MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16})

         {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {

      if (VT != MVT::nxv16i1) {

         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
          MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
          MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {

    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64}) {

    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {

    if (Subtarget->hasSVEB16B16() &&
        Subtarget->isNonStreamingSVEorSME2Available()) {
      for (auto VT : {MVT::v4bf16, MVT::v8bf16, MVT::nxv2bf16, MVT::nxv4bf16,

           {ISD::FCEIL, ISD::FDIV, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
            ISD::FROUND, ISD::FROUNDEVEN, ISD::FSQRT, ISD::FTRUNC, ISD::SETCC,
            ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMAXIMUM,
            ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMINIMUM}) {

    if (!Subtarget->hasSVEB16B16() ||
        !Subtarget->isNonStreamingSVEorSME2Available()) {
      for (MVT VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {

        if (VT != MVT::nxv2bf16 && Subtarget->hasBF16())

    if (Subtarget->hasBF16() && Subtarget->isNeonAvailable())

    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {

  if (Subtarget->useSVEForFixedLengthVectors()) {
                                               VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);
                                               VT, !Subtarget->isNeonAvailable()))
        addTypeForFixedLengthSVE(VT);

    for (auto VT : {MVT::v8i8, MVT::v4i16})

    for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (auto VT : {MVT::v8f16, MVT::v4f32, MVT::v8bf16})

    for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                    MVT::v2i32, MVT::v4i32, MVT::v2i64}) {

    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})

    for (auto VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1})

  if (Subtarget->isSVEorStreamingSVEAvailable()) {
    static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
                                      ISD::PARTIAL_REDUCE_UMLA};

    if (Subtarget->hasMatMulInt8()) {
                                  MVT::nxv16i8, Legal);

    if (Subtarget->hasSVE2() || Subtarget->hasSME()) {

    if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
                                 MVT::nxv8f16, Legal);

  if (Subtarget->hasSVE2() ||
      (Subtarget->hasSME() && Subtarget->isStreaming())) {
    for (auto VT : {MVT::v2i32, MVT::v4i16, MVT::v8i8, MVT::v16i8}) {
    for (auto VT : {MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1, MVT::nxv16i1}) {

  if (Subtarget->isSVEAvailable()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
                    MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
                    MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
                    MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
                    MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {

    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
                    MVT::v2f32, MVT::v4f32, MVT::v2f64})

    for (auto VT : {MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64,
                    MVT::nxv2f32, MVT::nxv2f64, MVT::nxv4i8, MVT::nxv4i16,
                    MVT::nxv4i32, MVT::nxv4f32}) {

    for (auto VT : {MVT::v2i8, MVT::v2i16, MVT::v2i32, MVT::v2i64, MVT::v2f32,
                    MVT::v2f64, MVT::v4i8, MVT::v4i16, MVT::v4i32, MVT::v4f32})

  if (Subtarget->hasSVE2()) {
    static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
                                      ISD::PARTIAL_REDUCE_UMLA};

  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {

  if (Subtarget->hasSVE()) {

  if (Subtarget->isTargetWindows()) {

void AArch64TargetLowering::addTypeForNEON(MVT VT) {

  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {

  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
        VT == MVT::v8f16) &&
       Subtarget->hasFullFP16()))

  if (VT != MVT::v8i8 && VT != MVT::v16i8)

  for (unsigned Opcode :

  for (unsigned Opcode :
       {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,

  if (Subtarget->isLittleEndian()) {

  if (Subtarget->hasD128()) {

  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
        (OpVT != MVT::i32 && OpVT != MVT::i64))))

  if (!Subtarget->isSVEorStreamingSVEAvailable())

  return VT != MVT::nxv16i1 && VT != MVT::nxv8i1 && VT != MVT::nxv4i1 &&
         VT != MVT::nxv2i1 && VT != MVT::v16i1 && VT != MVT::v8i1 &&
         VT != MVT::v4i1 && VT != MVT::v2i1;

                                              unsigned SearchSize) const {
  if (!Subtarget->hasSVE2() || !Subtarget->isSVEAvailable())

  if (VT == MVT::nxv8i16 || VT == MVT::v8i16)
    return SearchSize != 8;
  if (VT == MVT::nxv16i8 || VT == MVT::v16i8 || VT == MVT::v8i8)
    return SearchSize != 8 && SearchSize != 16;
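// Note (added): the SVE2 MATCH/NMATCH instructions compare each element of a
// 128-bit segment against 8 halfword or 16 byte candidates, so the checks
// above only refuse to expand vector-match operations whose search size is
// one of those natively supported shapes.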
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {

  while (InnerVT != VT) {
  while (InnerVT != VT) {

  bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();

  static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
                                    ISD::PARTIAL_REDUCE_UMLA};

  if (Subtarget->hasMatMulInt8()) {

void AArch64TargetLowering::addDRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

void AArch64TargetLowering::addQRType(MVT VT) {
  if (Subtarget->isNeonAvailable())

    Imm = C->getZExtValue();

  case AArch64ISD::SQDMULH:

  return N->getOpcode() == Opc &&

                           const APInt &Demanded,

  uint64_t OldImm = Imm, NewImm, Enc;

  if (Imm == 0 || Imm == Mask ||

  unsigned EltSize = Size;

      ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &

  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  NewImm = (Imm | Ones) & Mask;
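// Note (added): the arithmetic above is the core of the logical-immediate
// optimisation counted by NumOptimizedImms: bits the user does not demand
// (NonDemandedBits) may be flipped to 1 so that, after rotation, the value
// becomes a contiguous run of set bits replicated per element, which is the
// only shape the AArch64 bitmask-immediate encoding (AND/ORR/EOR #imm) can
// represent.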
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;

         "demanded bits should never be altered");
  assert(OldImm != NewImm &&
         "the new imm shouldn't be equal to the old imm");

  EVT VT = Op.getValueType();

  if (NewImm == 0 || NewImm == OrigMask) {

  EVT VT = Op.getValueType();

  switch (Op.getOpcode()) {

    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;

  switch (Op.getOpcode()) {

  case AArch64ISD::DUP: {
    if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
      assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
             "Expected DUP implicit truncation");
      Known = Known.trunc(Op.getScalarValueSizeInBits());

  case AArch64ISD::CSEL: {

  case AArch64ISD::CSNEG:
  case AArch64ISD::CSINC:
  case AArch64ISD::CSINV: {
    if (Op.getOpcode() == AArch64ISD::CSINC)
    else if (Op.getOpcode() == AArch64ISD::CSINV)
    else if (Op.getOpcode() == AArch64ISD::CSNEG)
                                     Op.getScalarValueSizeInBits())));

  case AArch64ISD::BICi: {
        ~(Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))

  case AArch64ISD::VLSHR: {
  case AArch64ISD::VASHR: {
  case AArch64ISD::VSHL: {
  case AArch64ISD::MOVI: {
  case AArch64ISD::MOVIshift: {
                          << Op->getConstantOperandVal(1)));
  case AArch64ISD::MOVImsl: {
        Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
  case AArch64ISD::MOVIedit: {
  case AArch64ISD::MVNIshift: {
        ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),
  case AArch64ISD::MVNImsl: {
  case AArch64ISD::LOADgot:
  case AArch64ISD::ADDlow: {
    if (!Subtarget->isTargetILP32())
  case AArch64ISD::ASSERT_ZEXT_BOOL: {

    case Intrinsic::aarch64_ldaxr:
    case Intrinsic::aarch64_ldxr: {

    unsigned IntNo = Op.getConstantOperandVal(0);

    case Intrinsic::aarch64_neon_uaddlv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        unsigned Bound = (VT == MVT::v8i8) ? 11 : 12;
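// Note (added): the bound above follows from the widest possible sum: uaddlv
// over eight u8 lanes is at most 8 * 255 = 2040 (< 2^11) and over sixteen u8
// lanes at most 4080 (< 2^12), so all higher result bits can be reported as
// known zero.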
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {

                                                  unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  case AArch64ISD::FCMEQ:
  case AArch64ISD::FCMGE:
  case AArch64ISD::FCMGT:

  case AArch64ISD::VASHR: {
    return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);

    unsigned *Fast) const {
  if (ElementSizeBits % 8 == 0 && Alignment >= Align(ElementSizeBits / 8))

  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||

    unsigned *Fast) const {
  if (Subtarget->requiresStrictAlign())

    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||

  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  MI.eraseFromParent();

         "SEH does not use catchret!");

  Register TargetReg = MI.getOperand(0).getReg();
      TII.probedStackAlloc(MBBI, TargetReg, false);

  MI.eraseFromParent();
  return NextInst->getParent();

  Register RegVL_GPR = MRI.createVirtualRegister(RC_GPR);
  Register RegVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);
  Register RegSVL_GPR = MRI.createVirtualRegister(RC_GPR);
  Register RegSVL_GPRsp = MRI.createVirtualRegister(RC_GPRsp);

  MBB->addSuccessor(TrapBB);
  MBB->addSuccessor(PassBB);

  MI.eraseFromParent();

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(3));
  MIB.add(MI.getOperand(4));
  MIB.add(MI.getOperand(5));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(2));
  MIB.add(MI.getOperand(1));

  MI.eraseFromParent();

                                 bool Op0IsDef) const {

  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();

  unsigned StartIdx = 0;

  bool HasTile = BaseReg != AArch64::ZA;
  bool HasZPROut = HasTile && MI.getOperand(0).isReg();

    MIB.add(MI.getOperand(StartIdx));
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(),
    MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm());

    if (MI.getOperand(0).isReg() && !MI.getOperand(1).isImm()) {
      MIB.add(MI.getOperand(StartIdx));

  for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));

  MI.eraseFromParent();

  MIB.add(MI.getOperand(0));

  unsigned Mask = MI.getOperand(0).getImm();
  for (unsigned I = 0; I < 8; I++) {
    if (Mask & (1 << I))

  MI.eraseFromParent();

  if (TPIDR2.Uses > 0) {

    if (!Subtarget->isLittleEndian())
          "TPIDR2 block initialization is not supported on big-endian targets");

         "Lazy ZA save is not yet supported on Windows");

  if (TPIDR2.Uses > 0) {

    Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP)

    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest)

         "Lazy ZA save is not yet supported on Windows");

    auto Size = MI.getOperand(1).getReg();
    auto Dest = MI.getOperand(0).getReg();
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::SUBXrx64), AArch64::SP)
    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Dest)

    BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
            MI.getOperand(0).getReg());

  RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;

          MI.getOperand(0).getReg())
          MI.getOperand(0).getReg())

  Register ResultReg = MI.getOperand(0).getReg();

  } else if (Subtarget->hasSME()) {
        .addImm(AArch64SysReg::SVCR)

    RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;

  MI.eraseFromParent();

  while (Reg.isVirtual()) {
    assert(DefMI && "Virtual register definition not found");
    unsigned Opcode = DefMI->getOpcode();

    if (Opcode == AArch64::COPY) {
      Reg = DefMI->getOperand(1).getReg();
      if (Reg.isPhysical())

    if (Opcode == AArch64::SUBREG_TO_REG) {
      Reg = DefMI->getOperand(2).getReg();

  int64_t IntDisc = IntDiscOp.getImm();
  assert(IntDisc == 0 && "Blend components are already expanded");

  case AArch64::MOVKXi:
  case AArch64::MOVi32imm:
  case AArch64::MOVi64imm:

    AddrDisc = AArch64::NoRegister;

  if (AddrDisc == AArch64::XZR)
    AddrDisc = AArch64::NoRegister;

  if (AddrDisc && MRI.getRegClass(AddrDisc) != AddrDiscRC) {
    Register TmpReg = MRI.createVirtualRegister(AddrDiscRC);

  AddrDiscOp.setReg(AddrDisc);
  IntDiscOp.setImm(IntDisc);

  if (SMEOrigInstr != -1) {
    switch (SMEMatrixType) {

  switch (MI.getOpcode()) {

  case AArch64::InitTPIDR2Obj:
  case AArch64::AllocateZABuffer:
  case AArch64::AllocateSMESaveBuffer:
  case AArch64::GetSMESaveSize:
  case AArch64::EntryPStateSM:
  case AArch64::F128CSEL:
  case TargetOpcode::STATEPOINT:
    MI.addOperand(*MI.getMF(),
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
  case AArch64::CATCHRET:
  case AArch64::PROBED_STACKALLOC_DYN:
  case AArch64::CHECK_MATCHING_VL_PSEUDO:
  case AArch64::LD1_MXIPXX_H_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_B:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_H:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_S:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_D:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
  case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
    return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
  case AArch64::LDR_ZA_PSEUDO:
  case AArch64::LDR_TX_PSEUDO:
  case AArch64::STR_TX_PSEUDO:
  case AArch64::ZERO_M_PSEUDO:
  case AArch64::ZERO_T_PSEUDO:
  case AArch64::MOVT_TIZ_PSEUDO:
                           &AArch64::GPR64noipRegClass);
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (N->getOpcode() != AArch64ISD::DUP)

  auto Opnd0 = N->getOperand(0);

                                    CondCode, CondCode2);

  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);

             << " legal: " << (IsLegal ? "yes\n" : "no\n"));

  if (Op->getFlags().hasNoSignedWrap())

      (isIntEqualitySetCC(CC) ||

  EVT VT = LHS.getValueType();

  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
    Chain = RHS.getValue(1);

      IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;

  EVT VT = LHS.getValueType();

  if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {

  unsigned Opcode = AArch64ISD::SUBS;
    Opcode = AArch64ISD::ADDS;
             isIntEqualitySetCC(CC)) {
    Opcode = AArch64ISD::ADDS;
                      LHS.getOperand(0), LHS.getOperand(1));
  } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
      return LHS.getValue(1);

  unsigned Opcode = 0;

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if ((LHS.getValueType() == MVT::f16 && !FullFP16) ||
        LHS.getValueType() == MVT::bf16) {
    Opcode = AArch64ISD::FCCMP;
    APInt Imm = Const->getAPIntValue();
    if (Imm.isNegative() && Imm.sgt(-32)) {
      Opcode = AArch64ISD::CCMN;
    Opcode = AArch64ISD::CCMN;
             isIntEqualitySetCC(CC)) {
    Opcode = AArch64ISD::CCMN;
    Opcode = AArch64ISD::CCMP;

                                    bool &CanNegate, bool &MustBeFirst,
                                    bool &PreferFirst, bool WillNegate,
                                    unsigned Depth = 0) {

  if (VT == MVT::f128)
    MustBeFirst = false;

                     {Val->getOperand(0), Val->getOperand(1)});

  bool IsOR = Opcode == ISD::OR;

  if (MustBeFirstL && MustBeFirstR)
  if (!CanNegateL && !CanNegateR)

    CanNegate = WillNegate && CanNegateL && CanNegateR;
    MustBeFirst = !CanNegate;
    MustBeFirst = MustBeFirstL || MustBeFirstR;
  PreferFirst = PreferFirstL || PreferFirstR;

  bool isInteger = LHS.getValueType().isInteger();
    CC = getSetCCInverse(CC, LHS.getValueType());
    assert(LHS.getValueType().isFloatingPoint());

  bool IsOR = Opcode == ISD::OR;

                                     PreferFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");
                                     PreferFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");

  bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
  if (MustBeFirstL || ShouldFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");

  bool NegateAfterAll;
    assert(CanNegateR && "at least one side must be negatable");
    assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      NegateAfterR = true;
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    NegateAfterAll = !Negate;
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    NegateAfterR = false;
    NegateAfterAll = false;

  bool DummyCanNegate;
  bool DummyMustBeFirst;
  bool DummyPreferFirst;
                          DummyPreferFirst, false))

  auto isSupportedExtend = [&](SDValue V) {
      uint64_t Mask = MaskCst->getZExtValue();
      return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);

  if (!Op.hasOneUse())
  if (isSupportedExtend(Op))

  unsigned Opc = Op.getOpcode();
    uint64_t Shift = ShiftCst->getZExtValue();
    if (isSupportedExtend(Op.getOperand(0)))
      return (Shift <= 4) ? 2 : 1;
    EVT VT = Op.getValueType();
    if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))

  if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)

    EVT VT = RHS.getValueType();
    APInt C = RHSC->getAPIntValue();
      if (!C.isMinSignedValue()) {
      assert(!C.isZero() && "C should not be zero here");
      if (!C.isMaxSignedValue()) {
      if (!C.isAllOnes()) {

  bool LHSIsCMN = isCMN(LHS, CC, DAG);
  bool RHSIsCMN = isCMN(RHS, CC, DAG);

      LHS.getNode()->hasNUsesOfValue(1, 0)) {
    int16_t ValueofRHS = RHS->getAsZExtVal();

static std::pair<SDValue, SDValue>

  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");

  switch (Op.getOpcode()) {
    Opc = AArch64ISD::ADDS;
    Opc = AArch64ISD::ADDS;
    Opc = AArch64ISD::SUBS;
    Opc = AArch64ISD::SUBS;

  if (Op.getValueType() == MVT::i32) {

    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");

    Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)

    Overflow = Value.getValue(1);

  return std::make_pair(Value, Overflow);

      !Subtarget->isNeonAvailable()))
    return LowerToScalableOp(Op, DAG);

  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,

  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)

  if (!CFVal || !CTVal)

    return Cmp.getValue(1);

  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);

                               unsigned Opcode, bool IsSigned) {
  EVT VT0 = Op.getValue(0).getValueType();
  EVT VT1 = Op.getValue(1).getValueType();

  if (VT0 != MVT::i32 && VT0 != MVT::i64)

  bool InvertCarry = Opcode == AArch64ISD::SBCS;

  auto getFloatVT = [](EVT VT) {
    assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
    return VT == MVT::i32 ? MVT::f32 : MVT::f64;
  auto bitcastToFloat = [&](SDValue Val) {
    return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);

  NewOps.reserve(Op.getNumOperands() - 1);
  for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)

  EVT OrigVT = Op.getValueType();

      DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);

  unsigned IsWrite = Op.getConstantOperandVal(2);
  unsigned Locality = Op.getConstantOperandVal(3);
  unsigned IsData = Op.getConstantOperandVal(4);

  bool IsStream = !Locality;
    assert(Locality <= 3 && "Prefetch locality out-of-range");
    Locality = 3 - Locality;
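// Note (added): the PrfOp value built below packs the PRFM operand fields:
// bit 4 selects store (PST*) versus load (PLD*) prefetch, bit 3 selects the
// instruction cache, bits 2:1 encode the target cache level (already inverted
// above so level 1 encodes as 0), and bit 0 is the streaming/non-temporal
// hint. For example rw=0, locality=3, data=1 gives PrfOp == 0, i.e. PLDL1KEEP.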
  unsigned PrfOp = (IsWrite << 4) |

  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),

  if (LHSConstOp && RHSConst) {
    uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);

  EVT VT = Op.getValueType();

  if (VT == MVT::nxv2f64 && SrcVal.getValueType() == MVT::nxv2bf16) {
    return DAG.getNode(ISD::FP_EXTEND, DL, VT,
                       DAG.getNode(ISD::FP_EXTEND, DL, MVT::nxv2f32, SrcVal));

    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);

    return LowerFixedLengthFPExtendToSVE(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);

  if (VT == MVT::f64) {
    if (Op0VT == MVT::f32 || Op0VT == MVT::f16)
    if (Op0VT == MVT::bf16 && IsStrict) {
                         {Op0, Op.getOperand(0)});
    if (Op0VT == MVT::bf16)
      return DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), VT,
                         DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Op0));

  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");

  EVT VT = Op.getValueType();
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
  bool Trunc = Op.getConstantOperandVal(IsStrict ? 2 : 1) == 1;
    if (SrcVT == MVT::nxv8f32)

    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);

    constexpr EVT I32 = MVT::nxv4i32;

    if (SrcVT == MVT::nxv2f32 || SrcVT == MVT::nxv4f32) {
      if (Subtarget->hasBF16())
        return LowerToPredicatedOp(Op, DAG,
                                   AArch64ISD::FP_ROUND_MERGE_PASSTHRU);

      Narrow = getSVESafeBitCast(I32, SrcVal, DAG);
    } else if (SrcVT == MVT::nxv2f64 &&
               (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable())) {
      Narrow = DAG.getNode(AArch64ISD::FCVTX_MERGE_PASSTHRU, DL, MVT::nxv2f32,
                           Pg, SrcVal, DAG.getUNDEF(MVT::nxv2f32));

        NewOps.push_back(Op.getOperand(IsStrict ? 2 : 1));
        return DAG.getNode(Op.getOpcode(), DL, VT, NewOps, Op->getFlags());

      EVT I1 = I32.changeElementType(MVT::i1);
      IsNaN = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, I1, IsNaN);
      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);

    return getSVESafeBitCast(VT, Narrow, DAG);

    return LowerFixedLengthFPRoundToSVE(Op, DAG);

      !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
        Subtarget->hasBF16())) {

      Narrow = DAG.getNode(ISD::BITCAST, DL, I32, Narrow);

      Narrow = DAG.getNode(AArch64ISD::FCVTXN, DL, F32, Narrow);
      Narrow = DAG.getNode(ISD::BITCAST, DL, I32, Narrow);

      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);

    EVT I16 = I32.changeVectorElementType(MVT::i16);

    return DAG.getNode(ISD::BITCAST, DL, VT, Narrow);

  if (SrcVT != MVT::f128) {

  bool IsStrict = Op->isStrictFPOpcode();
  EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
  EVT VT = Op.getValueType();

           "Unimplemented SVE support for STRICT_FP_to_INT!");

                        {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});

        Op.getOpcode(), DL, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, DL, NewVT, Op.getOperand(0)));

    if (InVT == MVT::nxv8f32)

                           ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
                           : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
    return LowerToPredicatedOp(Op, DAG, Opcode);

    return LowerFixedLengthFPToIntToSVE(Op, DAG);

  if (VTSize < InVTSize) {
                        {Op.getOperand(0), Op.getOperand(1)});

  if (VTSize > InVTSize) {
                        {Op.getOperand(0), Op.getOperand(1)});
      return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});

      return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
                         {Op.getOperand(0), Extract});
    return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

    return LowerVectorFP_TO_INT(Op, DAG);

  if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||

                        {Op.getOperand(0), SrcVal});
      return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                         {Ext.getValue(1), Ext.getValue(0)});

                       DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));

AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,

  EVT DstVT = Op.getValueType();

  assert(SatWidth <= DstElementWidth &&
         "Saturation width cannot exceed result width");

  if ((SrcElementVT == MVT::f16 &&
       (!Subtarget->hasFullFP16() || DstElementWidth > 16)) ||
      SrcElementVT == MVT::bf16) {
    SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F32VT, SrcVal);
    SrcElementVT = MVT::f32;
    SrcElementWidth = 32;
  } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
             SrcElementVT != MVT::f16 && SrcElementVT != MVT::bf16)

  if (SatWidth == 64 && SrcElementWidth < 64) {
    SrcVal = DAG.getNode(ISD::FP_EXTEND, DL, F64VT, SrcVal);
    SrcElementVT = MVT::f64;
    SrcElementWidth = 64;

  if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth) {

  if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)

      SrcVal2 ? DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal2,

    return LowerVectorFP_TO_INT_SAT(Op, DAG);

  EVT DstVT = Op.getValueType();

  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");

  if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) {
    SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
  } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16 &&

  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
       (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
      DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
    return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,

  if (DstWidth < SatWidth)

  if (SrcVT == MVT::f16 && SatVT == MVT::i16 && DstVT == MVT::i32) {
        DAG.getNode(AArch64ISD::FCVTZS_HALF, DL, MVT::f32, SrcVal);
    SDValue CVTf32 = DAG.getNode(AArch64ISD::FCVTZU_HALF, DL, MVT::f32, SrcVal);

  EVT VT = Op.getValueType();

  bool IsStrict = Op->isStrictFPOpcode();
  EVT VT = Op.getValueType();
  EVT InVT = In.getValueType();
  unsigned Opc = Op.getOpcode();

           "Unimplemented SVE support for ISD:::STRICT_INT_TO_FP!");

                        {Op.getOperand(0), In});
                         {Op.getValueType(), MVT::Other},

    if (VT == MVT::nxv8f32)

    unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
                               : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
    return LowerToPredicatedOp(Op, DAG, Opcode);

    return LowerFixedLengthIntToFPToSVE(Op, DAG);

  if (VTSize < InVTSize) {

    bool IsTargetf16 = false;
    if (Op.hasOneUse() &&
      SDNode *U = *Op->user_begin();
      if (U->hasOneUse() && U->user_begin()->getOpcode() == ISD::FP_ROUND) {
        EVT TmpVT = U->user_begin()->getValueType(0);

    if (IsTargetf32 && !IsTargetf16) {

                       {In.getValue(1), In.getValue(0),

  if (VTSize > InVTSize) {

      return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
                         {Op.getOperand(0), Extract});
    return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);

  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  auto IntToFpViaPromotion = [&](EVT PromoteVT) {
                        {Op.getOperand(0), SrcVal});
                         {Op.getValueType(), MVT::Other},
        DAG.getNode(Op.getOpcode(), DL, PromoteVT, SrcVal),

  if (Op.getValueType() == MVT::bf16) {
    unsigned MaxWidth = IsSigned

    if (MaxWidth <= 24) {
      return IntToFpViaPromotion(MVT::f32);

    if (MaxWidth <= 53) {
      return IntToFpViaPromotion(MVT::f64);

        IsStrict ? DAG.getNode(Op.getOpcode(), DL, {MVT::f64, MVT::Other},
                               {Op.getOperand(0), ToRound})
                 : DAG.getNode(Op.getOpcode(), DL, MVT::f64, ToRound);

                         {Op.getValueType(), MVT::Other},
          DAG.getIntPtrConstant(0, DL, true));

  if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    return IntToFpViaPromotion(MVT::f32);

  if (Op.getValueType() != MVT::f128)

AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,

  EVT VT = Op.getValueType();

    if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
    if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
    if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
    if (VT != MVT::v2i32 && VT != MVT::nxv2i1)

    return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, EltSize, LaneOffset);

      DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, EltSize, LaneOffset);

  EVT OpVT = Op.getValueType();
  EVT ArgVT = Op.getOperand(0).getValueType();

    return LowerFixedLengthBitcastToSVE(Op, DAG);

           "Expected int->fp bitcast!");

    return getSVESafeBitCast(OpVT, ExtResult, DAG);

    return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);

  if (OpVT != MVT::f16 && OpVT != MVT::bf16)

  if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)

  assert(ArgVT == MVT::i16);

static std::optional<uint64_t>

    return std::nullopt;
    return std::nullopt;

  return C->getZExtValue();

  EVT VT = N.getValueType();

  for (const SDValue &Elt : N->op_values()) {
      unsigned HalfSize = EltSize / 2;
        if (!isIntN(HalfSize, C->getSExtValue()))
        if (!isUIntN(HalfSize, C->getZExtValue()))

  EVT VT = N.getValueType();

  unsigned Opcode = N.getOpcode();
  unsigned Opcode = N.getOpcode();

  if (IsN0SExt && IsN1SExt)
    return AArch64ISD::SMULL;

  if (IsN0ZExt && IsN1ZExt)
    return AArch64ISD::UMULL;

  if (IsN0ZExt || IsN1ZExt) {
      return AArch64ISD::UMULL;
      return AArch64ISD::UMULL;

  if (IsN0SExt || IsN1SExt) {
      return AArch64ISD::SMULL;
      return AArch64ISD::SMULL;

  if (!IsN1SExt && !IsN1ZExt)

      return AArch64ISD::SMULL;
      return AArch64ISD::UMULL;
      return AArch64ISD::UMULL;
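// Note (added): the selection above picks the widening multiply form: two
// sign-extended (or provably small signed) operands give SMULL, two
// zero-extended operands give UMULL, and mixed or unprovable cases fall
// through so the caller emits an ordinary MUL instead.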
  EVT VT = Op.getValueType();

  bool OverrideNEON = !Subtarget->isNeonAvailable();
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

         "unexpected type for custom-lowering ISD::MUL");

  if (VT == MVT::v1i64) {
    if (Subtarget->hasSVE())
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

    if (Subtarget->hasSVE())
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

         "unexpected types for extended operands to VMULL");

                  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
                  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)),

  if (Pattern == AArch64SVEPredPattern::all)

  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,

                                     bool IsSigned, bool IsEqual) {

  if (!N->getValueType(0).isScalableVector() ||

  APInt Y = N->getConstantOperandAPInt(Op1);

  if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue())

  APInt X = N->getConstantOperandAPInt(Op0);

  APInt NumActiveElems =
      IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);

    NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
                              : NumActiveElems.uadd_ov(One, Overflow);

  std::optional<unsigned> PredPattern =
  unsigned MinSVEVectorSize = std::max(
  unsigned ElementSize = 128 / N->getValueType(0).getVectorMinNumElements();
  if (PredPattern != std::nullopt &&
      NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
    return getPTrue(DAG, DL, N->getValueType(0), *PredPattern);
  EVT InVT = Op.getValueType();

         "Expected a predicate-to-predicate bitcast");
         "Only expect to cast between legal scalable predicate types!");

      Op.getConstantOperandVal(0) == Intrinsic::aarch64_sve_convert_to_svbool &&
      Op.getOperand(1).getValueType().bitsGT(VT))
    Op = Op.getOperand(1);

    Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);

  RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  SDValue TileSlice = N->getOperand(2);

  int32_t ConstAddend = 0;
    ConstAddend = ImmNode->getSExtValue();

  int32_t ImmAddend = ConstAddend % 16;
  if (int32_t C = (ConstAddend - ImmAddend)) {
    VarAddend = VarAddend

    auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,

  return DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
                     {N.getOperand(0), TileSlice, Base,

  auto Op1 = Op.getOperand(1);
  auto Op2 = Op.getOperand(2);
  auto Mask = Op.getOperand(3);

  EVT Op2VT = Op2.getValueType();
  EVT ResVT = Op.getValueType();

         "Expected 8-bit or 16-bit characters.");

    Op2 = DAG.getNode(AArch64ISD::DUPLANE128, DL, OpContainerVT, Op2,

                           ID, Mask, Op1, Op2);

  unsigned IntNo = Op.getConstantOperandVal(1);

  case Intrinsic::aarch64_prefetch: {
    unsigned IsWrite = Op.getConstantOperandVal(3);
    unsigned Locality = Op.getConstantOperandVal(4);
    unsigned IsStream = Op.getConstantOperandVal(5);
    unsigned IsData = Op.getConstantOperandVal(6);
    unsigned PrfOp = (IsWrite << 4) |

    return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,

  case Intrinsic::aarch64_sme_str:
  case Intrinsic::aarch64_sme_ldr: {
  case Intrinsic::aarch64_sme_za_enable:
        AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue),
  case Intrinsic::aarch64_sme_za_disable:
        AArch64ISD::SMSTOP, DL, DAG.getVTList(MVT::Other, MVT::Glue),

  unsigned IntNo = Op.getConstantOperandVal(1);

  case Intrinsic::aarch64_mops_memset_tag: {
    auto Alignment = Node->getMemOperand()->getAlign();
    bool IsVol = Node->isVolatile();
    auto DstPtrInfo = Node->getPointerInfo();

    SDValue MS = SDI.EmitMOPS(AArch64::MOPSMemorySetTaggingPseudo, DAG, DL,
                              Chain, Dst, Val, Size, Alignment, IsVol,
                              DstPtrInfo, MachinePointerInfo{});

  unsigned IntNo = Op.getConstantOperandVal(0);

  case Intrinsic::thread_pointer: {
    return DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
  case Intrinsic::aarch64_sve_whilewr_b:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilewr_h:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilewr_s:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilewr_d:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_b:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_h:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_s:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_sve_whilerw_d:
                       Op.getOperand(1), Op.getOperand(2),
  case Intrinsic::aarch64_neon_abs: {
    EVT Ty = Op.getValueType();
    if (Ty == MVT::i64) {
          DAG.getNode(ISD::BITCAST, DL, MVT::v1i64, Op.getOperand(1));
      return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Result);

  case Intrinsic::aarch64_neon_pmull64: {
    std::optional<uint64_t> LHSLane =
    std::optional<uint64_t> RHSLane =

    assert((!LHSLane || *LHSLane < 2) && "Expect lane to be None or 0 or 1");
    assert((!RHSLane || *RHSLane < 2) && "Expect lane to be None or 0 or 1");
6324 auto TryVectorizeOperand = [](
SDValue N, std::optional<uint64_t> NLane,
6325 std::optional<uint64_t> OtherLane,
6327 SelectionDAG &DAG) ->
SDValue {
6336 if (OtherLane == 1) {
6345 DAG.
getNode(AArch64ISD::DUPLANE64,
DL, MVT::v2i64,
6351 return DAG.
getNode(AArch64ISD::DUP,
DL, MVT::v1i64,
N);
6356 assert(
N.getValueType() == MVT::i64 &&
6357 "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
6361 LHS = TryVectorizeOperand(
LHS, LHSLane, RHSLane,
DL, DAG);
6362 RHS = TryVectorizeOperand(
RHS, RHSLane, LHSLane,
DL, DAG);
6366 case Intrinsic::aarch64_neon_smax:
6369 case Intrinsic::aarch64_neon_umax:
6372 case Intrinsic::aarch64_neon_smin:
6375 case Intrinsic::aarch64_neon_umin:
6378 case Intrinsic::aarch64_neon_scalar_sqxtn:
6379 case Intrinsic::aarch64_neon_scalar_sqxtun:
6380 case Intrinsic::aarch64_neon_scalar_uqxtn: {
6381 assert(
Op.getValueType() == MVT::i32 ||
Op.getValueType() == MVT::f32);
6382 if (
Op.getValueType() == MVT::i32)
6383 return DAG.
getNode(ISD::BITCAST,
DL, MVT::i32,
6387 Op.getOperand(1))));
6390 case Intrinsic::aarch64_neon_sqxtn:
6393 case Intrinsic::aarch64_neon_sqxtun:
6396 case Intrinsic::aarch64_neon_uqxtn:
6399 case Intrinsic::aarch64_neon_sqshrn:
6400 if (
Op.getValueType().isVector())
6403 Op.getOperand(1).getValueType(),
6404 Op.getOperand(1),
Op.getOperand(2)));
6406 case Intrinsic::aarch64_neon_sqshrun:
6407 if (
Op.getValueType().isVector())
6410 Op.getOperand(1).getValueType(),
6411 Op.getOperand(1),
Op.getOperand(2)));
6413 case Intrinsic::aarch64_neon_uqshrn:
6414 if (
Op.getValueType().isVector())
6417 Op.getOperand(1).getValueType(),
6418 Op.getOperand(1),
Op.getOperand(2)));
6420 case Intrinsic::aarch64_neon_sqrshrn:
6421 if (
Op.getValueType().isVector())
6424 Op.getOperand(1).getValueType(),
6425 Op.getOperand(1),
Op.getOperand(2)));
6427 case Intrinsic::aarch64_neon_sqrshrun:
6428 if (
Op.getValueType().isVector())
6431 Op.getOperand(1).getValueType(),
6432 Op.getOperand(1),
Op.getOperand(2)));
6434 case Intrinsic::aarch64_neon_uqrshrn:
6435 if (
Op.getValueType().isVector())
6438 Op.getOperand(1).getValueType(),
6439 Op.getOperand(1),
Op.getOperand(2)));
6441 case Intrinsic::aarch64_neon_sqrshl:
6442 if (
Op.getValueType().isVector())
6445 case Intrinsic::aarch64_neon_sqshl:
6446 if (
Op.getValueType().isVector())
6449 case Intrinsic::aarch64_neon_uqrshl:
6450 if (
Op.getValueType().isVector())
6453 case Intrinsic::aarch64_neon_uqshl:
6454 if (
Op.getValueType().isVector())
6457 case Intrinsic::aarch64_neon_sqadd:
6458 if (
Op.getValueType().isVector())
6463 case Intrinsic::aarch64_neon_sqsub:
6464 if (
Op.getValueType().isVector())
6469 case Intrinsic::aarch64_neon_uqadd:
6470 if (
Op.getValueType().isVector())
6474 case Intrinsic::aarch64_neon_uqsub:
6475 if (
Op.getValueType().isVector())
6479 case Intrinsic::aarch64_neon_sqdmulls_scalar:
6481 case Intrinsic::aarch64_sve_whilelt:
6484 case Intrinsic::aarch64_sve_whilels:
6487 case Intrinsic::aarch64_sve_whilele:
6490 case Intrinsic::aarch64_sve_sunpkhi:
6491 return DAG.
getNode(AArch64ISD::SUNPKHI,
DL,
Op.getValueType(),
6493 case Intrinsic::aarch64_sve_sunpklo:
6494 return DAG.
getNode(AArch64ISD::SUNPKLO,
DL,
Op.getValueType(),
6496 case Intrinsic::aarch64_sve_uunpkhi:
6497 return DAG.
getNode(AArch64ISD::UUNPKHI,
DL,
Op.getValueType(),
6499 case Intrinsic::aarch64_sve_uunpklo:
6500 return DAG.
getNode(AArch64ISD::UUNPKLO,
DL,
Op.getValueType(),
6502 case Intrinsic::aarch64_sve_clasta_n:
6503 return DAG.
getNode(AArch64ISD::CLASTA_N,
DL,
Op.getValueType(),
6504 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6505 case Intrinsic::aarch64_sve_clastb_n:
6506 return DAG.
getNode(AArch64ISD::CLASTB_N,
DL,
Op.getValueType(),
6507 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6508 case Intrinsic::aarch64_sve_lasta:
6509 return DAG.
getNode(AArch64ISD::LASTA,
DL,
Op.getValueType(),
6510 Op.getOperand(1),
Op.getOperand(2));
6511 case Intrinsic::aarch64_sve_lastb:
6512 return DAG.
getNode(AArch64ISD::LASTB,
DL,
Op.getValueType(),
6513 Op.getOperand(1),
Op.getOperand(2));
6514 case Intrinsic::aarch64_sve_tbl:
6515 return DAG.
getNode(AArch64ISD::TBL,
DL,
Op.getValueType(),
Op.getOperand(1),
6517 case Intrinsic::aarch64_sve_trn1:
6518 return DAG.
getNode(AArch64ISD::TRN1,
DL,
Op.getValueType(),
6519 Op.getOperand(1),
Op.getOperand(2));
6520 case Intrinsic::aarch64_sve_trn2:
6521 return DAG.
getNode(AArch64ISD::TRN2,
DL,
Op.getValueType(),
6522 Op.getOperand(1),
Op.getOperand(2));
6523 case Intrinsic::aarch64_sve_uzp1:
6524 return DAG.
getNode(AArch64ISD::UZP1,
DL,
Op.getValueType(),
6525 Op.getOperand(1),
Op.getOperand(2));
6526 case Intrinsic::aarch64_sve_uzp2:
6527 return DAG.
getNode(AArch64ISD::UZP2,
DL,
Op.getValueType(),
6528 Op.getOperand(1),
Op.getOperand(2));
6529 case Intrinsic::aarch64_sve_zip1:
6530 return DAG.
getNode(AArch64ISD::ZIP1,
DL,
Op.getValueType(),
6531 Op.getOperand(1),
Op.getOperand(2));
6532 case Intrinsic::aarch64_sve_zip2:
6533 return DAG.
getNode(AArch64ISD::ZIP2,
DL,
Op.getValueType(),
6534 Op.getOperand(1),
Op.getOperand(2));
6535 case Intrinsic::aarch64_sve_splice:
6536 return DAG.
getNode(AArch64ISD::SPLICE,
DL,
Op.getValueType(),
6537 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
6538 case Intrinsic::aarch64_sve_ptrue:
6539 return getPTrue(DAG,
DL,
Op.getValueType(),
Op.getConstantOperandVal(1));
6540 case Intrinsic::aarch64_sve_clz:
6541 return DAG.
getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6542 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6543 case Intrinsic::aarch64_sme_cntsd: {
6549 case Intrinsic::aarch64_sve_cnt: {
6552 if (
Data.getValueType().isFloatingPoint())
6554 return DAG.
getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6555 Op.getOperand(2),
Data,
Op.getOperand(1));
6557 case Intrinsic::aarch64_sve_dupq_lane:
6558 return LowerDUPQLane(
Op, DAG);
6559 case Intrinsic::aarch64_sve_convert_from_svbool:
6560 if (
Op.getValueType() == MVT::aarch64svcount)
6561 return DAG.
getNode(ISD::BITCAST,
DL,
Op.getValueType(),
Op.getOperand(1));
6563 case Intrinsic::aarch64_sve_convert_to_svbool:
6564 if (
Op.getOperand(1).getValueType() == MVT::aarch64svcount)
6565 return DAG.
getNode(ISD::BITCAST,
DL, MVT::nxv16i1,
Op.getOperand(1));
6567 case Intrinsic::aarch64_sve_fneg:
6568 return DAG.
getNode(AArch64ISD::FNEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6569 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6570 case Intrinsic::aarch64_sve_frintp:
6571 return DAG.
getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6572 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6573 case Intrinsic::aarch64_sve_frintm:
6574 return DAG.
getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6575 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6576 case Intrinsic::aarch64_sve_frinti:
6577 return DAG.
getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU,
DL,
6578 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6580 case Intrinsic::aarch64_sve_frintx:
6581 return DAG.
getNode(AArch64ISD::FRINT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6582 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6583 case Intrinsic::aarch64_sve_frinta:
6584 return DAG.
getNode(AArch64ISD::FROUND_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6585 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6586 case Intrinsic::aarch64_sve_frintn:
6587 return DAG.
getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU,
DL,
6588 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6590 case Intrinsic::aarch64_sve_frintz:
6591 return DAG.
getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6592 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6593 case Intrinsic::aarch64_sve_ucvtf:
6594 return DAG.
getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU,
DL,
6595 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6597 case Intrinsic::aarch64_sve_scvtf:
6598 return DAG.
getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU,
DL,
6599 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
6601 case Intrinsic::aarch64_sve_fcvtzu:
6602 return DAG.
getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6603 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6604 case Intrinsic::aarch64_sve_fcvtzs:
6605 return DAG.
getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6606 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6607 case Intrinsic::aarch64_sve_fsqrt:
6608 return DAG.
getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6609 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6610 case Intrinsic::aarch64_sve_frecpx:
6611 return DAG.
getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6612 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6613 case Intrinsic::aarch64_sve_frecpe_x:
6614 return DAG.
getNode(AArch64ISD::FRECPE,
DL,
Op.getValueType(),
6616 case Intrinsic::aarch64_sve_frecps_x:
6617 return DAG.
getNode(AArch64ISD::FRECPS,
DL,
Op.getValueType(),
6618 Op.getOperand(1),
Op.getOperand(2));
6619 case Intrinsic::aarch64_sve_frsqrte_x:
6620 return DAG.
getNode(AArch64ISD::FRSQRTE,
DL,
Op.getValueType(),
6622 case Intrinsic::aarch64_sve_frsqrts_x:
6623 return DAG.
getNode(AArch64ISD::FRSQRTS,
DL,
Op.getValueType(),
6624 Op.getOperand(1),
Op.getOperand(2));
6625 case Intrinsic::aarch64_sve_fabs:
6626 return DAG.
getNode(AArch64ISD::FABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6627 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6628 case Intrinsic::aarch64_sve_abs:
6629 return DAG.
getNode(AArch64ISD::ABS_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6630 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6631 case Intrinsic::aarch64_sve_neg:
6632 return DAG.
getNode(AArch64ISD::NEG_MERGE_PASSTHRU,
DL,
Op.getValueType(),
6633 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
6634 case Intrinsic::aarch64_sve_insr: {
6636 EVT ScalarTy =
Scalar.getValueType();
6637 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
6640 return DAG.
getNode(AArch64ISD::INSR,
DL,
Op.getValueType(),
6641 Op.getOperand(1), Scalar);
  case Intrinsic::aarch64_sve_rbit:
    return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, DL,
                       Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
                       Op.getOperand(1));
  case Intrinsic::aarch64_sve_revb:
    return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revh:
    return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revw:
    return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_revd:
    return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, Op.getValueType(),
                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtb:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxth:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_sxtw:
    return DAG.getNode(
        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxtb:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxth:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
        Op.getOperand(1));
  case Intrinsic::aarch64_sve_uxtw:
    return DAG.getNode(
        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
        Op.getOperand(2), Op.getOperand(3),
        DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
        Op.getOperand(1));
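  // The DAG.getValueType(...) operand records the element type being extended
  // from (i8, i16 or i32) for the in-register extension nodes above.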
  case Intrinsic::localaddress: {
    const auto &MF = DAG.getMachineFunction();
    const auto *RegInfo = Subtarget->getRegisterInfo();
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
    return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg,
                              Op.getSimpleValueType());
  }

  case Intrinsic::eh_recoverfp: {
    SDValue FnOp = Op.getOperand(1);
    SDValue IncomingFPOp = Op.getOperand(2);
    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
    if (!Fn)
      report_fatal_error(
          "llvm.eh.recoverfp must take a function as the first argument");
    return IncomingFPOp;
  }
  case Intrinsic::aarch64_neon_vsri:
  case Intrinsic::aarch64_neon_vsli:
  case Intrinsic::aarch64_sve_sri:
  case Intrinsic::aarch64_sve_sli: {
    EVT Ty = Op.getValueType();

    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                        IntNo == Intrinsic::aarch64_sve_sri;
    unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
                       Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_srhadd:
  case Intrinsic::aarch64_neon_urhadd:
  case Intrinsic::aarch64_neon_shadd:
  case Intrinsic::aarch64_neon_uhadd: {
    bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                        IntNo == Intrinsic::aarch64_neon_shadd);
    bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
                          IntNo == Intrinsic::aarch64_neon_urhadd);
    unsigned Opcode = IsSignedAdd
                          ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
                          : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2));
  }
  case Intrinsic::aarch64_neon_saddlp:
  case Intrinsic::aarch64_neon_uaddlp: {
    unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
                          ? AArch64ISD::UADDLP
                          : AArch64ISD::SADDLP;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
  }
  case Intrinsic::aarch64_neon_sdot:
  case Intrinsic::aarch64_neon_udot:
  case Intrinsic::aarch64_sve_sdot:
  case Intrinsic::aarch64_sve_udot: {
    unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
                       IntNo == Intrinsic::aarch64_sve_udot)
                          ? AArch64ISD::UDOT
                          : AArch64ISD::SDOT;
    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_usdot:
  case Intrinsic::aarch64_sve_usdot: {
    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }
  case Intrinsic::aarch64_neon_saddlv:
  case Intrinsic::aarch64_neon_uaddlv: {
    EVT OpVT = Op.getOperand(1).getValueType();
    EVT ResVT = Op.getValueType();
    assert(
        ((ResVT == MVT::i32 && (OpVT == MVT::v8i8 || OpVT == MVT::v16i8 ||
                                OpVT == MVT::v8i16 || OpVT == MVT::v4i16)) ||
         (ResVT == MVT::i64 && (OpVT == MVT::v4i32 || OpVT == MVT::v2i32))) &&
        "Unexpected aarch64_neon_u/saddlv type");
    // The across-vector add leaves its result in a vector register; extract
    // lane 0 to obtain the scalar value.
    SDValue ADDLV = DAG.getNode(
        IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
                                                : AArch64ISD::SADDLV,
        DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
    SDValue EXTRACT_VEC_ELT =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, ADDLV,
                    DAG.getConstant(0, DL, MVT::i64));
    return EXTRACT_VEC_ELT;
  }
6791 case Intrinsic::experimental_cttz_elts: {
6805 DAG.
getNode(AArch64ISD::CTTZ_ELTS,
DL, MVT::i64, CttzOp);
6808 case Intrinsic::experimental_vector_match: {
6814bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
6823bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
SDValue Extend,
6844 if (LD->isVolatile())
6847 EVT MemVT = LD->getMemoryVT();
6848 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8 && MemVT != MVT::v2i16)
6851 Align Alignment = LD->getAlign();
6853 if (Subtarget.requiresStrictAlign() && Alignment < RequiredAlignment)
6859bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
6867 if (!ExtVT.
isScalableVector() && !Subtarget->useSVEForFixedLengthVectors())
6882 unsigned NumExtMaskedLoads = 0;
6883 for (
auto *U : Ld->getMask()->users())
6885 NumExtMaskedLoads++;
6887 if (NumExtMaskedLoads <= 1)
6893 return PreExtScalarVT == MVT::i8 || PreExtScalarVT == MVT::i16 ||
6894 PreExtScalarVT == MVT::i32 || PreExtScalarVT == MVT::i64;
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(false, false, false), AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, false, true), AArch64ISD::GLD1_UXTW_MERGE_ZERO},
      {std::make_tuple(false, true, false), AArch64ISD::GLD1_MERGE_ZERO},
      {std::make_tuple(false, true, true), AArch64ISD::GLD1_SXTW_MERGE_ZERO},
      {std::make_tuple(true, false, false), AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, false, true),
       AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, false), AArch64ISD::GLD1_SCALED_MERGE_ZERO},
      {std::make_tuple(true, true, true),
       AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
  };
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
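  // The lookup key is (IsScaled, IsSigned, NeedsExtend); a signed but
  // unextended index uses the same plain GLD1 opcodes as an unsigned one.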
  case AArch64ISD::GLD1_MERGE_ZERO:
    return AArch64ISD::GLD1S_MERGE_ZERO;
  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
    return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
    return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
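  // Map each zero-extending gather opcode onto its sign-extending GLD1S
  // counterpart.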
6953 EVT VT =
Op.getValueType();
6977 EVT IndexVT =
Index.getValueType();
6989 assert(Subtarget->useSVEForFixedLengthVectors() &&
6990 "Cannot lower when not using SVE for fixed vectors!");
6999 Index.getValueType().getVectorElementType() == MVT::i64 ||
7000 Mask.getValueType().getVectorElementType() == MVT::i64)
7065 EVT IndexVT =
Index.getValueType();
7077 assert(Subtarget->useSVEForFixedLengthVectors() &&
7078 "Cannot lower when not using SVE for fixed vectors!");
7084 StoreVal = DAG.
getNode(ISD::BITCAST,
DL, VT, StoreVal);
7090 Index.getValueType().getVectorElementType() == MVT::i64 ||
7091 Mask.getValueType().getVectorElementType() == MVT::i64)
7101 if (PromotedVT != VT)
7125 assert(LoadNode &&
"Expected custom lowering of a masked load node");
7126 EVT VT =
Op->getValueType(0);
7129 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
7153 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
7166 {Undef, Undef, Undef, Undef});
7172 Trunc = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2i32, Trunc);
7176 return DAG.
getStore(ST->getChain(),
DL, ExtractTrunc,
7177 ST->getBasePtr(), ST->getMemOperand());
7183 MVT DestVT =
Op.getSimpleValueType();
7187 unsigned SrcAS =
N->getSrcAddressSpace();
7188 unsigned DestAS =
N->getDestAddressSpace();
7189 assert(SrcAS != DestAS &&
7190 "addrspacecast must be between different address spaces");
7193 "addrspacecast must be between different ptr sizes");
7219 assert (StoreNode &&
"Can only custom lower store nodes");
7223 EVT VT =
Value.getValueType();
7229 Subtarget->useSVEForFixedLengthVectors()))
7230 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
7242 MemVT == MVT::v4i8) {
7265 AArch64ISD::STNP, Dl, DAG.
getVTList(MVT::Other),
7266 {StoreNode->getChain(), DAG.getBitcast(MVT::v2i64, Lo),
7267 DAG.getBitcast(MVT::v2i64, Hi), StoreNode->getBasePtr()},
  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
    return LowerStore128(Op, DAG);
  } else if (MemVT == MVT::i64x8) {
7278 EVT PtrVT =
Base.getValueType();
7279 for (
unsigned i = 0; i < 8; i++) {
7280 SDValue Part = DAG.
getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
7300 bool IsStoreRelease =
7303 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
7304 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
7309 StoreNode->
getOpcode() == ISD::ATOMIC_STORE)
7314 unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
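    // A release store uses STILP (FEAT_RCPC3); otherwise the 128-bit value is
    // split and stored with a single STP of its two 64-bit halves.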
7316 std::swap(StoreValue.first, StoreValue.second);
7319 {StoreNode->getChain(), StoreValue.first, StoreValue.second,
7320 StoreNode->getBasePtr()},
7332 EVT MemVT = Load->getMemoryVT();
7333 EVT ResVT = Load->getValueType(0);
7339 switch (Load->getExtensionType()) {
7352 SDValue Chain = Load->getChain();
7353 SDValue BasePtr = Load->getBasePtr();
7355 Align Alignment = Load->getAlign();
7361 DAG.
getLoad(ScalarLoadType,
DL, Chain, BasePtr, PtrInfo, Alignment);
7373 while (CurrentEltBits < DstEltBits) {
7375 CurrentNumElts = CurrentNumElts / 2;
7381 CurrentEltBits = CurrentEltBits * 2;
7384 Res = DAG.
getNode(ExtOpcode,
DL, ExtVT, Res);
7387 if (CurrentNumElts != NumElts) {
7400 assert(LoadNode &&
"Expected custom lowering of a load node");
7409 EVT PtrVT =
Base.getValueType();
7410 for (
unsigned i = 0; i < 8; i++) {
7416 Ops.push_back(Part);
7433 Vec = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, PackedVT, Vec);
7434 Vec = DAG.
getNode(AArch64ISD::NVCAST,
DL, ContainerVT, Vec);
7448 Vec = DAG.
getNode(AArch64ISD::NVCAST,
DL, PackedVT, Vec);
7449 Vec = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, VecVT, Vec);
7464 EVT MaskVT =
Mask.getValueType();
7467 const bool HasPassthru = !Passthru.
isUndef();
7471 assert(VecVT.
isVector() &&
"Input to VECTOR_COMPRESS must be vector.");
7473 if (!Subtarget->isSVEAvailable())
7480 if (MinElmts != 2 && MinElmts != 4)
7484 if (IsFixedLength) {
7494 DAG.
getUNDEF(ScalableMaskVT), Mask,
7499 DAG.
getUNDEF(ScalableVecVT), Passthru,
7503 MaskVT =
Mask.getValueType();
7539 if (IsFixedLength) {
7557 MVT VT =
Op.getSimpleValueType();
7560 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
7568 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
Op.getOperand(0), Neg,
7581 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
7596 MVT VT =
Op.getSimpleValueType();
7600 if (NewShiftNo == 0)
7601 return Op.getOperand(0);
7610 if (NewShiftNo == 0)
7611 return Op.getOperand(1);
7613 if (ShiftNo->getZExtValue() == NewShiftNo)
7628 EVT XScalarTy =
X.getValueType();
7633 switch (
Op.getSimpleValueType().SimpleTy) {
7642 ExpVT = MVT::nxv4i32;
7646 ExpVT = MVT::nxv2i64;
7657 AArch64SVEPredPattern::all);
7664 if (
X.getValueType() != XScalarTy)
7672 return Op.getOperand(0);
7707 const char FptrReg = 0x11;
7713 Chain,
DL, DAG.
getConstant(0x58000080u | NestReg,
DL, MVT::i32), Addr,
7714 MachinePointerInfo(TrmpAddr));
7719 Chain,
DL, DAG.
getConstant(0x580000b0u | FptrReg,
DL, MVT::i32), Addr,
7720 MachinePointerInfo(TrmpAddr, 4));
7726 MachinePointerInfo(TrmpAddr, 8));
7731 DAG.
getStore(Chain,
DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
7736 DAG.
getStore(Chain,
DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
7750 EVT VT =
Op.getValueType();
7752 (Subtarget->hasSVEB16B16() &&
7753 Subtarget->isNonStreamingSVEorSME2Available()))
7754 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMUL_PRED);
7756 assert(Subtarget->hasBF16() &&
"Expected +bf16 for custom FMUL lowering");
7757 assert((VT == MVT::nxv4bf16 || VT == MVT::nxv8bf16 || VT == MVT::v8bf16) &&
7758 "Unexpected FMUL VT");
7761 return [&, IID](EVT VT,
auto...
Ops) {
7768 EVT SrcVT =
Value.getValueType();
7779 auto FCVT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvt_bf16f32_v2);
7780 auto FCVTNT = MakeGetIntrinsic(Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2);
7785 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalb
7786 : Intrinsic::aarch64_neon_bfmlalb);
7788 MakeGetIntrinsic(UseSVEBFMLAL ? Intrinsic::aarch64_sve_bfmlalt
7789 : Intrinsic::aarch64_neon_bfmlalt);
7791 EVT AccVT = UseSVEBFMLAL ? MVT::nxv4f32 : MVT::v4f32;
7803 LHS = Reinterpret(
LHS, MVT::nxv8bf16);
7804 RHS = Reinterpret(
RHS, MVT::nxv8bf16);
7807 SDValue BottomF32 = Reinterpret(BFMLALB(AccVT, Zero,
LHS,
RHS), MVT::nxv4f32);
7809 FCVT(MVT::nxv8bf16, DAG.
getPOISON(MVT::nxv8bf16), Pg, BottomF32);
7811 if (VT == MVT::nxv4bf16)
7812 return Reinterpret(BottomBF16, VT);
7814 SDValue TopF32 = Reinterpret(BFMLALT(AccVT, Zero,
LHS,
RHS), MVT::nxv4f32);
7815 SDValue TopBF16 = FCVTNT(MVT::nxv8bf16, BottomBF16, Pg, TopF32);
7816 return Reinterpret(TopBF16, VT);
7823 EVT VT =
Op.getValueType();
7826 assert(VT.
isVector() &&
"Scalar fma lowering should be handled by patterns");
7829 if (VT != MVT::v8f16 && VT != MVT::v4f32 && VT != MVT::v2f64)
7830 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED);
7834 ? LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMA_PRED)
7844 auto ConvertToScalableFnegMt = [&](
SDValue Op) {
7845 if (
Op.getOpcode() == ISD::FNEG)
7846 Op = LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7850 OpA = ConvertToScalableFnegMt(OpA);
7851 OpB = ConvertToScalableFnegMt(OpB);
7852 OpC = ConvertToScalableFnegMt(OpC);
7855 DAG.
getNode(AArch64ISD::FMA_PRED,
DL, ContainerVT, Pg, OpA, OpB, OpC);
7864 switch (
Op.getOpcode()) {
7870 return LowerLOOP_DEPENDENCE_MASK(
Op, DAG);
7872 return LowerBITCAST(
Op, DAG);
7874 return LowerGlobalAddress(
Op, DAG);
7876 return LowerGlobalTLSAddress(
Op, DAG);
7878 return LowerPtrAuthGlobalAddress(
Op, DAG);
7879 case ISD::ADJUST_TRAMPOLINE:
7880 return LowerADJUST_TRAMPOLINE(
Op, DAG);
7881 case ISD::INIT_TRAMPOLINE:
7882 return LowerINIT_TRAMPOLINE(
Op, DAG);
7886 return LowerSETCC(
Op, DAG);
7888 return LowerSETCCCARRY(
Op, DAG);
7892 return LowerBR_CC(
Op, DAG);
7894 return LowerSELECT(
Op, DAG);
7896 return LowerSELECT_CC(
Op, DAG);
7898 return LowerJumpTable(
Op, DAG);
7900 return LowerBR_JT(
Op, DAG);
7902 return LowerBRIND(
Op, DAG);
7904 return LowerConstantPool(
Op, DAG);
7906 return LowerBlockAddress(
Op, DAG);
7908 return LowerVASTART(
Op, DAG);
7910 return LowerVACOPY(
Op, DAG);
7912 return LowerVAARG(
Op, DAG);
7929 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FADD_PRED);
7931 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSUB_PRED);
7933 return LowerFMUL(
Op, DAG);
7935 return LowerFMA(
Op, DAG);
7937 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FDIV_PRED);
7939 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
7941 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
7943 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
7944 case ISD::FNEARBYINT:
7945 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
7947 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
7949 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
7950 case ISD::FROUNDEVEN:
7951 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
7953 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
7955 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
7957 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
7960 return LowerFP_ROUND(
Op, DAG);
7961 case ISD::FP_EXTEND:
7963 return LowerFP_EXTEND(
Op, DAG);
7965 return LowerFRAMEADDR(
Op, DAG);
7967 return LowerSPONENTRY(
Op, DAG);
7969 return LowerRETURNADDR(
Op, DAG);
7971 return LowerADDROFRETURNADDR(
Op, DAG);
7973 return LowerCONCAT_VECTORS(
Op, DAG);
7975 return LowerINSERT_VECTOR_ELT(
Op, DAG);
7977 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
7979 return LowerBUILD_VECTOR(
Op, DAG);
7982 return LowerEXTEND_VECTOR_INREG(
Op, DAG);
7984 return LowerZERO_EXTEND_VECTOR_INREG(
Op, DAG);
7986 return LowerVECTOR_SHUFFLE(
Op, DAG);
7988 return LowerSPLAT_VECTOR(
Op, DAG);
7990 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
7992 return LowerINSERT_SUBVECTOR(
Op, DAG);
7995 return LowerDIV(
Op, DAG);
8000 return LowerMinMax(
Op, DAG);
8004 return LowerVectorSRA_SRL_SHL(
Op, DAG);
8008 return LowerShiftParts(
Op, DAG);
8011 return LowerCTPOP_PARITY(
Op, DAG);
8013 return LowerFCOPYSIGN(
Op, DAG);
8015 return LowerVectorOR(
Op, DAG);
8017 return LowerXOR(
Op, DAG);
8024 return LowerINT_TO_FP(
Op, DAG);
8029 return LowerFP_TO_INT(
Op, DAG);
8032 return LowerFP_TO_INT_SAT(
Op, DAG);
8034 return LowerGET_ROUNDING(
Op, DAG);
8035 case ISD::SET_ROUNDING:
8036 return LowerSET_ROUNDING(
Op, DAG);
8037 case ISD::GET_FPMODE:
8038 return LowerGET_FPMODE(
Op, DAG);
8039 case ISD::SET_FPMODE:
8040 return LowerSET_FPMODE(
Op, DAG);
8041 case ISD::RESET_FPMODE:
8042 return LowerRESET_FPMODE(
Op, DAG);
8044 return LowerMUL(
Op, DAG);
8046 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHS_PRED);
8048 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::MULHU_PRED);
8050 return LowerINTRINSIC_W_CHAIN(
Op, DAG);
8052 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
8054 return LowerINTRINSIC_VOID(
Op, DAG);
8055 case ISD::ATOMIC_STORE:
8057 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
8058 return LowerStore128(
Op, DAG);
8062 return LowerSTORE(
Op, DAG);
8064 return LowerMSTORE(
Op, DAG);
8066 return LowerMGATHER(
Op, DAG);
8068 return LowerMSCATTER(
Op, DAG);
8069 case ISD::VECREDUCE_SEQ_FADD:
8070 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
8071 case ISD::VECREDUCE_ADD:
8072 case ISD::VECREDUCE_AND:
8073 case ISD::VECREDUCE_OR:
8074 case ISD::VECREDUCE_XOR:
8075 case ISD::VECREDUCE_SMAX:
8076 case ISD::VECREDUCE_SMIN:
8077 case ISD::VECREDUCE_UMAX:
8078 case ISD::VECREDUCE_UMIN:
8079 case ISD::VECREDUCE_FADD:
8080 case ISD::VECREDUCE_FMAX:
8081 case ISD::VECREDUCE_FMIN:
8082 case ISD::VECREDUCE_FMAXIMUM:
8083 case ISD::VECREDUCE_FMINIMUM:
8084 return LowerVECREDUCE(
Op, DAG);
8085 case ISD::VECREDUCE_MUL:
8086 case ISD::VECREDUCE_FMUL:
8087 return LowerVECREDUCE_MUL(
Op, DAG);
8088 case ISD::ATOMIC_LOAD_AND:
8089 return LowerATOMIC_LOAD_AND(
Op, DAG);
8090 case ISD::DYNAMIC_STACKALLOC:
8091 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
8093 return LowerVSCALE(
Op, DAG);
8095 return LowerVECTOR_COMPRESS(
Op, DAG);
8099 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
8100 case ISD::ADDRSPACECAST:
8106 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
8107 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
8110 return LowerToPredicatedOp(
Op, DAG,
8111 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
8114 return LowerTRUNCATE(
Op, DAG);
8116 return LowerMLOAD(
Op, DAG);
8119 !Subtarget->isNeonAvailable()))
8120 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
8121 return LowerLOAD(
Op, DAG);
8125 return LowerToScalableOp(
Op, DAG);
8127 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAX_PRED);
8129 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMAXNM_PRED);
8131 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMIN_PRED);
8133 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::FMINNM_PRED);
8135 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
8137 return LowerABS(
Op, DAG);
8139 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDS_PRED);
8141 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::ABDU_PRED);
8143 return LowerAVG(
Op, DAG, AArch64ISD::HADDS_PRED);
8145 return LowerAVG(
Op, DAG, AArch64ISD::HADDU_PRED);
8147 return LowerAVG(
Op, DAG, AArch64ISD::RHADDS_PRED);
8149 return LowerAVG(
Op, DAG, AArch64ISD::RHADDU_PRED);
8151 return LowerBitreverse(
Op, DAG);
8153 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
8155 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
8157 return LowerCTTZ(
Op, DAG);
8159 return LowerVECTOR_SPLICE(
Op, DAG);
8161 return LowerVECTOR_DEINTERLEAVE(
Op, DAG);
8163 return LowerVECTOR_INTERLEAVE(
Op, DAG);
8164 case ISD::GET_ACTIVE_LANE_MASK:
8165 return LowerGET_ACTIVE_LANE_MASK(
Op, DAG);
8168 if (
Op.getValueType().isVector())
8169 return LowerVectorXRINT(
Op, DAG);
  case ISD::LLROUND: {
    assert((Op.getOperand(0).getValueType() == MVT::f16 ||
            Op.getOperand(0).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
  }
    assert((Op.getOperand(1).getValueType() == MVT::f16 ||
            Op.getOperand(1).getValueType() == MVT::bf16) &&
           "Expected custom lowering of rounding operations only for f16");
    SDValue Ext =
        DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
                    {Op.getOperand(0), Op.getOperand(1)});
    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
                       {Ext.getValue(1), Ext.getValue(0)});
8194 assert(
Op.getOperand(2).getValueType() == MVT::i128 &&
8195 "WRITE_REGISTER custom lowering is only for 128-bit sysregs");
8200 std::pair<SDValue, SDValue> Pair =
8205 SysRegName, Pair.first, Pair.second);
8214 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
8215 return LowerVECTOR_HISTOGRAM(
Op, DAG);
8216 case ISD::PARTIAL_REDUCE_SMLA:
8217 case ISD::PARTIAL_REDUCE_UMLA:
8218 case ISD::PARTIAL_REDUCE_SUMLA:
8219 case ISD::PARTIAL_REDUCE_FMLA:
8220 return LowerPARTIAL_REDUCE_MLA(
Op, DAG);
8225 return !Subtarget->useSVEForFixedLengthVectors();
8229 EVT VT,
bool OverrideNEON)
const {
8252 return Subtarget->isSVEorStreamingSVEAvailable();
8259 if (!Subtarget->useSVEForFixedLengthVectors())
8279 unsigned Opcode =
N->getOpcode();
8284 unsigned IID =
N->getConstantOperandVal(0);
8285 if (IID < Intrinsic::num_intrinsics)
8299 if (IID == Intrinsic::aarch64_neon_umull ||
8301 IID == Intrinsic::aarch64_neon_smull ||
8310 bool IsVarArg)
const {
8333 if (Subtarget->isTargetWindows()) {
8335 if (Subtarget->isWindowsArm64EC())
8341 if (!Subtarget->isTargetDarwin())
8349 if (Subtarget->isWindowsArm64EC())
8355 if (Subtarget->isWindowsArm64EC())
8379 if (Subtarget->isWindowsArm64EC())
8415 IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8431 RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
8446 Chain = DAG.
getCopyToReg(Chain,
DL, AArch64::X0, TPIDR2Block, Glue);
8448 DAG.
getNode(AArch64ISD::RESTORE_ZA,
DL, MVT::Other,
8449 {Chain, TPIDR2_EL0, DAG.
getRegister(AArch64::X0, MVT::i64),
8450 RestoreRoutine, RegMask, Chain.
getValue(1)});
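  // The SME lazy-save restore passes the TPIDR2 block address in X0 and
  // conditionally invokes the restore routine via the RESTORE_ZA node.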
8466 auto &FuncInfo = *MF.
getInfo<AArch64FunctionInfo>();
8467 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
8468 const AArch64RegisterInfo &
TRI = *Subtarget.getRegisterInfo();
8470 SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
8496 if (
getTM().useNewSMEABILowering())
8506 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
8515 {Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
8526SDValue AArch64TargetLowering::LowerFormalArguments(
8534 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
8536 (isVarArg && Subtarget->isWindowsArm64EC());
8537 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
8547 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.
getContext());
8555 unsigned NumArgs = Ins.
size();
8557 unsigned CurArgIdx = 0;
8558 bool UseVarArgCC =
false;
8560 UseVarArgCC = isVarArg;
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ValVT = Ins[i].VT;
    if (Ins[i].isOrigArg()) {
      std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[i].getOrigArgIndex();
8575 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
8577 else if (ActualMVT == MVT::i16)
8581 Ins[i].OrigTy, CCInfo);
8582 assert(!Res &&
"Call operand has unhandled type");
8587 bool IsLocallyStreaming =
8588 !
Attrs.hasStreamingInterface() &&
Attrs.hasStreamingBody();
8592 unsigned ExtraArgLocs = 0;
8593 for (
unsigned i = 0, e = Ins.
size(); i != e; ++i) {
8594 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
8596 if (Ins[i].
Flags.isByVal()) {
8600 int Size = Ins[i].Flags.getByValSize();
8601 unsigned NumRegs = (
Size + 7) / 8;
8613 if (Ins[i].
Flags.isSwiftAsync())
8614 MF.
getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(
true);
8620 const TargetRegisterClass *RC;
8622 if (RegVT == MVT::i32)
8623 RC = &AArch64::GPR32RegClass;
8624 else if (RegVT == MVT::i64)
8625 RC = &AArch64::GPR64RegClass;
8626 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
8627 RC = &AArch64::FPR16RegClass;
8628 else if (RegVT == MVT::f32)
8629 RC = &AArch64::FPR32RegClass;
8631 RC = &AArch64::FPR64RegClass;
8633 RC = &AArch64::FPR128RegClass;
8637 RC = &AArch64::PPRRegClass;
8638 }
else if (RegVT == MVT::aarch64svcount) {
8640 RC = &AArch64::PPRRegClass;
8643 RC = &AArch64::ZPRRegClass;
8650 if (IsLocallyStreaming) {
8665 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
8684 "Indirect arguments should be scalable on most subtargets");
8706 uint32_t BEAlign = 0;
8707 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
8708 !Ins[i].Flags.isInConsecutiveRegs())
8709 BEAlign = 8 - ArgSize;
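      // Big-endian targets pass sub-8-byte stack arguments in the high part of
      // the slot, so bias the load offset accordingly.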
8712 MachinePointerInfo PtrInfo;
8718 unsigned ObjOffset = ArgOffset + BEAlign;
8747 Subtarget->isWindowsArm64EC()) &&
8748 "Indirect arguments should be scalable on most subtargets");
8768 Subtarget->isWindowsArm64EC()) &&
8769 "Indirect arguments should be scalable on most subtargets");
8772 unsigned NumParts = 1;
8773 if (Ins[i].
Flags.isInConsecutiveRegs()) {
8774 while (!Ins[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
8783 while (NumParts > 0) {
8784 ArgValue = DAG.
getLoad(PartLoad,
DL, Chain, Ptr, MachinePointerInfo());
8797 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
8803 if (Ins[i].isOrigArg()) {
8804 Argument *OrigArg =
F.getArg(Ins[i].getOrigArgIndex());
8806 if (!Ins[i].
Flags.isZExt()) {
8807 ArgValue = DAG.
getNode(AArch64ISD::ASSERT_ZEXT_BOOL,
DL,
8818 if (
Attrs.hasStreamingCompatibleInterface()) {
8820 DAG.
getNode(AArch64ISD::ENTRY_PSTATE_SM,
DL,
8821 DAG.
getVTList(MVT::i64, MVT::Other), {Chain});
8833 if (IsLocallyStreaming) {
8834 if (
Attrs.hasStreamingCompatibleInterface())
8843 for (
unsigned I=0;
I<InVals.
size(); ++
I) {
8855 if (!Subtarget->isTargetDarwin() || IsWin64) {
8861 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
8865 unsigned VarArgsOffset = CCInfo.getStackSize();
8868 alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
8879 SmallVectorImpl<ForwardedRegister> &Forwards =
8881 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
8885 if (!CCInfo.isAllocated(AArch64::X8)) {
8887 Forwards.
push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
8896 for (
unsigned I = 0,
E = Ins.
size();
I !=
E; ++
I) {
8898 Ins[
I].Flags.isInReg()) &&
8899 Ins[
I].Flags.isSRet()) {
8914 unsigned StackArgSize = CCInfo.getStackSize();
8916 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
8920 StackArgSize =
alignTo(StackArgSize, 16);
8934 if (Subtarget->hasCustomCallingConv())
8935 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
8937 if (
getTM().useNewSMEABILowering()) {
8940 if (
Attrs.hasZAState()) {
8944 }
else if (
Attrs.hasAgnosticZAInterface()) {
8945 RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
8948 auto *RetTy = EVT(MVT::i64).getTypeForEVT(*DAG.
getContext());
8949 TargetLowering::CallLoweringInfo CLI(DAG);
8950 CLI.setDebugLoc(
DL).setChain(Chain).setLibCallee(
8956 ISD::DYNAMIC_STACKALLOC,
DL, DAG.
getVTList(MVT::i64, MVT::Other),
8957 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
8963 Chain = DAG.
getNode(AArch64ISD::SME_STATE_ALLOC,
DL,
8974 if (
Attrs.hasZAState()) {
8981 Buffer = DAG.
getNode(AArch64ISD::ALLOCATE_ZA_BUFFER,
DL,
8982 DAG.
getVTList(MVT::i64, MVT::Other), {Chain, SVL});
8985 Buffer = DAG.
getNode(ISD::DYNAMIC_STACKALLOC,
DL,
8987 {Chain, Size, DAG.getConstant(1, DL, MVT::i64)});
8993 AArch64ISD::INIT_TPIDR2OBJ,
DL, DAG.
getVTList(MVT::Other),
8994 { Buffer.getValue(1), Buffer.getValue(0),
8996 }
else if (
Attrs.hasAgnosticZAInterface()) {
8999 DAG.
getNode(AArch64ISD::GET_SME_SAVE_SIZE,
DL,
9000 DAG.
getVTList(MVT::i64, MVT::Other), Chain);
9004 Buffer = DAG.
getNode(AArch64ISD::ALLOC_SME_SAVE_BUFFER,
DL,
9006 {Chain, BufferSize});
9010 ISD::DYNAMIC_STACKALLOC,
DL, DAG.
getVTList(MVT::i64, MVT::Other),
9011 {Chain, BufferSize, DAG.getConstant(1, DL, MVT::i64)});
9023 for (
const ISD::InputArg &
I : Ins) {
9024 if (
I.Flags.isSwiftSelf() ||
I.Flags.isSwiftError() ||
9025 I.Flags.isSwiftAsync()) {
9029 "Swift attributes can't be used with preserve_none",
9039void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
9045 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9049 Subtarget->isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
9055 if (Subtarget->isWindowsArm64EC()) {
9062 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
9064 if (GPRSaveSize != 0) {
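    // Spill the remaining (unnamed) GPR argument registers into the varargs
    // save area.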
9067 if (GPRSaveSize & 15)
9074 if (Subtarget->isWindowsArm64EC()) {
9087 for (
unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
9093 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
9103 if (Subtarget->hasFPARMv8() && !IsWin64) {
9105 const unsigned NumFPRArgRegs =
FPRArgRegs.size();
9108 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
9110 if (FPRSaveSize != 0) {
9115 for (
unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
9130 if (!MemOps.
empty()) {
9137SDValue AArch64TargetLowering::LowerCallResult(
9141 SDValue ThisVal,
bool RequiresSMChange)
const {
9142 DenseMap<unsigned, SDValue> CopiedRegs;
9144 for (
unsigned i = 0; i != RVLocs.
size(); ++i) {
9145 CCValAssign VA = RVLocs[i];
9149 if (i == 0 && isThisReturn) {
9151 "unexpected return calling convention register assignment");
9187 Val = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
9254 unsigned NumArgs = Outs.
size();
9255 for (
unsigned i = 0; i != NumArgs; ++i) {
9256 MVT ArgVT = Outs[i].VT;
9259 bool UseVarArgCC =
false;
9263 if (IsCalleeWin64) {
9277 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
9279 else if (ActualMVT == MVT::i16)
9287 Outs[i].OrigTy, CCInfo);
9288 assert(!Res &&
"Call operand has unhandled type");
9303bool AArch64TargetLowering::isEligibleForTailCallOptimization(
9304 const CallLoweringInfo &CLI)
const {
9310 bool IsVarArg = CLI.IsVarArg;
9314 const SelectionDAG &DAG = CLI.DAG;
9321 SMECallAttrs CallAttrs =
9334 MF.
getInfo<AArch64FunctionInfo>()->isSVECC())
9337 bool CCMatch = CallerCC == CalleeCC;
9352 if (i->hasByValAttr())
9361 if (i->hasInRegAttr()) {
9362 unsigned ArgIdx = i - CallerF.
arg_begin();
9363 if (!CLI.CB || CLI.CB->arg_size() <= ArgIdx)
9365 AttributeSet
Attrs = CLI.CB->getParamAttributes(ArgIdx);
9366 if (!
Attrs.hasAttribute(Attribute::InReg) ||
9367 !
Attrs.hasAttribute(Attribute::StructRet) || !i->hasStructRetAttr() ||
9368 CLI.CB->getArgOperand(ArgIdx) != i) {
9385 const GlobalValue *GV =
G->getGlobal();
9388 (!
TT.isOSWindows() ||
TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO()))
9408 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
9409 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
9411 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
9412 if (Subtarget->hasCustomCallingConv()) {
9413 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
9414 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
9416 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9425 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs,
C);
9429 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
9437 for (
const CCValAssign &ArgLoc : ArgLocs)
9438 if (!ArgLoc.isRegLoc())
9442 const AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9450 A.getValVT().isScalableVector() ||
9451 Subtarget->isWindowsArm64EC()) &&
9452 "Expected value to be scalable");
9472 int ClobberedFI)
const {
9475 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
9486 if (FI->getIndex() < 0) {
9488 int64_t InLastByte = InFirstByte;
9491 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
9492 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
9500bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
9501 bool TailCallOpt)
const {
9512 APInt RequiredZero(SizeInBits, 0xFE);
9514 bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
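  // The value is already a valid zero-extended i1 if bits [7:1] are known to
  // be zero (mask 0xFE).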
9518void AArch64TargetLowering::AdjustInstrPostInstrSelection(
MachineInstr &
MI,
9524 if (
MI.getOpcode() == AArch64::MSRpstatesvcrImm1 ||
9525 MI.getOpcode() == AArch64::MSRpstatePseudo) {
    for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
      if (MachineOperand &MO = MI.getOperand(I);
          MO.isReg() && MO.isImplicit() && MO.isDef() &&
          (AArch64::GPR32RegClass.contains(MO.getReg()) ||
           AArch64::GPR64RegClass.contains(MO.getReg())))
        MI.removeOperand(I);
9535 if (
MI.getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
9536 MI.getOperand(0).getImm() == AArch64SVCR::SVCRSMZA) {
9551 const MachineFunction &MF = *
MI.getMF();
9552 if (MF.
getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() &&
9553 (
MI.getOpcode() == AArch64::ADDXri ||
9554 MI.getOpcode() == AArch64::SUBXri)) {
9555 const MachineOperand &MO =
MI.getOperand(1);
9564 unsigned Condition,
bool InsertVectorLengthCheck)
const {
9572 Ops.push_back(InGlue);
9573 return DAG.
getNode(AArch64ISD::CHECK_MATCHING_VL,
DL,
9577 if (InsertVectorLengthCheck &&
Enable) {
9580 SDValue CheckVL = GetCheckVL(Chain, InGlue);
9593 assert(PStateReg.
isValid() &&
"PStateSM Register is invalid");
9600 Opcode =
Enable ? AArch64ISD::COND_SMSTART : AArch64ISD::COND_SMSTOP;
9601 Ops.push_back(ConditionOp);
9602 Ops.push_back(PStateSM);
9604 Opcode =
Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
9606 Ops.push_back(RegMask);
9609 Ops.push_back(InGlue);
9614 if (!InsertVectorLengthCheck ||
Enable)
9641 if (Flags.isZExt() || Flags.isSExt())
9648 Arg->
isAssert() ||
Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
9660 int FI = FINode->getIndex();
9678AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
9680 SelectionDAG &DAG = CLI.DAG;
9687 bool &IsTailCall = CLI.IsTailCall;
9689 bool IsVarArg = CLI.IsVarArg;
9690 const CallBase *CB = CLI.CB;
9693 MachineFunction::CallSiteInfo CSInfo;
9694 bool IsThisReturn =
false;
9696 AArch64FunctionInfo *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
9698 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType;
9699 bool IsSibCall =
false;
9700 bool GuardWithBTI =
false;
9702 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
9703 !Subtarget->noBTIAtReturnTwice()) {
9709 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.
getContext());
9712 unsigned NumArgs = Outs.
size();
9714 for (
unsigned i = 0; i != NumArgs; ++i) {
9715 if (Outs[i].
Flags.isVarArg() && Outs[i].VT.isScalableVector())
9717 "currently not supported");
9728 RetCCInfo.AnalyzeCallResult(Ins, RetCC);
9732 CSInfo = MachineFunction::CallSiteInfo(*CB);
9737 auto HasSVERegLoc = [](CCValAssign &Loc) {
9738 if (!Loc.isRegLoc())
9740 return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
9741 AArch64::PPRRegClass.contains(Loc.getLocReg());
9743 if (
any_of(RVLocs, HasSVERegLoc) ||
any_of(ArgLocs, HasSVERegLoc))
9748 SMECallAttrs CallAttrs =
9751 std::optional<unsigned> ZAMarkerNode;
9754 if (UseNewSMEABILowering) {
9757 ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
9759 ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
9762 ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
9767 IsTailCall = isEligibleForTailCallOptimization(CLI);
9771 if (!ZAMarkerNode && !TailCallOpt && IsTailCall &&
9779 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
9781 "site marked musttail");
9799 if (IsTailCall && !IsSibCall) {
9804 NumBytes =
alignTo(NumBytes, 16);
9809 FPDiff = NumReusableBytes - NumBytes;
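    // A negative FPDiff means this tail call needs more incoming-argument
    // space than the caller has already reserved.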
9813 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
9821 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
9824 auto DescribeCallsite =
9825 [&](OptimizationRemarkAnalysis &
R) -> OptimizationRemarkAnalysis & {
9828 R <<
ore::NV(
"Callee", ES->getSymbol());
9829 else if (CLI.CB && CLI.CB->getCalledFunction())
9830 R <<
ore::NV(
"Callee", CLI.CB->getCalledFunction()->getName());
9832 R <<
"unknown callee";
9837 bool RequiresLazySave = !UseNewSMEABILowering && CallAttrs.
requiresLazySave();
9838 bool RequiresSaveAllZA =
9840 if (RequiresLazySave) {
9851 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9853 : OptimizationRemarkAnalysis(
"sme",
"SMELazySaveZA",
9855 return DescribeCallsite(R) <<
" sets up a lazy save for ZA";
9857 }
else if (RequiresSaveAllZA) {
9859 "Cannot share state that may not exist");
9865 if (RequiresSMChange) {
9868 auto R = CLI.CB ? OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9870 : OptimizationRemarkAnalysis(
"sme",
"SMETransition",
9872 DescribeCallsite(R) <<
" requires a streaming mode transition";
9879 bool ShouldPreserveZT0 =
9884 if (ShouldPreserveZT0) {
9888 {Chain, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
9895 assert((!DisableZA || !RequiresLazySave) &&
9896 "Lazy-save should have PSTATE.SM=1 on entry to the function");
9900 AArch64ISD::SMSTOP,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Chain,
9905 assert((!IsSibCall || !ZAMarkerNode) &&
"ZA markers require CALLSEQ_START");
9915 {Chain, Chain.getValue(1)});
9923 SmallSet<unsigned, 8> RegsUsed;
9927 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
9929 for (
const auto &
F : Forwards) {
  unsigned ExtraArgLocs = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
9955 if (Outs[i].ArgVT == MVT::i1) {
9977 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
9993 assert((isScalable || Subtarget->isWindowsArm64EC()) &&
9994 "Indirect arguments should be scalable on most subtargets");
9997 TypeSize PartSize = StoreSize;
9998 unsigned NumParts = 1;
9999 if (Outs[i].
Flags.isInConsecutiveRegs()) {
10000 while (!Outs[i + NumParts - 1].
Flags.isInConsecutiveRegsLast())
10002 StoreSize *= NumParts;
10011 bool IsPred = VA.
getValVT() == MVT::aarch64svcount ||
10029 if (NumParts > 0) {
10045 if (i == 0 &&
Flags.isReturned() && !
Flags.isSwiftSelf() &&
10046 Outs[0].VT == MVT::i64) {
10048 "unexpected calling convention register assignment");
10050 "unexpected use of 'returned'");
10051 IsThisReturn =
true;
10060 [=](
const std::pair<unsigned, SDValue> &Elt) {
10069 [&VA](MachineFunction::ArgRegPair ArgReg) {
10070 return ArgReg.Reg == VA.getLocReg();
10077 Arg = DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10082 if (
Options.EmitCallSiteInfo)
10089 MachinePointerInfo DstInfo;
10093 uint32_t BEAlign = 0;
10099 OpSize =
Flags.isByVal() ?
Flags.getByValSize() * 8
10101 OpSize = (OpSize + 7) / 8;
10102 if (!Subtarget->isLittleEndian() && !
Flags.isByVal() &&
10103 !
Flags.isInConsecutiveRegs()) {
10105 BEAlign = 8 - OpSize;
10108 int32_t
Offset = LocMemOffset + BEAlign;
10125 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
10133 if (Outs[i].
Flags.isByVal()) {
10137 Chain,
DL, DstAddr, Arg, SizeNode,
10138 Outs[i].
Flags.getNonZeroByValAlign(),
10140 nullptr, std::nullopt, DstInfo, MachinePointerInfo());
10157 if (IsVarArg && Subtarget->isWindowsArm64EC() &&
10158 !(CLI.CB && CLI.CB->isMustTailCall())) {
10176 if (!MemOpChains.
empty())
10180 if (RequiresSMChange) {
10181 bool InsertVectorLengthCheck =
10191 for (
auto &RegToPass : RegsToPass) {
10193 RegToPass.second, InGlue);
10200 const GlobalValue *CalledGlobal =
nullptr;
10201 unsigned OpFlags = 0;
10203 CalledGlobal =
G->getGlobal();
10204 OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
10210 const GlobalValue *GV =
G->getGlobal();
10215 Subtarget->isTargetMachO()) ||
10217 const char *Sym = S->getSymbol();
10230 if (IsTailCall && !IsSibCall) {
10235 unsigned Opc = IsTailCall ? AArch64ISD::TC_RETURN : AArch64ISD::CALL;
10237 std::vector<SDValue>
Ops;
10238 Ops.push_back(Chain);
10239 Ops.push_back(Callee);
10246 "tail calls cannot be marked with clang.arc.attachedcall");
10247 Opc = AArch64ISD::CALL_RVMARKER;
10253 Ops.insert(
Ops.begin() + 1, GA);
10260 Ops.insert(
Ops.begin() + 2, DoEmitMarker);
10262 Opc = AArch64ISD::CALL_ARM64EC_TO_X64;
10263 }
else if (GuardWithBTI) {
10264 Opc = AArch64ISD::CALL_BTI;
10275 const uint64_t
Key = CLI.PAI->Key;
10277 "Invalid auth call key");
10281 std::tie(IntDisc, AddrDisc) =
10284 if (
Opc == AArch64ISD::CALL_RVMARKER)
10285 Opc = AArch64ISD::AUTH_CALL_RVMARKER;
10287 Opc = IsTailCall ? AArch64ISD::AUTH_TC_RETURN : AArch64ISD::AUTH_CALL;
10289 Ops.push_back(IntDisc);
10290 Ops.push_back(AddrDisc);
10295 for (
auto &RegToPass : RegsToPass)
10297 RegToPass.second.getValueType()));
10300 const uint32_t *
Mask;
10301 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10302 if (IsThisReturn) {
10304 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
10306 IsThisReturn =
false;
10307 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10310 Mask =
TRI->getCallPreservedMask(MF, CallConv);
10312 if (Subtarget->hasCustomCallingConv())
10313 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
10315 if (
TRI->isAnyArgRegReserved(MF))
10316 TRI->emitReservedArgRegCallError(MF);
10318 assert(Mask &&
"Missing call preserved mask for calling convention");
10322 Ops.push_back(InGlue);
10324 if (CLI.DeactivationSymbol)
10337 if (CalledGlobal &&
10351 if (CalledGlobal &&
10355 uint64_t CalleePopBytes =
10356 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
10364 Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
10365 IsThisReturn ? OutVals[0] :
SDValue(), RequiresSMChange);
10370 if (RequiresSMChange) {
10376 if (!UseNewSMEABILowering &&
10380 AArch64ISD::SMSTART,
DL, DAG.
getVTList(MVT::Other, MVT::Glue), Result,
10383 if (ShouldPreserveZT0)
10386 {Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
10388 if (RequiresLazySave) {
10390 }
else if (RequiresSaveAllZA) {
10395 if (RequiresSMChange || RequiresLazySave || ShouldPreserveZT0 ||
10396 RequiresSaveAllZA) {
10397 for (
unsigned I = 0;
I < InVals.
size(); ++
I) {
10412 for (
const ISD::OutputArg &O : Outs) {
10413 if (
O.Flags.isSwiftSelf() ||
O.Flags.isSwiftError() ||
10414 O.Flags.isSwiftAsync()) {
10418 "Swift attributes can't be used with preserve_none",
10419 DL.getDebugLoc()));
10428bool AArch64TargetLowering::CanLowerReturn(
10431 const Type *RetTy)
const {
10434 CCState CCInfo(CallConv, isVarArg, MF, RVLocs,
Context);
10445 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10449 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.
getContext());
10455 SmallSet<unsigned, 4> RegsUsed;
10456 for (
unsigned i = 0, realRVLocIdx = 0; i != RVLocs.
size();
10457 ++i, ++realRVLocIdx) {
10458 CCValAssign &VA = RVLocs[i];
10460 SDValue Arg = OutVals[realRVLocIdx];
10466 if (Outs[i].ArgVT == MVT::i1) {
10482 assert(VA.
getValVT() == MVT::i32 &&
"only expect 32 -> 64 upper bits");
10491 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
10501 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10517 for (
auto &RetVal : RetVals) {
10521 DAG.
getNode(AArch64ISD::COALESCER_BARRIER,
DL,
10522 DAG.
getVTList(RetVal.second.getValueType(), MVT::Glue),
10524 Chain = DAG.
getCopyToReg(Chain,
DL, RetVal.first, RetVal.second, Glue);
10527 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
10538 unsigned RetValReg = AArch64::X0;
10540 RetValReg = AArch64::X8;
10551 if (AArch64::GPR64RegClass.
contains(*
I))
10553 else if (AArch64::FPR64RegClass.
contains(*
I))
10564 RetOps.push_back(Glue);
10575 MachinePointerInfo());
10576 RetOps.insert(RetOps.begin() + 1, Arm64ECRetDest);
10578 return DAG.
getNode(AArch64ISD::TC_RETURN,
DL, MVT::Other, RetOps);
10581 return DAG.
getNode(AArch64ISD::RET_GLUE,
DL, MVT::Other, RetOps);
10590 unsigned Flag)
const {
10592 N->getOffset(), Flag);
10597 unsigned Flag)
const {
10603 unsigned Flag)
const {
10605 N->getOffset(), Flag);
10610 unsigned Flag)
const {
10616 unsigned Flag)
const {
10621template <
class NodeTy>
10623 unsigned Flags)
const {
10631 .
getInfo<AArch64FunctionInfo>()
10632 ->hasELFSignedGOT())
10635 return DAG.
getNode(AArch64ISD::LOADgot,
DL, Ty, GotAddr);
10639template <
class NodeTy>
10641 unsigned Flags)
const {
10647 AArch64ISD::WrapperLarge,
DL, Ty,
10655template <
class NodeTy>
10657 unsigned Flags)
const {
10665 return DAG.
getNode(AArch64ISD::ADDlow,
DL, Ty, ADRP,
Lo);
10669template <
class NodeTy>
10671 unsigned Flags)
const {
10675 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
10676 return DAG.
getNode(AArch64ISD::ADR,
DL, Ty, Sym);
10682 const GlobalValue *GV = GN->
getGlobal();
10683 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV,
getTargetMachine());
10687 "unexpected offset in global node");
10692 return getGOT(GN, DAG, OpFlags);
10698 Result = getAddrLarge(GN, DAG, OpFlags);
10700 Result = getAddrTiny(GN, DAG, OpFlags);
10702 Result = getAddr(GN, DAG, OpFlags);
10741AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
10743 assert(Subtarget->isTargetDarwin() &&
10744 "This function expects a Darwin target");
10759 PtrMemVT,
DL, Chain, DescAddr,
10774 const AArch64RegisterInfo *
TRI = Subtarget->getRegisterInfo();
10775 const uint32_t *
Mask =
TRI->getTLSCallPreservedMask();
10776 if (Subtarget->hasCustomCallingConv())
10784 unsigned Opcode = AArch64ISD::CALL;
10786 Ops.push_back(Chain);
10787 Ops.push_back(FuncTLVGet);
10791 Opcode = AArch64ISD::AUTH_CALL;
10913SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
10918 auto *FuncInfo = MF.
getInfo<AArch64FunctionInfo>();
10922 SDVTList NodeTys = DAG.
getVTList(MVT::Other, MVT::Glue);
10925 bool RequiresSMChange = TLSCallAttrs.requiresSMChange();
  auto ChainAndGlue = [](SDValue Chain) -> std::pair<SDValue, SDValue> {
    return {Chain, Chain.getValue(1)};
  };
10931 if (RequiresSMChange)
10932 std::tie(Chain, Glue) =
10938 ? AArch64ISD::TLSDESC_AUTH_CALLSEQ
10939 : AArch64ISD::TLSDESC_CALLSEQ;
10941 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
10944 if (TLSCallAttrs.requiresLazySave())
10945 std::tie(Chain, Glue) = ChainAndGlue(DAG.
getNode(
10946 AArch64ISD::REQUIRES_ZA_SAVE,
DL, NodeTys, {Chain, Chain.getValue(1)}));
10948 if (RequiresSMChange)
10949 std::tie(Chain, Glue) =
10957AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
10959 assert(Subtarget->isTargetELF() &&
"This function expects an ELF target");
10962 AArch64FunctionInfo *MFI =
10977 "in local exec TLS model");
10988 const GlobalValue *GV = GA->
getGlobal();
10993 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
10996 TPOff = DAG.
getNode(AArch64ISD::LOADgot,
DL, PtrVT, TPOff);
11014 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11021 GV,
DL, MVT::i64, 0,
11038 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
11046AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
11048 assert(Subtarget->isTargetWindows() &&
"Windows specific TLS lowering");
11060 TLSArray = DAG.
getLoad(PtrVT,
DL, Chain, TLSArray, MachinePointerInfo());
11073 DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, ADRP, TLSIndexLo);
11074 TLSIndex = DAG.
getLoad(MVT::i32,
DL, Chain, TLSIndex, MachinePointerInfo());
11084 MachinePointerInfo());
11085 Chain =
TLS.getValue(1);
11088 const GlobalValue *GV = GA->
getGlobal();
11100 Addr = DAG.
getNode(AArch64ISD::ADDlow,
DL, PtrVT, Addr, TGALo);
11110 if (Subtarget->isTargetDarwin())
11111 return LowerDarwinGlobalTLSAddress(
Op, DAG);
11112 if (Subtarget->isTargetELF())
11113 return LowerELFGlobalTLSAddress(
Op, DAG);
11114 if (Subtarget->isTargetWindows())
11115 return LowerWindowsGlobalTLSAddress(
Op, DAG);
11153 assert(TGN->getGlobal()->hasExternalWeakLinkage());
11159 if (TGN->getOffset() != 0)
11161 "unsupported non-zero offset in weak ptrauth global reference");
11168 {TGA, Key, Discriminator}),
11173AArch64TargetLowering::LowerPtrAuthGlobalAddress(
SDValue Op,
11176 uint64_t KeyC =
Op.getConstantOperandVal(1);
11177 SDValue AddrDiscriminator =
Op.getOperand(2);
11178 uint64_t DiscriminatorC =
Op.getConstantOperandVal(3);
11179 EVT VT =
Op.getValueType();
11189 "constant discriminator in ptrauth global out of range [0, 0xffff]");
11192 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO())
11195 int64_t PtrOffsetC = 0;
11201 const GlobalValue *PtrGV = PtrN->getGlobal();
11204 const unsigned OpFlags =
11208 "unsupported non-GOT op flags on ptrauth global reference");
11211 PtrOffsetC += PtrN->getOffset();
11214 assert(PtrN->getTargetFlags() == 0 &&
11215 "unsupported target flags on ptrauth global");
11220 ? AddrDiscriminator
11224 if (!NeedsGOTLoad) {
11228 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11237 {TPtr, Key, TAddrDiscriminator, Discriminator}),
11274 bool ProduceNonFlagSettingCondBr =
11280 if (
LHS.getValueType() == MVT::f128) {
11285 if (!
RHS.getNode()) {
11305 OFCC = getInvertedCondCode(OFCC);
11308 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11312 if (
LHS.getValueType().isInteger()) {
11314 (
LHS.getValueType() == MVT::i32 ||
LHS.getValueType() == MVT::i64));
11319 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
11329 uint64_t
Mask =
LHS.getConstantOperandVal(1);
11330 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
Test,
11335 return DAG.
getNode(AArch64ISD::CBZ,
DL, MVT::Other, Chain,
LHS, Dest);
11345 uint64_t
Mask =
LHS.getConstantOperandVal(1);
11346 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
Test,
11351 return DAG.
getNode(AArch64ISD::CBNZ,
DL, MVT::Other, Chain,
LHS, Dest);
11356 uint64_t SignBitPos;
11358 return DAG.
getNode(AArch64ISD::TBNZ,
DL, MVT::Other, Chain,
LHS,
11363 LHS.getOpcode() !=
ISD::AND && ProduceNonFlagSettingCondBr) {
11367 uint64_t SignBitPos;
11369 return DAG.
getNode(AArch64ISD::TBZ,
DL, MVT::Other, Chain,
LHS,
11375 if (Subtarget->hasCMPBR() &&
11377 ProduceNonFlagSettingCondBr) {
11386 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CCVal,
11390 assert(
LHS.getValueType() == MVT::f16 ||
LHS.getValueType() == MVT::bf16 ||
11391 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11400 DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
11403 return DAG.
getNode(AArch64ISD::BRCOND,
DL, MVT::Other, BR1, Dest, CC2Val,
11412 if (!Subtarget->isNeonAvailable() &&
11413 !Subtarget->useSVEForFixedLengthVectors())
11416 EVT VT =
Op.getValueType();
11444 if (!VT.
isVector() && !Subtarget->isNeonAvailable() &&
11445 Subtarget->isSVEorStreamingSVEAvailable()) {
11446 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64 && VT != MVT::bf16)
11461 auto BitCast = [
this](EVT VT,
SDValue Op, SelectionDAG &DAG) {
11463 return getSVESafeBitCast(VT,
Op, DAG);
11470 auto SetVecVal = [&](
int Idx = -1) {
11477 VecVal1 = BitCast(VecVT, In1, DAG);
11478 VecVal2 = BitCast(VecVT, In2, DAG);
  } else if (VT == MVT::f64) {
    VecVT = MVT::v2i64;
    SetVecVal(AArch64::dsub);
  } else if (VT == MVT::f32) {
    VecVT = MVT::v4i32;
    SetVecVal(AArch64::ssub);
  } else if (VT == MVT::f16 || VT == MVT::bf16) {
    VecVT = MVT::v8i16;
    SetVecVal(AArch64::hsub);
11503 if (VT == MVT::f64 || VT == MVT::v2f64) {
11505 SignMaskV = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2f64, SignMaskV);
11506 SignMaskV = DAG.
getNode(ISD::FNEG,
DL, MVT::v2f64, SignMaskV);
11507 SignMaskV = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2i64, SignMaskV);
11511 DAG.
getNode(AArch64ISD::BSP,
DL, VecVT, SignMaskV, VecVal1, VecVal2);
11512 if (VT == MVT::f16 || VT == MVT::bf16)
11514 if (VT == MVT::f32)
11516 if (VT == MVT::f64)
11519 return BitCast(VT, BSP, DAG);
11525 Attribute::NoImplicitFloat))
11528 EVT VT =
Op.getValueType();
11531 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
11539 if (VT == MVT::i32 && IsParity)
11542 if (Subtarget->isSVEorStreamingSVEAvailable()) {
11543 if (VT == MVT::i32 || VT == MVT::i64) {
11544 EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
11556 if (VT == MVT::i128) {
11557 Val = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2i64, Val);
11561 Val = DAG.
getNode(ISD::VECREDUCE_ADD,
DL, MVT::i64, Val);
11569 if (!Subtarget->isNeonAvailable())
11580 if (VT == MVT::i32 || VT == MVT::i64) {
11581 if (VT == MVT::i32)
11583 Val = DAG.
getNode(ISD::BITCAST,
DL, MVT::v8i8, Val);
11587 AddV = DAG.
getNode(AArch64ISD::NVCAST,
DL,
11588 VT == MVT::i32 ? MVT::v2i32 : MVT::v1i64, AddV);
11594 }
else if (VT == MVT::i128) {
11595 Val = DAG.
getNode(ISD::BITCAST,
DL, MVT::v16i8, Val);
11600 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v2i64, AddV),
11608 assert(!IsParity &&
"ISD::PARITY of vector types not supported");
11610 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
11611 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
11612 "Unexpected type for custom ctpop lowering");
  EVT DT = VT == MVT::v2i64 ? MVT::v4i32 : VT;
  if (VT == MVT::v2i64) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
    Val = DAG.getNode(AArch64ISD::UADDLP, DL, VT, Val);
  } else if (VT == MVT::v2i32) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
  } else if (VT == MVT::v4i32) {
    Val = DAG.getNode(AArch64ISD::UDOT, DL, DT, Zeros, Ones, Val);
11639 unsigned EltSize = 8;
11645 Val = DAG.
getNode(AArch64ISD::UADDLP,
DL, WidenVT, Val);
11652 EVT VT =
Op.getValueType();
11655 VT, Subtarget->useSVEForFixedLengthVectors()));
11665 EVT VT =
Op.getValueType();
11667 unsigned Opcode =
Op.getOpcode();
11694 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMAX_PRED);
11696 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SMIN_PRED);
11698 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMAX_PRED);
11700 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::UMIN_PRED);
11712 EVT VT =
Op.getValueType();
11716 VT, Subtarget->useSVEForFixedLengthVectors()))
11717 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
11729 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11736 REVB = DAG.
getNode(AArch64ISD::REV32,
DL, VST,
Op.getOperand(0));
11743 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11750 REVB = DAG.
getNode(AArch64ISD::REV64,
DL, VST,
Op.getOperand(0));
11756 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT,
11763 SmallVector<std::pair<SDValue, SDValue>, 16> &WorkList) {
11769 N =
N->getOperand(0);
11773 WorkList.push_back(std::make_pair(
N->getOperand(0),
N->getOperand(1)));
11779 if (
N->getOpcode() !=
ISD::OR || !
N->hasOneUse())
11793 EVT VT =
N->getValueType(0);
11803 unsigned NumXors = 0;
11808 std::tie(XOR0, XOR1) = WorkList[0];
11811 for (
unsigned I = 1;
I < WorkList.
size();
I++) {
11812 std::tie(XOR0, XOR1) = WorkList[
I];
11814 Cmp = DAG.
getNode(LogicOp,
DL, VT, Cmp, CmpChain);
11826 if (
Op.getValueType().isVector())
11827 return LowerVSETCC(
Op, DAG);
11829 bool IsStrict =
Op->isStrictFPOpcode();
11831 unsigned OpNo = IsStrict ? 1 : 0;
11834 Chain =
Op.getOperand(0);
11841 EVT VT =
Op.getValueType();
11847 if (
LHS.getValueType() == MVT::f128) {
11852 if (!
RHS.getNode()) {
11853 assert(
LHS.getValueType() ==
Op.getValueType() &&
11854 "Unexpected setcc expansion!");
11859 if (
LHS.getValueType().isInteger()) {
11875 SDValue Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CCVal, Cmp);
11880 assert(
LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f16 ||
11881 LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
11902 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, FVal, TVal, CC1Val, Cmp);
11912 DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, FVal, CC1Val, Cmp);
11915 Res = DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal, CS1, CC2Val, Cmp);
11925 EVT VT = LHS.getValueType();
11926 if (VT != MVT::i32 && VT != MVT::i64)
11936 EVT OpVT = Op.getValueType();
11945 return DAG.getNode(AArch64ISD::CSEL, DL, OpVT, FVal, TVal, CCVal,
11954 "function only supposed to emit natural comparisons");
11963 if (!LHS.getValueType().isVector()) {
11968 DAG.getUNDEF(VecVT), Fcmeq, Zero);
12002 assert(!LHS.getValueType().isVector());
12003 assert(!RHS.getValueType().isVector());
12007 if (!CTVal || !CFVal)
12021 bool OneNaN = false;
12037 bool ShouldInvert = false;
12046 if (!Cmp2 && !ShouldInvert)
12064SDValue AArch64TargetLowering::LowerSELECT_CC(
12070 if (LHS.getValueType() == MVT::f128) {
12075 if (!RHS.getNode()) {
12082 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
12083 LHS.getValueType() == MVT::bf16) {
12089 if (LHS.getValueType().isInteger()) {
12091 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
12103 LHS.getValueType() == RHS.getValueType()) {
12104 EVT VT = LHS.getValueType();
12110 Shift = DAG.getNOT(DL, Shift, VT);
12124 uint64_t SignBitPos;
12126 EVT TestVT = LHS.getValueType();
12130 LHS, SignBitConst);
12158 unsigned Opcode = AArch64ISD::CSEL;
12166 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
12186 } else if (CTVal && CFVal) {
12194 if (TrueVal == ~FalseVal) {
12195 Opcode = AArch64ISD::CSINV;
12196 } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
12197 TrueVal == -FalseVal) {
12198 Opcode = AArch64ISD::CSNEG;
12208 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
12209 Opcode = AArch64ISD::CSINC;
12211 if (TrueVal32 > FalseVal32) {
12217 const uint64_t TrueVal64 = TrueVal;
12218 const uint64_t FalseVal64 = FalseVal;
12220 if ((TrueVal64 == FalseVal64 + 1) || (TrueVal64 + 1 == FalseVal64)) {
12221 Opcode = AArch64ISD::CSINC;
12223 if (TrueVal > FalseVal) {
12236 if (Opcode != AArch64ISD::CSEL) {
12249 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
12254 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
12256 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
12258 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
12259 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
12264 Opcode = AArch64ISD::CSINV;
12273 return DAG.getNode(Opcode, DL, VT, TVal, FVal, CCVal, Cmp);
12277 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
12278 LHS.getValueType() == MVT::f64);
12285 if (Subtarget->isNeonAvailable() && all_of(Users, [](const SDNode *U) {
12286 switch (U->getOpcode()) {
12291 case AArch64ISD::DUP:
12309 if (Flags.hasNoSignedZeros()) {
12313 if (RHSVal && RHSVal->isZero()) {
12321 CFVal && CFVal->isZero() &&
12329 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp);
12335 return DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp);
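// Annotation: for integer selects between two constants the code upgrades
// CSEL to CSINV (TrueVal == ~FalseVal), CSNEG (TrueVal == -FalseVal) or CSINC
// (values differ by one), possibly swapping operands or inverting the
// condition; FP selects may again need two chained CSELs when the condition
// maps to two AArch64 condition codes.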
12344 EVT Ty = Op.getValueType();
12345 auto Idx = Op.getConstantOperandAPInt(2);
12346 int64_t IdxVal = Idx.getSExtValue();
12348 "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
12357 std::optional<unsigned> PredPattern;
12369 return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
12375 if (IdxVal >= 0 && (IdxVal * BlockSize / 8) < 256)
12388 SDNodeFlags Flags = Op->getFlags();
12390 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), Flags, DL, DAG);
12400 EVT Ty = Op.getValueType();
12401 if (Ty == MVT::aarch64svcount) {
12402 TVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, TVal);
12403 FVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, FVal);
12406 return DAG.getNode(ISD::BITCAST, DL, Ty, Sel);
12438 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
12457 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12465 Op->getFlags(), DL, DAG);
12467 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
12482 !Subtarget->isTargetMachO())
12483 return getAddrLarge(JT, DAG);
12485 return getAddrTiny(JT, DAG);
12486 return getAddr(JT, DAG);
12499 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
12504 "aarch64-jump-table-hardening")) {
12506 if (Subtarget->isTargetMachO()) {
12511 assert(Subtarget->isTargetELF() &&
12512 "jump table hardening only supported on MachO/ELF");
12543 std::optional<uint16_t> BADisc =
12544 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction());
12555 {Dest, Key, Disc, AddrDisc, Chain});
12565 if (Subtarget->isTargetMachO()) {
12566 return getGOT(CP, DAG);
12569 return getAddrLarge(CP, DAG);
12571 return getAddrTiny(CP, DAG);
12573 return getAddr(CP, DAG);
12581 if (std::optional<uint16_t> BADisc =
12582 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(
12596 {TargetBA, Key, AddrDisc, Disc});
12604 return getAddrLarge(BAN, DAG);
12606 return getAddrTiny(BAN, DAG);
12608 return getAddr(BAN, DAG);
12613 AArch64FunctionInfo *FuncInfo =
12622 MachinePointerInfo(SV));
12628 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
12632 if (Subtarget->isWindowsArm64EC()) {
12638 uint64_t StackOffset;
12653 MachinePointerInfo(SV));
12661 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
12662 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12677 MachinePointerInfo(SV), Align(PtrSize)));
12694 MachinePointerInfo(SV, Offset),
12712 MachinePointerInfo(SV, Offset),
12722 GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12730 VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
12740 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
12741 return LowerWin64_VASTART(Op, DAG);
12742 else if (Subtarget->isTargetDarwin())
12743 return LowerDarwin_VASTART(Op, DAG);
12745 return LowerAAPCS_VASTART(Op, DAG);
12753 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
12754 unsigned VaListSize =
12755 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
12757 : Subtarget->isTargetILP32() ? 20 : 32;
12763 Align(PtrSize), false, false, nullptr,
12764 std::nullopt, MachinePointerInfo(DestSV),
12765 MachinePointerInfo(SrcSV));
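// Annotation: va_start is ABI-specific: Darwin and Windows use a single
// pointer-sized va_list, while the AAPCS variant fills the full structure
// (stack, gr_top, vr_top, gr_offs, vr_offs); va_copy simply copies VaListSize
// bytes (8, 20 for ILP32, or 32) with one memcpy-style node.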
12769 assert(Subtarget->isTargetDarwin() &&
12770 "automatic va_arg instruction only works on Darwin");
12773 EVT VT = Op.getValueType();
12777 MaybeAlign Align(Op.getConstantOperandVal(3));
12778 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
12782 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
12788 "currently not supported");
12790 if (Align && *Align > MinSlotSize) {
12806 ArgSize = std::max(ArgSize, MinSlotSize);
12807 bool NeedFPTrunc = false;
12810 NeedFPTrunc = true;
12820 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
12826 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
12836 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
12844 EVT VT = Op.getValueType();
12846 unsigned Depth = Op.getConstantOperandVal(0);
12851 MachinePointerInfo());
12853 if (Subtarget->isTargetILP32())
12869#define GET_REGISTER_MATCHER
12870#include "AArch64GenAsmMatcher.inc"
12877 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
12879 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
12880 if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
12881 !MRI->isReservedReg(MF, Reg))
12891 EVT VT = Op.getValueType();
12907 EVT VT = Op.getValueType();
12909 unsigned Depth = Op.getConstantOperandVal(0);
12912 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
12929 if (Subtarget->hasPAuth()) {
12957 bool OptForSize) const {
12958 bool IsLegal = false;
12967 const APInt ImmInt = Imm.bitcastToAPInt();
12968 if (VT == MVT::f64)
12970 else if (VT == MVT::f32)
12972 else if (VT == MVT::f16 || VT == MVT::bf16)
12982 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
12991 "Should be able to build any value with at most 4 moves");
12992 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
12993 IsLegal = Insn.size() <= Limit;
12997 << " imm value: "; Imm.dump(););
13009 if ((ST->hasNEON() &&
13010 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
13011 VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
13012 VT == MVT::v4f32)) ||
13014 (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
13021 constexpr unsigned AccurateBits = 8;
13023 ExtraSteps = DesiredBits <= AccurateBits
13028 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
13038 EVT VT = Op.getValueType();
13045AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
13054 bool Reciprocal) const {
13058 DAG, ExtraSteps)) {
13063 SDNodeFlags Flags =
13068 for (int i = ExtraSteps; i > 0; --i) {
13071 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
13086 int &ExtraSteps) const {
13089 DAG, ExtraSteps)) {
13097 for (int i = ExtraSteps; i > 0; --i) {
13137const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
13145 if (!Subtarget->hasFPARMv8())
13170static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
13173 (Constraint[1] != 'p' && Constraint[1] != 'z'))
13174 return std::nullopt;
13176 bool IsPredicate = Constraint[1] == 'p';
13177 Constraint = Constraint.substr(2, Constraint.size() - 3);
13178 bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
13179 if (IsPredicateAsCount)
13184 return std::nullopt;
13186 if (IsPredicateAsCount)
13187 return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
13189 return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
13190 return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
13193static std::optional<PredicateConstraint>
13204 if (VT != MVT::aarch64svcount &&
13208 switch (Constraint) {
13210 return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
13211 : &AArch64::PPR_p8to15RegClass;
13213 return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
13214 : &AArch64::PPR_3bRegClass;
13216 return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
13217 : &AArch64::PPRRegClass;
13225static std::optional<ReducedGprConstraint>
13238 switch (Constraint) {
13240 return &AArch64::MatrixIndexGPR32_8_11RegClass;
13242 return &AArch64::MatrixIndexGPR32_12_15RegClass;
13276 return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32,
13279 getCondCode(DAG, getInvertedCondCode(CC)), NZCV);
13283SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
13285 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
13290 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
13291 OpInfo.ConstraintVT.getSizeInBits() < 8)
13306 if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
13317AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
13318 if (Constraint.size() == 1) {
13319 switch (Constraint[0]) {
13356AArch64TargetLowering::getSingleConstraintMatchWeight(
13357 AsmOperandInfo &info, const char *constraint) const {
13359 Value *CallOperandVal = info.CallOperandVal;
13362 if (!CallOperandVal)
13366 switch (*constraint) {
13388std::pair<unsigned, const TargetRegisterClass *>
13389AArch64TargetLowering::getRegForInlineAsmConstraint(
13391 if (Constraint.size() == 1) {
13392 switch (Constraint[0]) {
13395 return std::make_pair(0U, nullptr);
13397 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
13399 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
13400 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
13402 if (!Subtarget->hasFPARMv8())
13406 return std::make_pair(0U, &AArch64::ZPRRegClass);
13407 return std::make_pair(0U, nullptr);
13409 if (VT == MVT::Other)
13413 return std::make_pair(0U, &AArch64::FPR16RegClass);
13415 return std::make_pair(0U, &AArch64::FPR32RegClass);
13417 return std::make_pair(0U, &AArch64::FPR64RegClass);
13419 return std::make_pair(0U, &AArch64::FPR128RegClass);
13425 if (!Subtarget->hasFPARMv8())
13428 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
13430 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
13433 if (!Subtarget->hasFPARMv8())
13436 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
13444 if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
13445 !Subtarget->isSVEorStreamingSVEAvailable())
13446 return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
13447 &AArch64::FPR128RegClass);
13452 return std::make_pair(0U, RegClass);
13456 return std::make_pair(0U, RegClass);
13458 if (StringRef("{cc}").equals_insensitive(Constraint) ||
13460 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
13462 if (Constraint == "{za}") {
13463 return std::make_pair(unsigned(AArch64::ZA), &AArch64::MPRRegClass);
13466 if (Constraint == "{zt0}") {
13467 return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass);
13472 std::pair<unsigned, const TargetRegisterClass *> Res;
13477 unsigned Size = Constraint.size();
13478 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
13479 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
13482 if (!Failed && RegNo >= 0 && RegNo <= 31) {
13487 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
13488 Res.second = &AArch64::FPR64RegClass;
13490 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
13491 Res.second = &AArch64::FPR128RegClass;
13497 if (Res.second && !Subtarget->hasFPARMv8() &&
13498 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
13499 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
13500 return std::make_pair(0U, nullptr);
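// Annotation: single-letter constraints map to register classes ('r' to the
// common GPRs, 'w' to the FP/SIMD class sized by VT, 'x'/'y' to the restricted
// V0-V15/V0-V7 ranges), named registers such as {za}/{zt0} map to the SME
// register classes, and a "{vN}" alias is resolved to FPR64/FPR128 by number.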
13507 bool AllowUnknown) const {
13508 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
13509 return EVT(MVT::i64x8);
13516void AArch64TargetLowering::LowerAsmOperandForConstraint(
13522 if (Constraint.size() != 1)
13525 char ConstraintLetter = Constraint[0];
13526 switch (ConstraintLetter) {
13537 if (Op.getValueType() == MVT::i64)
13538 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
13540 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
13562 switch (ConstraintLetter) {
13576 CVal = C->getSExtValue();
13607 if ((CVal & 0xFFFF) == CVal)
13609 if ((CVal & 0xFFFF0000ULL) == CVal)
13611 uint64_t NCVal = ~(uint32_t)CVal;
13612 if ((NCVal & 0xFFFFULL) == NCVal)
13614 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13621 if ((CVal & 0xFFFFULL) == CVal)
13623 if ((CVal & 0xFFFF0000ULL) == CVal)
13625 if ((CVal & 0xFFFF00000000ULL) == CVal)
13627 if ((CVal & 0xFFFF000000000000ULL) == CVal)
13629 uint64_t NCVal = ~CVal;
13630 if ((NCVal & 0xFFFFULL) == NCVal)
13632 if ((NCVal & 0xFFFF0000ULL) == NCVal)
13634 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
13636 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
13650 Ops.push_back(Result);
13687 EVT VT = Op.getValueType();
13689 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13693 if (VT != MVT::v16i8 && VT != MVT::v8i8)
13697 assert((NumElts == 8 || NumElts == 16) &&
13698 "Need to have exactly 8 or 16 elements in vector.");
13704 for (unsigned i = 0; i < NumElts; ++i) {
13711 SourceVec = OperandSourceVec;
13712 else if (SourceVec != OperandSourceVec)
13725 } else if (!AndMaskConstants.empty()) {
13745 if (!MaskSourceVec) {
13749 } else if (MaskSourceVec != MaskSource->getOperand(0)) {
13763 if (!AndMaskConstants.empty())
13770 SourceVec, MaskSourceVec);
13778 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
13780 EVT VT = Op.getValueType();
13782 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
13785 struct ShuffleSourceInfo {
13800 ShuffleSourceInfo(SDValue Vec)
13801 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
13802 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
13810 for (unsigned i = 0; i < NumElts; ++i) {
13816 V.getOperand(0).getValueType().isScalableVector()) {
13818 dbgs() << "Reshuffle failed: "
13819 "a shuffle can only come from building a vector from "
13820 "various elements of other fixed-width vectors, provided "
13821 "their indices are constant\n");
13827 auto Source = find(Sources, SourceVec);
13828 if (Source == Sources.end())
13829 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
13832 unsigned EltNo = V.getConstantOperandVal(1);
13833 Source->MinElt = std::min(Source->MinElt, EltNo);
13834 Source->MaxElt = std::max(Source->MaxElt, EltNo);
13839 if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
13844 for (unsigned I = 0; I < NumElts; ++I) {
13847 for (unsigned OF = 0; OF < OutputFactor; OF++)
13848 Mask.push_back(-1);
13854 unsigned Lane = V.getConstantOperandVal(1);
13855 for (unsigned S = 0; S < Sources.size(); S++) {
13856 if (V.getOperand(0) == Sources[S].Vec) {
13857 unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
13858 unsigned InputBase = 16 * S + Lane * InputSize / 8;
13859 for (unsigned OF = 0; OF < OutputFactor; OF++)
13860 Mask.push_back(InputBase + OF);
13870 ? Intrinsic::aarch64_neon_tbl3
13871 : Intrinsic::aarch64_neon_tbl4,
13873 for (unsigned i = 0; i < Sources.size(); i++) {
13874 SDValue Src = Sources[i].Vec;
13875 EVT SrcVT = Src.getValueType();
13878 "Expected a legally typed vector");
13886 for (unsigned i = 0; i < Mask.size(); i++)
13888 assert((Mask.size() == 8 || Mask.size() == 16) &&
13889 "Expected a v8i8 or v16i8 Mask");
13891 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, DL, TBLMask));
13895 Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
13899 if (Sources.size() > 2) {
13900 LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
13901 << "sensible when at most two source vectors are "
13909 for (auto &Source : Sources) {
13910 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
13911 if (SrcEltTy.bitsLT(SmallestEltTy)) {
13912 SmallestEltTy = SrcEltTy;
13915 unsigned ResMultiplier =
13924 for (auto &Src : Sources) {
13925 EVT SrcVT = Src.ShuffleVec.getValueType();
13938 assert(2 * SrcVTSize == VTSize);
13943 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
13949 dbgs() << "Reshuffle failed: result vector too small to extract\n");
13953 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
13955 dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
13959 if (Src.MinElt >= NumSrcElts) {
13964 Src.WindowBase = -NumSrcElts;
13965 } else if (Src.MaxElt < NumSrcElts) {
13982 dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
13983 "for SVE vectors.");
13988 DAG.getNode(AArch64ISD::EXT, DL, DestVT, VEXTSrc1, VEXTSrc2,
13990 Src.WindowBase = -Src.MinElt;
13997 for (auto &Src : Sources) {
13999 if (SrcEltTy == SmallestEltTy)
14004 DAG.getNode(AArch64ISD::NVCAST, DL, ShuffleVT, Src.ShuffleVec);
14006 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Src.ShuffleVec);
14010 Src.WindowBase *= Src.WindowScale;
14015 for (auto Src : Sources)
14016 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
14024 if (Entry.isUndef())
14027 auto Src = find(Sources, Entry.getOperand(0));
14036 int LanesDefined = BitsDefined / BitsPerShuffleLane;
14040 int *LaneMask = &Mask[i * ResMultiplier];
14042 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
14043 ExtractBase += NumElts * (Src - Sources.begin());
14044 for (int j = 0; j < LanesDefined; ++j)
14045 LaneMask[j] = ExtractBase + j;
14050 LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
14055 for (unsigned i = 0; i < Sources.size(); ++i)
14062 V = DAG.getNode(AArch64ISD::NVCAST, DL, VT, Shuffle);
14064 V = DAG.getNode(ISD::BITCAST, DL, VT, Shuffle);
14068 dbgs() << "Reshuffle, creating node: "; V.dump(););
14087 unsigned ExpectedElt = Imm;
14088 for (unsigned i = 1; i < NumElts; ++i) {
14092 if (ExpectedElt == NumElts)
14097 if (ExpectedElt != static_cast<unsigned>(M[i]))
14108 if (V.getValueType() != MVT::v16i8)
14110 assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
14112 for (unsigned X = 0; X < 4; X++) {
14124 for (unsigned Y = 1; Y < 4; Y++) {
14140 V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
14141 V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
14143 if (V.getValueType() == MVT::v4i32)
14159 unsigned &DupLaneOp) {
14161 "Only possible block sizes for wide DUP are: 16, 32, 64");
14180 for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
14181 for (size_t I = 0; I < NumEltsPerBlock; I++) {
14182 int Elt = M[BlockIndex * NumEltsPerBlock + I];
14186 if ((unsigned)Elt >= SingleVecNumElements)
14188 if (BlockElts[I] < 0)
14189 BlockElts[I] = Elt;
14190 else if (BlockElts[I] != Elt)
14199 auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
14200 assert(FirstRealEltIter != BlockElts.end() &&
14201 "Shuffle with all-undefs must have been caught by previous cases, "
14203 if (FirstRealEltIter == BlockElts.end()) {
14209 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
14211 if ((unsigned)*FirstRealEltIter < FirstRealIndex)
14214 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
14217 if (Elt0 % NumEltsPerBlock != 0)
14221 for (size_t I = 0; I < NumEltsPerBlock; I++)
14222 if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
14225 DupLaneOp = Elt0 / NumEltsPerBlock;
14234 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
14239 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false,
14243 bool FoundWrongElt = std::any_of(FirstRealElt + 1, M.end(), [&](int Elt) {
14244 return Elt != ExpectedElt++ && Elt >= 0;
14276 if (NumElts % 2 != 0)
14278 WhichResult = (M[0] == 0 ? 0 : 1);
14279 unsigned Idx = WhichResult * NumElts / 2;
14280 for (unsigned i = 0; i != NumElts; i += 2) {
14281 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
14282 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
14295 WhichResult = (M[0] == 0 ? 0 : 1);
14296 for (unsigned j = 0; j != 2; ++j) {
14297 unsigned Idx = WhichResult;
14298 for (unsigned i = 0; i != Half; ++i) {
14299 int MIdx = M[i + j * Half];
14300 if (MIdx >= 0 && (unsigned)MIdx != Idx)
14314 if (NumElts % 2 != 0)
14316 WhichResult = (M[0] == 0 ? 0 : 1);
14317 for (unsigned i = 0; i < NumElts; i += 2) {
14318 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
14319 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
14326 bool &DstIsLeft, int &Anomaly) {
14327 if (M.size() != static_cast<size_t>(NumInputElements))
14330 int NumLHSMatch = 0, NumRHSMatch = 0;
14331 int LastLHSMismatch = -1, LastRHSMismatch = -1;
14333 for (int i = 0; i < NumInputElements; ++i) {
14343 LastLHSMismatch = i;
14345 if (M[i] == i + NumInputElements)
14348 LastRHSMismatch = i;
14351 if (NumLHSMatch == NumInputElements - 1) {
14353 Anomaly = LastLHSMismatch;
14355 } else if (NumRHSMatch == NumInputElements - 1) {
14357 Anomaly = LastRHSMismatch;
14370 for (int I = 0, E = NumElts / 2; I != E; I++) {
14375 int Offset = NumElts / 2;
14376 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
14377 if (Mask[I] != I + SplitLHS * Offset)
14386 EVT VT = Op.getValueType();
14421 unsigned OpNum = (PFEntry >> 26) & 0x0F;
14422 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
14423 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
14445 if (LHSID == (1 * 9 + 2) * 9 + 3)
14447 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
14451 if (OpNum == OP_MOVLANE) {
14453 auto getPFIDLane = [](unsigned ID, int Elt) -> int {
14454 assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
14460 return (ID % 9 == 8) ? -1 : ID % 9;
14469 assert(RHSID < 8 && "Expected a lane index for RHSID!");
14470 unsigned ExtLane = 0;
14476 int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
14478 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
14479 assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
14480 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
14481 Input = MaskElt < 2 ? V1 : V2;
14487 "Expected 16 or 32 bit shuffle elements");
14492 int MaskElt = getPFIDLane(ID, RHSID);
14493 assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
14494 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
14495 Input = MaskElt < 4 ? V1 : V2;
14497 if (VT == MVT::v4i16) {
14503 Input.getValueType().getVectorElementType(),
14525 return DAG.getNode(AArch64ISD::REV64, DL, VT, OpLHS);
14530 return DAG.getNode(AArch64ISD::REV32, DL, VT, OpLHS);
14533 return DAG.getNode(AArch64ISD::REV16, DL, VT, OpLHS);
14540 if (EltTy == MVT::i8)
14541 Opcode = AArch64ISD::DUPLANE8;
14542 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
14543 Opcode = AArch64ISD::DUPLANE16;
14544 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
14545 Opcode = AArch64ISD::DUPLANE32;
14546 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
14547 Opcode = AArch64ISD::DUPLANE64;
14554 return DAG.getNode(Opcode, DL, VT, OpLHS, Lane);
14560 return DAG.getNode(AArch64ISD::EXT, DL, VT, OpLHS, OpRHS,
14564 return DAG.getNode(AArch64ISD::UZP1, DL, VT, OpLHS, OpRHS);
14566 return DAG.getNode(AArch64ISD::UZP2, DL, VT, OpLHS, OpRHS);
14568 return DAG.getNode(AArch64ISD::ZIP1, DL, VT, OpLHS, OpRHS);
14570 return DAG.getNode(AArch64ISD::ZIP2, DL, VT, OpLHS, OpRHS);
14572 return DAG.getNode(AArch64ISD::TRN1, DL, VT, OpLHS, OpRHS);
14574 return DAG.getNode(AArch64ISD::TRN2, DL, VT, OpLHS, OpRHS);
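// Annotation: a perfect-shuffle table entry packs the operation in bits
// [29:26] and two 13-bit operand IDs; each ID is a base-9 encoding of four
// lanes (8 meaning undef), decoded by getPFIDLane above. The routine recurses
// on LHSID/RHSID and then emits the REV/DUPLANE/EXT/UZP/ZIP/TRN node that
// OpNum names.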
14585 EVT EltVT = Op.getValueType().getVectorElementType();
14598 MVT IndexVT = MVT::v8i8;
14599 unsigned IndexLen = 8;
14600 if (Op.getValueSizeInBits() == 128) {
14601 IndexVT = MVT::v16i8;
14606 for (int Val : ShuffleMask) {
14607 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
14608 unsigned Offset = Byte + Val * BytesPerElt;
14611 if (IsUndefOrZero && Offset >= IndexLen)
14621 if (IsUndefOrZero) {
14630 if (IndexLen == 8) {
14651 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
14655 if (EltType == MVT::i8)
14656 return AArch64ISD::DUPLANE8;
14657 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
14658 return AArch64ISD::DUPLANE16;
14659 if (EltType == MVT::i32 || EltType == MVT::f32)
14660 return AArch64ISD::DUPLANE32;
14661 if (EltType == MVT::i64 || EltType == MVT::f64)
14662 return AArch64ISD::DUPLANE64;
14670 auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
14672 if (BitCast.getOpcode() != ISD::BITCAST ||
14681 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
14683 if (ExtIdxInBits % CastedEltBitWidth != 0)
14691 LaneC += ExtIdxInBits / CastedEltBitWidth;
14698 unsigned SrcVecNumElts =
14705 if (getScaledOffsetDup(V, Lane, CastVT)) {
14706 V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
14708 V.getOperand(0).getValueType().is128BitVector()) {
14711 Lane += V.getConstantOperandVal(1);
14712 V = V.getOperand(0);
14738 EVT VT = Op.getValueType();
14748 if (ElementSize > 32 || ElementSize == 1)
14778 EVT VT = Op.getValueType();
14795 for (unsigned I = 0; I < 16; I++) {
14796 if (ShuffleMask[I] < 16)
14802 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, DL, MVT::i32);
14816AArch64TargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
14819 EVT VT = Op.getValueType();
14823 unsigned UnpackOpcode = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
14831 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv8i16, Val);
14832 if (VT == MVT::nxv8i16)
14836 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv4i32, Val);
14837 if (VT == MVT::nxv4i32)
14841 Val = DAG.getNode(UnpackOpcode, DL, MVT::nxv2i64, Val);
14842 assert(VT == MVT::nxv2i64 && "Unexpected result type!");
14853AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
14856 EVT VT = Op.getValueType();
14859 return LowerEXTEND_VECTOR_INREG(Op, DAG);
14864 "Unexpected extension factor.");
14871 DAG.getNode(AArch64ISD::ZIP1, DL, SrcVT, SrcOp, Zeros));
14877 EVT VT = Op.getValueType();
14882 return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
14888 ArrayRef<int> ShuffleMask = SVN->getMask();
14895 "Unexpected VECTOR_SHUFFLE mask size!");
14921 for (unsigned LaneSize : {64U, 32U, 16U}) {
14924 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
14925 : LaneSize == 32 ? AArch64ISD::DUPLANE32
14926 : AArch64ISD::DUPLANE16;
14941 if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
14943 if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
14945 if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
14948 if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
14951 return DAG.getNode(AArch64ISD::EXT, DL, VT, Rev, Rev,
14955 bool ReverseEXT = false;
14957 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
14969 unsigned WhichResult;
14970 unsigned OperandOrder;
14971 if (isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
14972 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
14974 OperandOrder == 0 ? V2 : V1);
14976 if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
14977 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
14980 if (isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder)) {
14981 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
14983 OperandOrder == 0 ? V2 : V1);
14987 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
14991 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
14995 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
15005 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
15006 SDValue DstVec = DstIsLeft ? V1 : V2;
15010 int SrcLane = ShuffleMask[Anomaly];
15011 if (SrcLane >= NumInputElements) {
15013 SrcLane -= NumElts;
15020 ScalarVT = MVT::i32;
15033 if (NumElts == 4) {
15034 unsigned PFIndexes[4];
15035 for (unsigned i = 0; i != 4; ++i) {
15036 if (ShuffleMask[i] < 0)
15039 PFIndexes[i] = ShuffleMask[i];
15043 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
15044 PFIndexes[2] * 9 + PFIndexes[3];
15054 "Expected larger vector element sizes to be handled already");
15056 for (int M : ShuffleMask)
15058 M >= static_cast<int>(NumElts) ? 0 : 0xffffffff, DL, MVT::i32));
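// Annotation: LowerVECTOR_SHUFFLE tries the cheap patterns roughly in this
// order: splats/wide DUPLANE, REV64/32/16, EXT, ZIP/UZP/TRN (including the
// single-input forms), INS of a single anomalous lane, the 4-element perfect
// shuffle table, and finally a generic TBL with a constant byte mask.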
15072 EVT VT = Op.getValueType();
15075 return LowerToScalableOp(Op, DAG);
15078 "Unexpected vector type!");
15093 if (VT == MVT::nxv1i1)
15105 EVT VT = Op.getValueType();
15118 if (CIdx && (CIdx->getZExtValue() <= 3)) {
15120 return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
15142 SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
15143 return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
15148 APInt &UndefBits) {
15150 APInt SplatBits, SplatUndef;
15151 unsigned SplatBitSize;
15153 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
15156 for (unsigned i = 0; i < NumSplats; ++i) {
15157 CnstBits <<= SplatBitSize;
15158 UndefBits <<= SplatBitSize;
15160 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
15171 const APInt &Bits) {
15172 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15174 EVT VT = Op.getValueType();
15183 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15194 EVT VT = Op.getValueType();
15199 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15202 bool isAdvSIMDModImm = false;
15222 if (isAdvSIMDModImm) {
15236 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15247 EVT VT = Op.getValueType();
15252 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15255 bool isAdvSIMDModImm = false;
15267 if (isAdvSIMDModImm) {
15281 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15291 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15293 EVT VT = Op.getValueType();
15295 bool isAdvSIMDModImm = false;
15307 if (isAdvSIMDModImm) {
15312 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15321 const APInt &Bits) {
15322 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15324 EVT VT = Op.getValueType();
15333 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
15342 const APInt &Bits) {
15343 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
15345 EVT VT = Op.getValueType();
15348 bool isAdvSIMDModImm = false;
15352 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
15357 MovTy = MVT::v2f64;
15360 if (isAdvSIMDModImm) {
15364 return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
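// Annotation: these helpers only fire when the constant repeats across both
// 64-bit halves, and then try the AdvSIMD modified-immediate encodings in
// turn: MOVI/MVNI at 64-, 32-, 16- and 8-bit granularity (with optional
// shifts) and the FMOV vector immediate for f32/f64 bit patterns.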
15384 for (
unsigned i = 1; i < NumElts; ++i)
15393 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
15394 N =
N.getOperand(0);
15400 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
15403 while (
N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
15404 N =
N.getOperand(0);
15407 if (
N.getValueType().getVectorMinNumElements() < NumElts)
15417 if (
N.getOpcode() == AArch64ISD::PTRUE &&
15418 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
15419 return N.getValueType().getVectorMinNumElements() >= NumElts;
15423 if (
N.getOpcode() == AArch64ISD::PTRUE) {
15425 unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
15426 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
15427 if (MaxSVESize && MinSVESize == MaxSVESize) {
15429 unsigned PatNumElts =
15431 return PatNumElts == (NumElts * VScale);
15445 EVT VT = N->getValueType(0);
15455 SDValue FirstOp = N->getOperand(0);
15456 unsigned FirstOpc = FirstOp.getOpcode();
15457 SDValue SecondOp = N->getOperand(1);
15458 unsigned SecondOpc = SecondOp.getOpcode();
15465 if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
15466 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
15467 SecondOpc == AArch64ISD::SHL_PRED ||
15468 SecondOpc == AArch64ISD::SRL_PRED)) {
15472 } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
15473 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
15474 FirstOpc == AArch64ISD::SHL_PRED ||
15475 FirstOpc == AArch64ISD::SRL_PRED)) {
15482 bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR ||
15483 Shift.getOpcode() == AArch64ISD::SRL_PRED;
15484 bool ShiftHasPredOp = Shift.getOpcode() == AArch64ISD::SHL_PRED ||
15485 Shift.getOpcode() == AArch64ISD::SRL_PRED;
15489 if (ShiftHasPredOp) {
15495 C2 = C.getZExtValue();
15498 C2 = C2node->getZExtValue();
15512 assert(C1nodeImm && C1nodeShift);
15514 C1AsAPInt = C1AsAPInt.zextOrTrunc(ElemSizeInBits);
15520 if (C2 > ElemSizeInBits)
15525 if (C1AsAPInt != RequiredC1)
15533 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
15538 EVT VT = N->getValueType(0);
15539 assert(VT.isVector() && "Expected vector type in tryLowerToBSL\n");
15557 for (int i = 1; i >= 0; --i) {
15558 for (int j = 1; j >= 0; --j) {
15584 if (Sub.getOperand(1) != Add.getOperand(0))
15587 return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
15595 for (int i = 1; i >= 0; --i)
15596 for (int j = 1; j >= 0; --j) {
15607 if (!BVN0 || !BVN1)
15610 bool FoundMatch = true;
15614 if (!CN0 || !CN1 ||
15617 FoundMatch = false;
15632 !Subtarget->isNeonAvailable()))
15633 return LowerToScalableOp(Op, DAG);
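// Annotation: tryLowerToSLI matches an OR of (and X, C1) with a shifted value
// where C1 is exactly the mask of lanes the shift leaves free, turning it into
// VSLI/VSRI; tryLowerToBSL recognises (and X, M) | (and Y, ~M) style ORs and
// emits AArch64ISD::BSP so a single bit-select instruction does the merge.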
15642 EVT VT =
Op.getValueType();
15647 BuildVectorSDNode *BVN =
15651 LHS =
Op.getOperand(1);
15669 UndefBits, &
LHS)) ||
15685 EVT VT =
Op.getValueType();
15699 CstLane->getAPIntValue().trunc(EltTy.
getSizeInBits()).getZExtValue(),
15701 }
else if (Lane.getNode()->isUndef()) {
15704 assert(Lane.getValueType() == MVT::i32 &&
15705 "Unexpected BUILD_VECTOR operand type");
15707 Ops.push_back(Lane);
15714 EVT VT =
Op.getValueType();
15722 int32_t ImmVal, ShiftVal;
15730 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Res);
15735 EVT VT =
Op.getValueType();
15737 "Expected a legal NEON vector");
15743 auto TryMOVIWithBits = [&](
APInt DefBits) {
15757 APInt NotDefBits = ~DefBits;
15767 if (
SDValue R = TryMOVIWithBits(DefBits))
15769 if (
SDValue R = TryMOVIWithBits(UndefBits))
15777 auto TryWithFNeg = [&](
APInt DefBits,
MVT FVT) {
15783 unsigned NumElts = VT.
getSizeInBits() / FVT.getScalarSizeInBits();
15784 for (
unsigned i = 0; i < NumElts; i++)
15785 NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
15786 NegBits = DefBits ^ NegBits;
15790 if (
SDValue NewOp = TryMOVIWithBits(NegBits)) {
15794 AArch64ISD::NVCAST,
DL, VT,
15796 DAG.
getNode(AArch64ISD::NVCAST,
DL, VFVT, NewOp)));
15801 if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
15802 (R = TryWithFNeg(DefBits, MVT::f64)) ||
15803 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
15810SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
15812 EVT VT =
Op.getValueType();
15836 NumElems -
count_if(
Op->op_values(), IsExtractElt) > 4)
15843 return Op.isUndef() ? Undef
15844 : DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
15845 ContainerVT, Undef, Op, ZeroI64);
15849 while (Intermediates.
size() > 1) {
15852 for (
unsigned I = 0;
I < Intermediates.
size();
I += 2) {
15855 Intermediates[
I / 2] =
15857 : DAG.
getNode(AArch64ISD::ZIP1,
DL, ZipVT, Op0, Op1);
15860 Intermediates.
resize(Intermediates.
size() / 2);
15871 EVT VT =
Op.getValueType();
15873 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
15876 return LowerFixedLengthBuildVectorToSVE(
Op, DAG);
15894 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
15895 if (Val.isZero() || (VT.
isInteger() && Val.isAllOnes()))
15899 if (
Const->isZero() && !
Const->isNegative())
15920 bool isOnlyLowElement =
true;
15921 bool usesOnlyOneValue =
true;
15922 bool usesOnlyOneConstantValue =
true;
15924 bool AllLanesExtractElt =
true;
15925 unsigned NumConstantLanes = 0;
15926 unsigned NumDifferentLanes = 0;
15927 unsigned NumUndefLanes = 0;
15930 SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
15931 unsigned ConsecutiveValCount = 0;
15933 for (
unsigned i = 0; i < NumElts; ++i) {
15936 AllLanesExtractElt =
false;
15942 isOnlyLowElement =
false;
15947 ++NumConstantLanes;
15948 if (!ConstantValue.
getNode())
15950 else if (ConstantValue != V)
15951 usesOnlyOneConstantValue =
false;
15954 if (!
Value.getNode())
15956 else if (V !=
Value) {
15957 usesOnlyOneValue =
false;
15958 ++NumDifferentLanes;
15961 if (PrevVal != V) {
15962 ConsecutiveValCount = 0;
15977 DifferentValueMap[
V] = ++ConsecutiveValCount;
15980 if (!
Value.getNode()) {
15982 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
15990 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
15991 "SCALAR_TO_VECTOR node\n");
15995 if (AllLanesExtractElt) {
15996 SDNode *
Vector =
nullptr;
16001 for (
unsigned i = 0; i < NumElts; ++i) {
16003 const SDNode *
N =
V.getNode();
16028 if (Val == 2 * i) {
16032 if (Val - 1 == 2 * i) {
16059 if (usesOnlyOneValue) {
16062 Value.getValueType() != VT) {
16064 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
16072 if (
Value.getValueSizeInBits() == 64) {
16074 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
16086 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
16087 EltTy == MVT::f64) &&
"Unsupported floating-point vector type");
16089 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
16090 "BITCASTS, and try again\n");
16092 for (
unsigned i = 0; i < NumElts; ++i)
16093 Ops.push_back(DAG.
getNode(ISD::BITCAST,
DL, NewType,
Op.getOperand(i)));
16096 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
16098 Val = LowerBUILD_VECTOR(Val, DAG);
16100 return DAG.
getNode(ISD::BITCAST,
DL, VT, Val);
16108 bool PreferDUPAndInsert =
16110 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
16111 NumDifferentLanes >= NumConstantLanes;
16117 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
16121 APInt ConstantValueAPInt(1, 0);
16123 ConstantValueAPInt =
C->getAPIntValue().zextOrTrunc(BitSize);
16125 !ConstantValueAPInt.isAllOnes()) {
16129 Val = DAG.
getNode(AArch64ISD::DUP,
DL, VT, ConstantValue);
16133 for (
unsigned i = 0; i < NumElts; ++i) {
16147 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
16159 if (NumElts >= 4) {
16167 if (PreferDUPAndInsert) {
16172 for (
unsigned I = 0;
I < NumElts; ++
I)
16183 if (DifferentValueMap.
size() == 2 && NumUndefLanes == 0) {
16195 bool canUseVECTOR_CONCAT =
true;
16196 for (
auto Pair : DifferentValueMap) {
16198 if (Pair.second != NumElts / 2)
16199 canUseVECTOR_CONCAT =
false;
16212 if (canUseVECTOR_CONCAT) {
16235 if (NumElts >= 8) {
16236 SmallVector<int, 16> MaskVec;
16238 SDValue FirstLaneVal =
Op.getOperand(0);
16239 for (
unsigned i = 0; i < NumElts; ++i) {
16241 if (FirstLaneVal == Val)
16265 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
16266 "of INSERT_VECTOR_ELT\n");
16283 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
16289 dbgs() <<
"Creating nodes for the other vector elements:\n";
16291 for (; i < NumElts; ++i) {
16302 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
16303 "better alternative\n");
16310 !Subtarget->isNeonAvailable()))
16311 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
16313 assert(
Op.getValueType().isScalableVector() &&
16315 "Expected legal scalable vector type!");
16320 "Unexpected number of operands in CONCAT_VECTORS");
16322 if (NumOperands == 2)
16327 while (ConcatOps.size() > 1) {
16328 for (
unsigned I = 0,
E = ConcatOps.size();
I !=
E;
I += 2) {
16336 ConcatOps.resize(ConcatOps.size() / 2);
16338 return ConcatOps[0];
16349 !Subtarget->isNeonAvailable()))
16350 return LowerFixedLengthInsertVectorElt(
Op, DAG);
16352 EVT VT =
Op.getOperand(0).getValueType();
16366 ExtendedValue,
Op.getOperand(2));
16379AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
16382 EVT VT =
Op.getOperand(0).getValueType();
16391 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
16393 Extend,
Op.getOperand(1));
16398 return LowerFixedLengthExtractVectorElt(
Op, DAG);
16406 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16407 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
16408 VT == MVT::v8f16 || VT == MVT::v8bf16)
16411 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
16412 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
16423 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
16433 EVT VT =
Op.getValueType();
16435 "Only cases that extract a fixed length vector are supported!");
16436 EVT InVT =
Op.getOperand(0).getValueType();
16444 unsigned Idx =
Op.getConstantOperandVal(1);
16463 if (PackedVT != InVT) {
16486 assert(
Op.getValueType().isScalableVector() &&
16487 "Only expect to lower inserts into scalable vectors!");
16489 EVT InVT =
Op.getOperand(1).getValueType();
16490 unsigned Idx =
Op.getConstantOperandVal(2);
16495 EVT VT =
Op.getValueType();
16511 if (Idx < (NumElts / 2))
16537 Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
16538 Vec1 = getSVESafeBitCast(NarrowVT, Vec1, DAG);
16542 Vec1 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, Vec1);
16551 HiVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, HiVec0);
16552 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, Vec1, HiVec0);
16555 "Invalid subvector index!");
16557 LoVec0 = DAG.
getNode(AArch64ISD::NVCAST,
DL, NarrowVT, LoVec0);
16558 Narrow = DAG.
getNode(AArch64ISD::UZP1,
DL, NarrowVT, LoVec0, Vec1);
16561 return getSVESafeBitCast(VT, Narrow, DAG);
16569 std::optional<unsigned> PredPattern =
16581 if (
Op.getOpcode() != AArch64ISD::DUP &&
16594 SplatVal =
Op->getConstantOperandVal(0);
16595 if (
Op.getValueType().getVectorElementType() != MVT::i64)
16596 SplatVal = (int32_t)SplatVal;
16604 SplatVal = -SplatVal;
16612 EVT VT = Op.getValueType();
16616 return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
16621 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
16630 DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
16638 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
16639 return LowerToPredicatedOp(Op, DAG, PredOpcode);
16644 if (VT == MVT::nxv16i8)
16645 WidenedVT = MVT::nxv8i16;
16646 else if (VT == MVT::nxv8i16)
16647 WidenedVT = MVT::nxv4i32;
16651 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16652 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16661 return DAG.getNode(AArch64ISD::UZP1, DL, VT, ResultLoCast, ResultHiCast);
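// Annotation: SVE only divides 32- and 64-bit elements, so nxv16i8/nxv8i16
// divisions are widened with [S|U]UNPKLO/HI, divided as two halves in the
// wider type, and narrowed back with UZP1; signed division by a power-of-two
// splat is handled earlier via ASRD_MERGE_OP1.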
16664bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
16665 EVT VT,
unsigned DefinedValues)
const {
16666 if (!Subtarget->isNeonAvailable())
16685 unsigned DummyUnsigned;
16693 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
16695 isTRNMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16696 isUZPMask(M, NumElts, DummyUnsigned) ||
16697 isZIPMask(M, NumElts, DummyUnsigned, DummyUnsigned) ||
16701 isINSMask(M, NumElts, DummyBool, DummyInt) ||
16716 while (
Op.getOpcode() == ISD::BITCAST)
16717 Op =
Op.getOperand(0);
16719 APInt SplatBits, SplatUndef;
16720 unsigned SplatBitSize;
16722 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
16723 HasAnyUndefs, ElementBits) ||
16724 SplatBitSize > ElementBits)
16735 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16739 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
16746 assert(VT.
isVector() &&
"vector shift count is not a vector type");
16750 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
16755 EVT VT =
Op.getValueType();
16760 EVT OpVT =
Op.getOperand(0).getValueType();
16771 !Subtarget->isNeonAvailable()))
16772 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
16782 unsigned &ShiftValue,
16795 ShiftValue = ShiftOp1->getZExtValue();
16804 "ResVT must be truncated or same type as the shift.");
16807 if (ShiftValue > ExtraBits && !
Add->getFlags().hasNoUnsignedWrap())
16814 uint64_t AddValue = AddOp1->getZExtValue();
16815 if (AddValue != 1ULL << (ShiftValue - 1))
16818 RShOperand =
Add->getOperand(0);
16824 EVT VT = Op.getValueType();
16828 if (!Op.getOperand(1).getValueType().isVector())
16832 switch (Op.getOpcode()) {
16836 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
16838 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
16839 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
16844 Op.getOperand(0), Op.getOperand(1));
16848 (Subtarget->hasSVE2() ||
16849 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
16851 unsigned ShiftValue;
16853 return DAG.getNode(AArch64ISD::URSHR_I_PRED, DL, VT,
16860 unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
16861 : AArch64ISD::SRL_PRED;
16862 return LowerToPredicatedOp(Op, DAG, Opc);
16866 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
16868 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
16877 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
16878 : Intrinsic::aarch64_neon_ushl;
16886 return NegShiftLeft;
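// Annotation: constant vector shift amounts map directly onto the immediate
// forms VSHL/VASHR/VLSHR; NEON has no variable right shift, so variable
// SRA/SRL are emitted as sshl/ushl intrinsics with a negated shift amount.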
16894 if (
Op.getValueType().isScalableVector())
16895 return LowerToPredicatedOp(
Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
16898 !Subtarget->isNeonAvailable()))
16899 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
16904 EVT CmpVT =
LHS.getValueType().changeVectorElementTypeToInteger();
16907 if (
LHS.getValueType().getVectorElementType().isInteger())
16910 assert(((!Subtarget->hasFullFP16() &&
16911 LHS.getValueType().getVectorElementType() != MVT::f16) ||
16912 LHS.getValueType().getVectorElementType() != MVT::bf16 ||
16913 LHS.getValueType().getVectorElementType() != MVT::f128) &&
16914 "Unexpected type!");
16919 bool OneNaN =
false;
16943 if (!
Cmp.getNode())
16972 unsigned ScalarOpcode;
16974 case ISD::VECREDUCE_AND:
16977 case ISD::VECREDUCE_OR:
16980 case ISD::VECREDUCE_XOR:
16990 "Expected power-of-2 length vector");
16998 if (ElemVT == MVT::i1) {
17000 if (NumElems > 16) {
17003 EVT HalfVT =
Lo.getValueType();
17014 unsigned ExtendedWidth = 64;
17017 ExtendedWidth = 128;
17022 unsigned ExtendOp =
17030 NumElems == 2 && ExtendedWidth == 128) {
17031 Extended = DAG.
getBitcast(MVT::v4i32, Extended);
17032 ExtendedVT = MVT::i32;
17034 switch (ScalarOpcode) {
17036 Result = DAG.
getNode(ISD::VECREDUCE_UMIN,
DL, ExtendedVT, Extended);
17039 Result = DAG.
getNode(ISD::VECREDUCE_UMAX,
DL, ExtendedVT, Extended);
17042 Result = DAG.
getNode(ISD::VECREDUCE_ADD,
DL, ExtendedVT, Extended);
17055 VecVT =
Lo.getValueType();
17071 for (
unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
17076 Scalar = DAG.
getNode(ScalarOpcode,
DL, ScalarVT, Scalar, Shifted);
17088 EVT SrcVT = Src.getValueType();
17092 if (Subtarget->hasFullFP16() &&
Op.getOpcode() == ISD::VECREDUCE_FADD &&
17093 SrcVT == MVT::v2f16) {
17101 bool OverrideNEON = !Subtarget->isNeonAvailable() ||
17102 Op.getOpcode() == ISD::VECREDUCE_AND ||
17103 Op.getOpcode() == ISD::VECREDUCE_OR ||
17104 Op.getOpcode() == ISD::VECREDUCE_XOR ||
17105 Op.getOpcode() == ISD::VECREDUCE_FADD ||
17106 (
Op.getOpcode() != ISD::VECREDUCE_ADD &&
17110 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
17113 return LowerPredReductionToSVE(
Op, DAG);
17115 switch (Op.getOpcode()) {
17116 case ISD::VECREDUCE_ADD:
17117 return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
17118 case ISD::VECREDUCE_AND:
17119 return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
17120 case ISD::VECREDUCE_OR:
17121 return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
17122 case ISD::VECREDUCE_SMAX:
17123 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
17124 case ISD::VECREDUCE_SMIN:
17125 return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
17126 case ISD::VECREDUCE_UMAX:
17127 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
17128 case ISD::VECREDUCE_UMIN:
17129 return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
17130 case ISD::VECREDUCE_XOR:
17131 return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
17132 case ISD::VECREDUCE_FADD:
17133 return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
17134 case ISD::VECREDUCE_FMAX:
17135 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
17136 case ISD::VECREDUCE_FMIN:
17137 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
17138 case ISD::VECREDUCE_FMAXIMUM:
17139 return LowerReductionToSVE(AArch64ISD::FMAXV_PRED, Op, DAG);
17140 case ISD::VECREDUCE_FMINIMUM:
17141 return LowerReductionToSVE(AArch64ISD::FMINV_PRED, Op, DAG);
17149 switch (
Op.getOpcode()) {
17150 case ISD::VECREDUCE_AND:
17151 case ISD::VECREDUCE_OR:
17152 case ISD::VECREDUCE_XOR:
17154 Op.getValueType(),
DL, DAG);
17155 case ISD::VECREDUCE_ADD:
17157 case ISD::VECREDUCE_SMAX:
17159 case ISD::VECREDUCE_SMIN:
17161 case ISD::VECREDUCE_UMAX:
17163 case ISD::VECREDUCE_UMIN:
17174 EVT SrcVT = Src.getValueType();
17177 SDVTList SrcVTs = DAG.
getVTList(SrcVT, SrcVT);
17189 for (
unsigned I = 0;
I < Stages; ++
I) {
17191 Src = DAG.
getNode(BaseOpc,
DL, SrcVT, Src.getValue(0), Src.getValue(1));
17199 auto &Subtarget = DAG.
getSubtarget<AArch64Subtarget>();
17201 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
17206 MVT VT =
Op.getSimpleValueType();
17207 assert(VT != MVT::i128 &&
"Handled elsewhere, code replicated.");
17212 Op.getOperand(0),
Op.getOperand(1),
RHS,
17217AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
17222 SDNode *Node = Op.getNode();
17227 EVT VT = Node->getValueType(0);
17230 "no-stack-arg-probe")) {
17232 Chain = SP.getValue(1);
17248 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
17249 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
17250 if (Subtarget->hasCustomCallingConv())
17258 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
17269 Chain = SP.getValue(1);
17283AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
17286 SDNode *Node = Op.getNode();
17293 EVT VT = Node->getValueType(0);
17297 Chain = SP.getValue(1);
17304 Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, DL, MVT::Other, Chain, SP);
17310AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
17314 if (Subtarget->isTargetWindows())
17315 return LowerWindowsDYNAMIC_STACKALLOC(Op, DAG);
17317 return LowerInlineDYNAMIC_STACKALLOC(Op, DAG);
17323 unsigned NewOp) const {
17324 if (Subtarget->hasSVE2())
17325 return LowerToPredicatedOp(Op, DAG, NewOp);
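// Annotation: on Windows the allocation size is passed to the stack-probe
// helper via X15 (in 16-byte units) before SP is adjusted; the inline path,
// when probing is requested, publishes the new SP through
// AArch64ISD::PROBED_ALLOCA instead. This summary reflects the usual behaviour
// of these routines and the fragments above.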
17333 EVT VT =
Op.getValueType();
17334 assert(VT != MVT::i64 &&
"Expected illegal VSCALE node");
17337 APInt MulImm =
Op.getConstantOperandAPInt(0);
17343template <
unsigned NumVecs>
17353 for (
unsigned I = 0;
I < NumVecs; ++
I)
17362 Info.align.reset();
17374 auto &
DL =
I.getDataLayout();
17376 case Intrinsic::aarch64_sve_st2:
17378 case Intrinsic::aarch64_sve_st3:
17380 case Intrinsic::aarch64_sve_st4:
17382 case Intrinsic::aarch64_neon_ld2:
17383 case Intrinsic::aarch64_neon_ld3:
17384 case Intrinsic::aarch64_neon_ld4:
17385 case Intrinsic::aarch64_neon_ld1x2:
17386 case Intrinsic::aarch64_neon_ld1x3:
17387 case Intrinsic::aarch64_neon_ld1x4: {
17389 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
17391 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17393 Info.align.reset();
17398 case Intrinsic::aarch64_neon_ld2lane:
17399 case Intrinsic::aarch64_neon_ld3lane:
17400 case Intrinsic::aarch64_neon_ld4lane:
17401 case Intrinsic::aarch64_neon_ld2r:
17402 case Intrinsic::aarch64_neon_ld3r:
17403 case Intrinsic::aarch64_neon_ld4r: {
17406 Type *RetTy =
I.getType();
17408 unsigned NumElts = StructTy->getNumElements();
17409 Type *VecTy = StructTy->getElementType(0);
17412 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17414 Info.align.reset();
17419 case Intrinsic::aarch64_neon_st2:
17420 case Intrinsic::aarch64_neon_st3:
17421 case Intrinsic::aarch64_neon_st4:
17422 case Intrinsic::aarch64_neon_st1x2:
17423 case Intrinsic::aarch64_neon_st1x3:
17424 case Intrinsic::aarch64_neon_st1x4: {
17426 unsigned NumElts = 0;
17427 for (
const Value *Arg :
I.args()) {
17428 Type *ArgTy = Arg->getType();
17431 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
17434 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17436 Info.align.reset();
17441 case Intrinsic::aarch64_neon_st2lane:
17442 case Intrinsic::aarch64_neon_st3lane:
17443 case Intrinsic::aarch64_neon_st4lane: {
17445 unsigned NumElts = 0;
17447 Type *VecTy =
I.getArgOperand(0)->getType();
17450 for (
const Value *Arg :
I.args()) {
17451 Type *ArgTy = Arg->getType();
17458 Info.ptrVal =
I.getArgOperand(
I.arg_size() - 1);
17460 Info.align.reset();
17465 case Intrinsic::aarch64_ldaxr:
17466 case Intrinsic::aarch64_ldxr: {
17467 Type *ValTy =
I.getParamElementType(0);
17470 Info.ptrVal =
I.getArgOperand(0);
17472 Info.align =
DL.getABITypeAlign(ValTy);
17476 case Intrinsic::aarch64_stlxr:
17477 case Intrinsic::aarch64_stxr: {
17478 Type *ValTy =
I.getParamElementType(1);
17481 Info.ptrVal =
I.getArgOperand(1);
17483 Info.align =
DL.getABITypeAlign(ValTy);
17487 case Intrinsic::aarch64_ldaxp:
17488 case Intrinsic::aarch64_ldxp:
17490 Info.memVT = MVT::i128;
17491 Info.ptrVal =
I.getArgOperand(0);
17493 Info.align =
Align(16);
17496 case Intrinsic::aarch64_stlxp:
17497 case Intrinsic::aarch64_stxp:
17499 Info.memVT = MVT::i128;
17500 Info.ptrVal =
I.getArgOperand(2);
17502 Info.align =
Align(16);
17505 case Intrinsic::aarch64_sve_ldnt1: {
17509 Info.ptrVal =
I.getArgOperand(1);
17511 Info.align =
DL.getABITypeAlign(ElTy);
17515 case Intrinsic::aarch64_sve_stnt1: {
17519 Info.memVT =
MVT::getVT(
I.getOperand(0)->getType());
17520 Info.ptrVal =
I.getArgOperand(2);
17522 Info.align =
DL.getABITypeAlign(ElTy);
17526 case Intrinsic::aarch64_mops_memset_tag: {
17527 Value *Dst =
I.getArgOperand(0);
17528 Value *Val =
I.getArgOperand(1);
17533 Info.align =
I.getParamAlign(0).valueOrOne();
17548 std::optional<unsigned> ByteOffset)
const {
17565 Base.getOperand(1).hasOneUse() &&
17572 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
17574 if (ShiftAmount ==
Log2_32(LoadBytes))
17584 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->
use_size()) {
17603 return NumBits1 > NumBits2;
17610 return NumBits1 > NumBits2;
17617 if (
I->getOpcode() != Instruction::FMul)
17620 if (!
I->hasOneUse())
17625 if (!(
User->getOpcode() == Instruction::FSub ||
17626 User->getOpcode() == Instruction::FAdd))
17637 I->getFastMathFlags().allowContract()));
17647 return NumBits1 == 32 && NumBits2 == 64;
17654 return NumBits1 == 32 && NumBits2 == 64;
17672bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *Ext)
const {
17680 for (
const Use &U : Ext->
uses()) {
17688 switch (Instr->getOpcode()) {
17689 case Instruction::Shl:
17693 case Instruction::GetElementPtr: {
17696 std::advance(GTI, U.getOperandNo()-1);
17709 if (ShiftAmt == 0 || ShiftAmt > 4)
17713 case Instruction::Trunc:
17730 unsigned NumElts,
bool IsLittleEndian,
17732 if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth > 64)
17735 assert(DstWidth % SrcWidth == 0 &&
17736 "TBL lowering is not supported for a conversion instruction with this "
17737 "source and destination element type.");
17739 unsigned Factor = DstWidth / SrcWidth;
17740 unsigned MaskLen = NumElts * Factor;
17743 Mask.resize(MaskLen, NumElts);
17745 unsigned SrcIndex = 0;
17746 for (
unsigned I = IsLittleEndian ? 0 : Factor - 1;
I < MaskLen;
I += Factor)
17747 Mask[
I] = SrcIndex++;
17755 bool IsLittleEndian) {
17757 unsigned NumElts = SrcTy->getNumElements();
17765 auto *FirstEltZero = Builder.CreateInsertElement(
17767 Value *Result = Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17768 Result = Builder.CreateBitCast(Result, DstTy);
17769 if (DstTy != ZExtTy)
17770 Result = Builder.CreateZExt(Result, ZExtTy);
17776 bool IsLittleEndian) {
17783 !IsLittleEndian, Mask))
17786 auto *FirstEltZero = Builder.CreateInsertElement(
17789 return Builder.CreateShuffleVector(
Op, FirstEltZero, Mask);
17798 assert(SrcTy->getElementType()->isIntegerTy() &&
17799 "Non-integer type source vector element is not supported");
17800 assert(DstTy->getElementType()->isIntegerTy(8) &&
17801 "Unsupported destination vector element type");
17802 unsigned SrcElemTySz =
17804 unsigned DstElemTySz =
17806 assert((SrcElemTySz % DstElemTySz == 0) &&
17807 "Cannot lower truncate to tbl instructions for a source element size "
17808 "that is not divisible by the destination element size");
17809 unsigned TruncFactor = SrcElemTySz / DstElemTySz;
17810 assert((SrcElemTySz == 16 || SrcElemTySz == 32 || SrcElemTySz == 64) &&
17811 "Unsupported source vector element type size");
17819 for (int Itr = 0; Itr < 16; Itr++) {
17820 if (Itr < NumElements)
17822 IsLittleEndian ? Itr * TruncFactor
17823 : Itr * TruncFactor + (TruncFactor - 1)));
17825 MaskConst.push_back(Builder.getInt8(255));
17828 int MaxTblSz = 128 * 4;
17829 int MaxSrcSz = SrcElemTySz * NumElements;
17831 (MaxTblSz > MaxSrcSz) ? NumElements : (MaxTblSz / SrcElemTySz);
17832 assert(ElemsPerTbl <= 16 &&
17833 "Maximum elements selected using TBL instruction cannot exceed 16!");
17835 int ShuffleCount = 128 / SrcElemTySz;
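// Feed the TBL in 128-bit chunks: each shuffle below extracts one 128-bit
// slice of the wide source, up to four slices are combined per tbl1..tbl4
// call, and mask byte 255 is out of range so TBL returns zero for unused lanes.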
17837 for (int i = 0; i < ShuffleCount; ++i)
17844 while (ShuffleLanes.back() < NumElements) {
17846 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy));
17848 if (Parts.size() == 4) {
17851 Builder.CreateIntrinsic(Intrinsic::aarch64_neon_tbl4, VecTy, Parts));
17855 for (int i = 0; i < ShuffleCount; ++i)
17856 ShuffleLanes[i] += ShuffleCount;
17860 "Lowering trunc for vectors requiring different TBL instructions is "
17864 if (!Parts.empty()) {
17866 switch (Parts.size()) {
17868 TblID = Intrinsic::aarch64_neon_tbl1;
17871 TblID = Intrinsic::aarch64_neon_tbl2;
17874 TblID = Intrinsic::aarch64_neon_tbl3;
17879 Results.push_back(Builder.CreateIntrinsic(TblID, VecTy, Parts));
17884 assert(Results.size() <= 2 && "Trunc lowering does not support generation of "
17885 "more than 2 tbl instructions!");
17888 if (ElemsPerTbl < 16) {
17890 std::iota(FinalMask.begin(), FinalMask.end(), 0);
17891 FinalResult = Builder.CreateShuffleVector(Results[0], FinalMask);
17895 if (ElemsPerTbl < 16) {
17896 std::iota(FinalMask.begin(), FinalMask.begin() + ElemsPerTbl, 0);
17897 std::iota(FinalMask.begin() + ElemsPerTbl, FinalMask.end(), 16);
17899 std::iota(FinalMask.begin(), FinalMask.end(), 0);
17913 if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors())
17921 if (!L || L->getHeader() != I->getParent() || F->hasOptSize())
17926 if (!SrcTy || !DstTy)
17933 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
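// A zext from an i8 vector is rewritten here as a byte shuffle that a single
// TBL (plus a zero operand) can implement, rather than the usual chain of
// widening extends, when the cast cost model below says it is profitable.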
17934 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
17935 if (DstWidth % 8 != 0)
17938 auto *TruncDstType =
17942 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
17943 if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType,
17946 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
17949 DstTy = TruncDstType;
17957 if (SrcWidth * 4 <= DstWidth) {
17958 if (all_of(I->users(), [&](auto *U) {
17959 using namespace llvm::PatternMatch;
17960 auto *SingleUser = cast<Instruction>(&*U);
17961 if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
17963 if (match(SingleUser,
17964 m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
17965 m_Value(), m_Specific(I))))
17972 if (DstTy->getScalarSizeInBits() >= 64)
17978 DstTy, Subtarget->isLittleEndian());
17981 ZExt->replaceAllUsesWith(Result);
17982 ZExt->eraseFromParent();
17987 if (UIToFP && ((SrcTy->getElementType()->isIntegerTy(8) &&
17988 DstTy->getElementType()->isFloatTy()) ||
17989 (SrcTy->getElementType()->isIntegerTy(16) &&
17990 DstTy->getElementType()->isDoubleTy()))) {
17995 assert(ZExt && "Cannot fail for the i8 to float conversion");
17996 auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
17997 I->replaceAllUsesWith(UI);
17998 I->eraseFromParent();
18003 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
18004 DstTy->getElementType()->isFloatTy()) {
18008 Subtarget->isLittleEndian());
18009 assert(Shuffle && "Cannot fail for the i8 to float conversion");
18011 auto *AShr = Builder.CreateAShr(Cast, 24, "", true);
18012 auto *SI = Builder.CreateSIToFP(AShr, DstTy);
18013 I->replaceAllUsesWith(SI);
18014 I->eraseFromParent();
18022 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
18023 SrcTy->getElementType()->isFloatTy() &&
18024 DstTy->getElementType()->isIntegerTy(8)) {
18026 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0),
18028 auto *TruncI = Builder.CreateTrunc(WideConv, DstTy);
18029 I->replaceAllUsesWith(TruncI);
18030 I->eraseFromParent();
18040 if (TI && DstTy->getElementType()->isIntegerTy(8) &&
18041 ((SrcTy->getElementType()->isIntegerTy(32) ||
18042 SrcTy->getElementType()->isIntegerTy(64)) &&
18043 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) {
18052 Align &RequiredAlignment) const {
18057 RequiredAlignment = Align(1);
18059 return NumBits == 32 || NumBits == 64;
18066 unsigned VecSize = 128;
18070 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18071 return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
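// Number of ldN/stN accesses needed for one interleave group: the total
// vector size in bits divided by the register width (128 for NEON, or the
// known minimum SVE register size when fixed-length SVE is in use), rounded
// up and clamped to at least one.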
18076 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
18086 unsigned MinElts = EC.getKnownMinValue();
18088 UseScalable = false;
18091 (!Subtarget->useSVEForFixedLengthVectors() ||
18096 !Subtarget->isSVEorStreamingSVEAvailable())
18104 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
18107 if (EC.isScalable()) {
18108 UseScalable = true;
18109 return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
18112 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
18113 if (Subtarget->useSVEForFixedLengthVectors()) {
18114 unsigned MinSVEVectorSize =
18115 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
18116 if (VecSize % MinSVEVectorSize == 0 ||
18118 (!Subtarget->isNeonAvailable() || VecSize > 128))) {
18119 UseScalable = true;
18126 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
18158 bool Scalable, Type *LDVTy,
18160 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
18161 static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
18162 Intrinsic::aarch64_sve_ld3_sret,
18163 Intrinsic::aarch64_sve_ld4_sret};
18164 static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
18165 Intrinsic::aarch64_neon_ld3,
18166 Intrinsic::aarch64_neon_ld4};
18175 bool Scalable, Type *STVTy,
18177 assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
18178 static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
18179 Intrinsic::aarch64_sve_st3,
18180 Intrinsic::aarch64_sve_st4};
18181 static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
18182 Intrinsic::aarch64_neon_st3,
18183 Intrinsic::aarch64_neon_st4};
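// Interleave factors 2..4 map directly onto the structured load/store forms:
// ld2/ld3/ld4 and st2/st3/st4 for NEON, or their predicated SVE counterparts
// (ld2_sret..ld4_sret, st2..st4) when the access uses scalable vectors.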
18206 "Invalid interleave factor");
18207 assert(!Shuffles.empty() && "Empty shufflevector input");
18209 "Unmatched number of shufflevectors and indices");
18214 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
18233 SI->getType()->getScalarSizeInBits() * 4 ==
18234 SI->user_back()->getType()->getScalarSizeInBits();
18244 Type *EltTy = FVTy->getElementType();
18252 FVTy->getNumElements() / NumLoads);
18260 Value *BaseAddr = LI->getPointerOperand();
18262 Type *PtrTy = LI->getPointerOperandType();
18264 LDVTy->getElementCount());
18267 UseScalable, LDVTy, PtrTy);
18274 Value *PTrue = nullptr;
18276 std::optional<unsigned> PgPattern =
18278 if (Subtarget->getMinSVEVectorSizeInBits() ==
18279 Subtarget->getMaxSVEVectorSizeInBits() &&
18280 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
18281 PgPattern = AArch64SVEPredPattern::all;
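// When the SVE register size is known exactly and matches the fixed-length
// type being loaded, a ptrue with the "all" pattern covers every lane;
// otherwise the predicate pattern computed above only enables the lanes of
// the original fixed-length vector.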
18285 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18289 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
18294 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
18295 FVTy->getNumElements() * Factor);
18299 LdN = Builder.CreateCall(LdNFunc, {PTrue, BaseAddr}, "ldN");
18301 LdN = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18304 for (unsigned i = 0; i < Shuffles.size(); i++) {
18306 unsigned Index = Indices[i];
18308 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
18311 SubVec = Builder.CreateExtractVector(FVTy, SubVec, uint64_t(0));
18315 SubVec = Builder.CreateIntToPtr(
18317 FVTy->getNumElements()));
18319 SubVecs[SVI].push_back(SubVec);
18328 auto &SubVec = SubVecs[SVI];
18331 SVI->replaceAllUsesWith(WideVec);
18337template <typename Iter>
18339 int MaxLookupDist = 20;
18340 unsigned IdxWidth = DL.getIndexSizeInBits(0);
18341 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
18342 const Value *PtrA1 =
18345 while (++It != End) {
18346 if (It->isDebugOrPseudoInst())
18348 if (MaxLookupDist-- == 0)
18351 const Value *PtrB1 =
18352 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets(
18354 if (PtrA1 == PtrB1 &&
18355 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth))
18394 const APInt &GapMask) const {
18397 "Invalid interleave factor");
18402 "Unexpected mask on store");
18405 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
18407 unsigned LaneLen = VecTy->getNumElements() / Factor;
18408 Type *EltTy = VecTy->getElementType();
18429 Type *IntTy = DL.getIntPtrType(EltTy);
18430 unsigned NumOpElts =
18435 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
18436 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
18443 LaneLen /= NumStores;
18450 Value *BaseAddr = SI->getPointerOperand();
18464 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 &&
18472 Type *PtrTy = SI->getPointerOperandType();
18474 STVTy->getElementCount());
18477 UseScalable, STVTy, PtrTy);
18479 Value *PTrue = nullptr;
18481 std::optional<unsigned> PgPattern =
18483 if (Subtarget->getMinSVEVectorSizeInBits() ==
18484 Subtarget->getMaxSVEVectorSizeInBits() &&
18485 Subtarget->getMinSVEVectorSizeInBits() ==
18486 DL.getTypeSizeInBits(SubVecTy))
18487 PgPattern = AArch64SVEPredPattern::all;
18491 PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
18495 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
18500 for (unsigned i = 0; i < Factor; i++) {
18502 unsigned IdxI = StoreCount * LaneLen * Factor + i;
18503 if (Mask[IdxI] >= 0) {
18504 Shuffle = Builder.CreateShuffleVector(
18507 unsigned StartMask = 0;
18508 for (unsigned j = 1; j < LaneLen; j++) {
18509 unsigned IdxJ = StoreCount * LaneLen * Factor + j * Factor + i;
18510 if (Mask[IdxJ] >= 0) {
18511 StartMask = Mask[IdxJ] - j;
18520 Shuffle = Builder.CreateShuffleVector(
18528 Ops.push_back(Shuffle);
18532 Ops.push_back(PTrue);
18536 if (StoreCount > 0)
18537 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
18538 BaseAddr, LaneLen * Factor);
18540 Ops.push_back(BaseAddr);
18541 Builder.CreateCall(StNFunc, Ops);
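// Each iteration emits one stN call whose operands are the Factor
// de-interleaved shuffles (plus the predicate for the SVE forms), and
// BaseAddr is advanced by LaneLen * Factor elements between stores.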
18549 if (Factor != 2 && Factor != 3 && Factor != 4) {
18550 LLVM_DEBUG(dbgs() << "Matching ld2, ld3 and ld4 patterns failed\n");
18556 assert(!Mask && "Unexpected mask on a load\n");
18560 const DataLayout &DL = LI->getModule()->getDataLayout();
18575 Type *PtrTy = LI->getPointerOperandType();
18577 UseScalable, LdTy, PtrTy);
18580 Value *Pred = nullptr;
18583 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
18585 Value *BaseAddr = LI->getPointerOperand();
18586 Value *Result = nullptr;
18587 if (NumLoads > 1) {
18590 for (unsigned I = 0; I < NumLoads; ++I) {
18594 Value *LdN = nullptr;
18596 LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
18598 LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
18601 for (unsigned J = 0; J < Factor; ++J) {
18602 ExtractedLdValues[J] = Builder.CreateInsertVector(
18603 VTy, ExtractedLdValues[J], Builder.CreateExtractValue(LdN, J), Idx);
18610 for (unsigned J = 0; J < Factor; ++J)
18611 Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
18614 Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
18616 Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
18627 unsigned Factor = InterleavedValues.size();
18628 if (Factor != 2 && Factor != 3 && Factor != 4) {
18629 LLVM_DEBUG(dbgs() << "Matching st2, st3 and st4 patterns failed\n");
18635 assert(!Mask && "Unexpected mask on plain store");
18655 Type *PtrTy = SI->getPointerOperandType();
18657 UseScalable, StTy, PtrTy);
18661 Value *BaseAddr = SI->getPointerOperand();
18662 Value *Pred = nullptr;
18666 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
18668 auto ExtractedValues = InterleavedValues;
18673 for (unsigned I = 0; I < NumStores; ++I) {
18675 if (NumStores > 1) {
18680 for (unsigned J = 0; J < Factor; J++) {
18682 Builder.CreateExtractVector(StTy, ExtractedValues[J], Idx);
18685 StoreOperands[StoreOperands.size() - 1] = Address;
18687 Builder.CreateCall(StNFunc, StoreOperands);
18694 const AttributeList &FuncAttributes) const {
18695 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18696 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18697 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18701 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18702 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18703 if (Op.isAligned(AlignCheck))
18711 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18712 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
18714 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18716 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18718 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18724 const MemOp &Op, const AttributeList &FuncAttributes) const {
18725 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
18726 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
18727 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
18731 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
18732 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
18733 if (Op.isAligned(AlignCheck))
18741 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
18742 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
18744 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
18746 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
18748 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
18755 if (Immed == std::numeric_limits<int64_t>::min()) {
18764 if (!Subtarget->hasSVE2())
18783 return std::abs(Imm / 8) <= 16;
18786 return std::abs(Imm / 4) <= 16;
18789 return std::abs(Imm / 2) <= 16;
18816 if (Insn.size() > 1)
18853 if (AM.Scale == 1) {
18856 } else if (AM.Scale == 2) {
18868 if (Ty->isScalableTy()) {
18874 uint64_t VecNumBytes = DL.getTypeSizeInBits(Ty).getKnownMinValue() / 8;
18896 if (Ty->isSized()) {
18897 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
18898 NumBytes = NumBits / 8;
18903 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs,
18911 int64_t MaxOffset) const {
18912 int64_t HighPart = MinOffset & ~0xfffULL;
18935 return Subtarget->hasFullFP16();
18941 Subtarget->isNonStreamingSVEorSME2Available();
18951 switch (Ty->getScalarType()->getTypeID()) {
18971 static const MCPhysReg ScratchRegs[] = {
18972 AArch64::X16, AArch64::X17, AArch64::LR, 0
18974 return ScratchRegs;
18978 static const MCPhysReg RCRegs[] = {AArch64::FPCR};
18987 "Expected shift op");
18989 SDValue ShiftLHS = N->getOperand(0);
18990 EVT VT = N->getValueType(0);
19011 return SRLC->getZExtValue() == SHLC->getZExtValue();
19023 (N->getOperand(0).getOpcode() == ISD::SHL ||
19024 N->getOperand(0).getOpcode() == ISD::SRL) &&
19025 "Expected XOR(SHIFT) pattern");
19030 if (XorC && ShiftC) {
19031 unsigned MaskIdx, MaskLen;
19032 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
19033 unsigned ShiftAmt = ShiftC->getZExtValue();
19034 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
19035 if (N->getOperand(0).getOpcode() == ISD::SHL)
19036 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
19037 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
19047 N->getOperand(0).getOpcode() == ISD::SRL) ||
19049 N->getOperand(0).getOpcode() == ISD::SHL)) &&
19050 "Expected shift-shift mask");
19052 if (!N->getOperand(0)->hasOneUse())
19056 EVT VT = N->getValueType(0);
19057 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
19060 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
19065 if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
19067 unsigned ShlAmt = C2->getZExtValue();
19068 if (auto ShouldADD = *N->user_begin();
19069 ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
19071 EVT MemVT = Load->getMemoryVT();
19073 if (Load->getValueType(0).isScalableVector())
19087 unsigned BinOpcode,
EVT VT,
unsigned SelectOpcode,
SDValue X,
19095 assert(Ty->isIntegerTy());
19097 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19101 int64_t Val = Imm.getSExtValue();
19108 Val &= (1LL << 32) - 1;
19116 unsigned Index)
const {
19138 EVT VT =
N->getValueType(0);
19139 if (!Subtarget->hasNEON() || !VT.
isVector())
19153 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.
getSizeInBits() - 1)
19188 if (
N->getValueType(0) != MVT::i32)
19191 SDValue VecReduceOp0 =
N->getOperand(0);
19192 bool SawTrailingZext =
false;
19198 SawTrailingZext =
true;
19203 MVT AbsInputVT = SawTrailingZext ? MVT::v16i16 : MVT::v16i32;
19205 unsigned Opcode = VecReduceOp0.
getOpcode();
19211 if (ABS->getOperand(0)->getOpcode() !=
ISD::SUB ||
19212 ABS->getOperand(0)->getValueType(0) != AbsInputVT)
19215 SDValue SUB = ABS->getOperand(0);
19216 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
19217 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
19219 if (SUB->getOperand(0)->getValueType(0) != AbsInputVT ||
19220 SUB->getOperand(1)->getValueType(0) != AbsInputVT)
19224 bool IsZExt =
false;
19232 SDValue EXT0 = SUB->getOperand(0);
19233 SDValue EXT1 = SUB->getOperand(1);
19250 UABDHigh8Op0, UABDHigh8Op1);
19261 UABDLo8Op0, UABDLo8Op1);
19269 return DAG.
getNode(ISD::VECREDUCE_ADD,
DL, MVT::i32, UADDLP);
19282 if (!
N->getValueType(0).isScalableVector() ||
19283 (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
19288 return Use->getOpcode() == ISD::EXTRACT_SUBVECTOR;
19291 auto MaskEC =
N->getValueType(0).getVectorElementCount();
19292 if (!MaskEC.isKnownMultipleOf(NumExts))
19306 if (
Use->getValueType(0).getVectorElementCount() != ExtMinEC)
19310 unsigned Offset =
Use->getConstantOperandVal(1);
19312 if (Extracts[Part] !=
nullptr)
19315 Extracts[Part] =
Use;
19331 EVT ExtVT = Extracts[0]->getValueType(0);
19335 DCI.
CombineTo(Extracts[0], R.getValue(0));
19336 DCI.
CombineTo(Extracts[1], R.getValue(1));
19340 if (NumExts == 2) {
19341 assert(
N->getValueType(0) == DoubleExtVT);
19347 for (
unsigned I = 2;
I < NumExts;
I += 2) {
19352 DCI.
CombineTo(Extracts[
I + 1], R.getValue(1));
19354 R.getValue(0), R.getValue(1)));
19368 if (!ST->isNeonAvailable())
19371 if (!ST->hasDotProd())
19382 unsigned DotOpcode;
19386 if (
A.getOperand(0).getValueType() !=
B.getOperand(0).getValueType())
19388 auto OpCodeA =
A.getOpcode();
19392 auto OpCodeB =
B.getOpcode();
19396 if (OpCodeA == OpCodeB) {
19401 if (!ST->hasMatMulInt8())
19403 DotOpcode = AArch64ISD::USDOT;
19408 DotOpcode = AArch64ISD::UDOT;
19410 DotOpcode = AArch64ISD::SDOT;
19415 EVT Op0VT =
A.getOperand(0).getValueType();
19418 if (!IsValidElementCount || !IsValidSize)
19427 B =
B.getOperand(0);
19430 unsigned NumOfVecReduce;
19432 if (IsMultipleOf16) {
19434 TargetType = MVT::v4i32;
19437 TargetType = MVT::v2i32;
19440 if (NumOfVecReduce == 1) {
19443 A.getOperand(0),
B);
19444 return DAG.
getNode(ISD::VECREDUCE_ADD,
DL,
N->getValueType(0), Dot);
19450 for (;
I < VecReduce16Num;
I += 1) {
19467 DAG.
getNode(ISD::VECREDUCE_ADD,
DL,
N->getValueType(0), ConcatSDot16);
19469 if (VecReduce8Num == 0)
19470 return VecReduceAdd16;
19482 DAG.
getNode(ISD::VECREDUCE_ADD,
DL,
N->getValueType(0), Dot);
19492 auto DetectAddExtract = [&](
SDValue A) {
19496 EVT VT =
A.getValueType();
19521 : AArch64ISD::SADDLP;
19525 if (
SDValue R = DetectAddExtract(
A))
19528 if (
A.getOperand(0).getOpcode() ==
ISD::ADD &&
A.getOperand(0).hasOneUse())
19532 if (
A.getOperand(1).getOpcode() ==
ISD::ADD &&
A.getOperand(1).hasOneUse())
19545 EVT VT =
A.getValueType();
19546 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19557 if (ExtVT0 != ExtVT1 ||
19572 return DAG.
getNode(AArch64ISD::NVCAST,
SDLoc(
A), MVT::v8i16, Uaddlv);
19589 MVT OpVT =
A.getSimpleValueType();
19590 assert(
N->getSimpleValueType(0) == OpVT &&
19591 "The operand type should be consistent with the result type of UADDV");
19595 if (KnownLeadingLanes.
isZero())
19611AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
19618 EVT VT = N->getValueType(0);
19623 if (VT.isVector() && Subtarget->isSVEorStreamingSVEAvailable())
19627 if ((VT != MVT::i32 && VT != MVT::i64) ||
19633 if (Divisor == 2 ||
19634 Divisor == APInt(Divisor.getBitWidth(), -2, true))
19641AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
19648 EVT VT = N->getValueType(0);
19656 if ((VT != MVT::i32 && VT != MVT::i64) ||
19672 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
19683 CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
19698 case Intrinsic::aarch64_sve_cntb:
19699 case Intrinsic::aarch64_sve_cnth:
19700 case Intrinsic::aarch64_sve_cntw:
19701 case Intrinsic::aarch64_sve_cntd:
19711 if (IID == Intrinsic::aarch64_sve_cntp)
19712 return Op.getOperand(1).getValueType().getVectorElementCount();
19714 case Intrinsic::aarch64_sve_cntd:
19716 case Intrinsic::aarch64_sve_cntw:
19718 case Intrinsic::aarch64_sve_cnth:
19720 case Intrinsic::aarch64_sve_cntb:
19723 return std::nullopt;
19750 return TypeNode->
getVT();
19760 if (Mask == UCHAR_MAX)
19762 else if (Mask == USHRT_MAX)
19764 else if (Mask == UINT_MAX)
19786 unsigned ExtendOpcode = Extend.
getOpcode();
19802 if (PreExtendType == MVT::Other ||
19807 bool SeenZExtOrSExt = !IsAnyExt;
19815 unsigned Opc =
Op.getOpcode();
19822 if (SeenZExtOrSExt && OpcIsSExt != IsSExt)
19825 IsSExt = OpcIsSExt;
19826 SeenZExtOrSExt =
true;
19833 EVT PreExtendLegalType =
19839 PreExtendLegalType));
19849 unsigned ExtOpc = !SeenZExtOrSExt
19852 return DAG.
getNode(ExtOpc,
DL, VT, NBV);
19859 EVT VT =
Mul->getValueType(0);
19860 if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
19871 return DAG.
getNode(
Mul->getOpcode(),
DL, VT, Op0 ? Op0 :
Mul->getOperand(0),
19872 Op1 ? Op1 :
Mul->getOperand(1));
19887 EVT VT =
Mul->getValueType(0);
19889 int ConstMultiplier =
19895 unsigned AbsConstValue =
abs(ConstMultiplier);
19896 unsigned OperandShift =
19905 unsigned B = ConstMultiplier < 0 ? 32 : 31;
19906 unsigned CeilAxOverB = (AbsConstValue + (
B - 1)) /
B;
19910 if (LowerBound > UpperBound)
19915 int Shift = std::min(std::max( 0, LowerBound), UpperBound);
19918 int32_t RdsvlMul = (AbsConstValue >> (OperandShift + Shift)) *
19919 (ConstMultiplier < 0 ? -1 : 1);
19920 auto Rdsvl = DAG.
getNode(AArch64ISD::RDSVL,
DL, MVT::i64,
19933 EVT VT =
N->getValueType(0);
19934 if (VT != MVT::v2i64 && VT != MVT::v1i64 && VT != MVT::v2i32 &&
19935 VT != MVT::v4i32 && VT != MVT::v4i16 && VT != MVT::v8i16)
19937 if (
N->getOperand(0).getOpcode() !=
ISD::AND ||
19938 N->getOperand(0).getOperand(0).getOpcode() !=
ISD::SRL)
19951 if (!V1.
isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
19952 V3 != (HalfSize - 1))
19963 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, CM);
19971 EVT VT =
N->getValueType(0);
19977 N->getOperand(0).getOperand(0).getValueType() !=
19978 N->getOperand(1).getOperand(0).getValueType())
19982 N->getOperand(0).getOpcode() !=
N->getOperand(1).getOpcode())
19985 SDValue N0 =
N->getOperand(0).getOperand(0);
19986 SDValue N1 =
N->getOperand(1).getOperand(0);
19991 if ((S2 == MVT::i32 &&
S1 == MVT::i8) ||
19992 (S2 == MVT::i64 && (
S1 == MVT::i8 ||
S1 == MVT::i16))) {
20025 EVT VT =
N->getValueType(0);
20029 unsigned AddSubOpc;
20031 auto IsAddSubWith1 = [&](
SDValue V) ->
bool {
20032 AddSubOpc = V->getOpcode();
20044 if (IsAddSubWith1(N0)) {
20046 return DAG.
getNode(AddSubOpc,
DL, VT, N1, MulVal);
20049 if (IsAddSubWith1(N1)) {
20051 return DAG.
getNode(AddSubOpc,
DL, VT, N0, MulVal);
20062 const APInt &ConstValue = C->getAPIntValue();
20069 if (ConstValue.sge(1) && ConstValue.sle(16))
20084 unsigned TrailingZeroes = ConstValue.countr_zero();
20085 if (TrailingZeroes) {
20093 if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ADD ||
20094 N->user_begin()->getOpcode() == ISD::SUB))
20099 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
20102 auto Shl = [&](SDValue N0, unsigned N1) {
20133 for (unsigned i = 1; i < BitWidth / 2; i++) {
20153 unsigned TrailingZeroes = CVMinus1.countr_zero();
20154 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1;
20170 unsigned TrailingZeroes = CVMinus1.countr_zero();
20171 APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
20191 APInt SCVMinus1 = ShiftedConstValue - 1;
20192 APInt SCVPlus1 = ShiftedConstValue + 1;
20193 APInt CVPlus1 = ConstValue + 1;
20197 return Shl(Add(Shl(N0, ShiftAmt), N0), TrailingZeroes);
20200 return Sub(Shl(N0, ShiftAmt), N0);
20202 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20203 return Sub(Shl(N0, ShiftAmt), Shl(N0, TrailingZeroes));
20205 if (Subtarget->hasALULSLFast() &&
20206 isPowPlusPlusConst(ConstValue, CVM, CVN)) {
20207 APInt CVMMinus1 = CVM - 1;
20208 APInt CVNMinus1 = CVN - 1;
20209 unsigned ShiftM1 = CVMMinus1.logBase2();
20210 unsigned ShiftN1 = CVNMinus1.logBase2();
20212 if (ShiftM1 <= 4 && ShiftN1 <= 4) {
20214 return Add(Shl(MVal, ShiftN1), MVal);
20217 if (Subtarget->hasALULSLFast() &&
20218 isPowPlusPlusOneConst(ConstValue, CVM, CVN)) {
20222 if (ShiftM <= 4 && ShiftN <= 4) {
20228 if (Subtarget->hasALULSLFast() &&
20229 isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
20233 if (ShiftM <= 4 && ShiftN <= 4) {
20242 APInt SCVPlus1 = -ShiftedConstValue + 1;
20243 APInt CVNegPlus1 = -ConstValue + 1;
20244 APInt CVNegMinus1 = -ConstValue - 1;
20247 return Sub(N0, Shl(N0, ShiftAmt));
20249 ShiftAmt = CVNegMinus1.logBase2();
20250 return Negate(Add(Shl(N0, ShiftAmt), N0));
20252 ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
20253 return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
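// The branches above decompose a multiply by constants of the form
// ±(2^M ± 1) << K into at most a couple of shifted adds/subtracts (and a
// negate), which is cheaper than a MUL on most AArch64 cores.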
20273 EVT VT =
N->getValueType(0);
20275 N->getOperand(0)->getOperand(0)->getOpcode() !=
ISD::SETCC ||
20276 VT.
getSizeInBits() !=
N->getOperand(0)->getValueType(0).getSizeInBits())
20286 if (!BV->isConstant())
20291 EVT IntVT = BV->getValueType(0);
20298 N->getOperand(0)->getOperand(0), MaskConst);
20312 if (
N->isStrictFPOpcode())
20323 return !VT.
isVector() && VT != MVT::bf16 && VT != MVT::f128;
20326 SDValue SrcVal =
N->getOperand(0);
20328 EVT DestTy =
N->getValueType(0);
20335 if (DestTy.
bitsGT(SrcTy)) {
20344 if (SrcVecTy == MVT::nxv2i32 || DestVecTy == MVT::nxv2i32)
20350 DAG.
getUNDEF(SrcVecTy), SrcVal, ZeroIdx);
20367 EVT VT =
N->getValueType(0);
20368 if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
20370 if (VT == MVT::f16 && !Subtarget->hasFullFP16())
20374 if (VT.
getSizeInBits() !=
N->getOperand(0).getValueSizeInBits())
20395 (
N->getOpcode() ==
ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
20414 if (!
N->getValueType(0).isSimple())
20418 if (!
Op.getValueType().isSimple() ||
Op.getOpcode() !=
ISD::FMUL)
20421 if (!
Op.getValueType().is64BitVector() && !
Op.getValueType().is128BitVector())
20428 MVT FloatTy =
Op.getSimpleValueType().getVectorElementType();
20430 if (FloatBits != 32 && FloatBits != 64 &&
20431 (FloatBits != 16 || !Subtarget->hasFullFP16()))
20434 MVT IntTy =
N->getSimpleValueType(0).getVectorElementType();
20435 uint32_t IntBits = IntTy.getSizeInBits();
20436 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
20440 if (IntBits > FloatBits)
20445 int32_t Bits = IntBits == 64 ? 64 : 32;
20447 if (
C == -1 ||
C == 0 ||
C > Bits)
20450 EVT ResTy =
Op.getValueType().changeVectorElementTypeToInteger();
20464 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
20465 : Intrinsic::aarch64_neon_vcvtfp2fxu;
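// A vector floating-point multiply by 2^C feeding an fp-to-int conversion is
// folded here into a single fixed-point convert (via the vcvtfp2fxs /
// vcvtfp2fxu intrinsics), provided C fits the 32/64-bit range checked above.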
20471 if (IntBits < FloatBits)
20488 EVT VT =
N->getValueType(0);
20492 if (CSel0.
getOpcode() != AArch64ISD::CSEL ||
20511 if (Cmp1.
getOpcode() != AArch64ISD::SUBS &&
20512 Cmp0.
getOpcode() == AArch64ISD::SUBS) {
20517 if (Cmp1.
getOpcode() != AArch64ISD::SUBS)
20537 if (Op1 && Op1->getAPIntValue().isNegative() &&
20538 Op1->getAPIntValue().sgt(-32)) {
20545 AbsOp1, NZCVOp, Condition, Cmp0);
20548 Cmp1.
getOperand(1), NZCVOp, Condition, Cmp0);
20572 MaskForTy = 0xffull;
20575 MaskForTy = 0xffffull;
20578 MaskForTy = 0xffffffffull;
20587 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
20595 while (
Op.getOpcode() == AArch64ISD::REINTERPRET_CAST &&
20597 Op =
Op->getOperand(0);
20607 unsigned Opc = Src->getOpcode();
20610 if (
Opc == AArch64ISD::UUNPKHI ||
Opc == AArch64ISD::UUNPKLO) {
20624 auto MaskAndTypeMatch = [ExtVal](
EVT VT) ->
bool {
20625 return ((ExtVal == 0xFF && VT == MVT::i8) ||
20626 (ExtVal == 0xFFFF && VT == MVT::i16) ||
20627 (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
20633 if (MaskAndTypeMatch(EltTy))
20639 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() ==
ISD::ZEXTLOAD ||
20642 if (MaskAndTypeMatch(EltTy))
20666 return N->getOperand(1);
20668 return N->getOperand(0);
20675 if (!Src.hasOneUse())
20683 case AArch64ISD::LD1_MERGE_ZERO:
20684 case AArch64ISD::LDNF1_MERGE_ZERO:
20685 case AArch64ISD::LDFF1_MERGE_ZERO:
20688 case AArch64ISD::GLD1_MERGE_ZERO:
20689 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
20690 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
20691 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
20692 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
20693 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
20694 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
20695 case AArch64ISD::GLDFF1_MERGE_ZERO:
20696 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
20697 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
20698 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
20699 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
20700 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
20701 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
20702 case AArch64ISD::GLDNT1_MERGE_ZERO:
20723 EVT VT =
N->getValueType(0);
20729 for (
auto U :
N->users())
20760 EVT VT =
N->getValueType(0);
20800 DefBits = ~(DefBits | ZeroSplat);
20807 UndefBits = ~(UndefBits | ZeroSplat);
20809 UndefBits, &
LHS)) ||
20823 EVT VT =
N->getValueType(0);
20826 if (!
N->getFlags().hasAllowReassociation())
20833 unsigned Opc =
A.getConstantOperandVal(0);
20834 if (
Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
20835 Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
20836 Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
20837 Opc != Intrinsic::aarch64_neon_vcmla_rot270)
20842 A.getOperand(2),
A.getOperand(3));
20858 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
20860 return VT == MVT::i64;
20872 (
N.getOpcode() == ISD::GET_ACTIVE_LANE_MASK) ||
20874 (
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
20875 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
20876 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
20877 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
20878 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
20879 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi_x2 ||
20880 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
20881 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs_x2 ||
20882 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
20883 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele_x2 ||
20884 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
20885 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo_x2 ||
20886 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
20887 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
20888 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
20889 N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2)))
20948 if (VS.getOpcode() != ISD::VSCALE)
20952 if (VS.getConstantOperandVal(0) != NumEls)
20975 MVT::bf16, MVT::f32, MVT::f64}),
20981 if (!TLI.
isOperationLegal(ISD::VECTOR_FIND_LAST_ACTIVE, Mask.getValueType()))
20984 return DAG.
getNode(AArch64ISD::LASTB,
SDLoc(
N),
N->getValueType(0), Mask,
21000 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
21002 EVT VT =
N->getValueType(0);
21003 const bool FullFP16 = Subtarget->hasFullFP16();
21035 if (Shuffle && Shuffle->
getMaskElt(0) == 1 &&
21050 {N0->getOperand(0), Extract1, Extract2});
21069 unsigned OffsetElts = 0;
21085 Load->getMemoryVT().isByteSized() &&
21087 return U.getResNo() != N0.getResNo() ||
21088 (U.getUser()->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21089 !any_of(U.getUser()->uses(), [](const SDUse &U2) {
21090 return U2.getUser()->getOpcode() ==
21091 ISD::INSERT_VECTOR_ELT ||
21092 U2.getUser()->getOpcode() == ISD::BUILD_VECTOR ||
21093 U2.getUser()->getOpcode() == ISD::SCALAR_TO_VECTOR;
21100 unsigned Offset = (OffsetElts +
N->getConstantOperandVal(1)) *
21101 Load->getValueType(0).getScalarSizeInBits() / 8;
21110 DAG.
getExtLoad(ExtType,
DL, VT, Load->getChain(), BasePtr,
21111 Load->getPointerInfo().getWithOffset(
Offset),
21112 Load->getValueType(0).getScalarType(),
21114 Load->getMemOperand()->getFlags(), Load->getAAInfo());
21127 EVT VT =
N->getValueType(0);
21128 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
21153 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
21155 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
21157 for (
size_t i = 0; i < Mask.size(); ++i)
21162 DAG.
getNode(ISD::BITCAST,
DL, MidVT, N00),
21163 DAG.
getNode(ISD::BITCAST,
DL, MidVT, N10), Mask));
21179 if (N00Opc == AArch64ISD::VLSHR && N10Opc == AArch64ISD::VLSHR &&
21185 NScalarSize =
N->getValueType(0).getScalarSizeInBits();
21187 if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
21188 N000 = DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, N000);
21189 N100 = DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, N100);
21194 return DAG.
getNode(AArch64ISD::VLSHR,
DL, VT, Uzp, NewShiftConstant);
21199 if (
N->getOperand(0).getValueType() == MVT::v4i8 ||
21200 N->getOperand(0).getValueType() == MVT::v2i16 ||
21201 N->getOperand(0).getValueType() == MVT::v2i8) {
21202 EVT SrcVT =
N->getOperand(0).getValueType();
21206 if (
N->getNumOperands() % 2 == 0 &&
21208 if (V.getValueType() != SrcVT)
21212 LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
21213 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
21214 LD->getExtensionType() == ISD::NON_EXTLOAD;
21216 EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
21220 for (
unsigned i = 0; i <
N->getNumOperands(); i++) {
21227 LD->getBasePtr(), LD->getMemOperand());
21229 Ops.push_back(NewLoad);
21248 auto isBitwiseVectorNegate = [](
SDValue V) {
21249 return V->getOpcode() ==
ISD::XOR &&
21275 if (
N->getNumOperands() == 2 && N0Opc == N1Opc && VT.
is128BitVector() &&
21287 return DAG.
getNode(N0Opc,
DL, VT, Concat0, Concat1);
21291 auto IsRSHRN = [](
SDValue Shr) {
21292 if (Shr.getOpcode() != AArch64ISD::VLSHR)
21295 EVT VT =
Op.getValueType();
21296 unsigned ShtAmt = Shr.getConstantOperandVal(1);
21301 if (
Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
21303 Op.getOperand(1).getConstantOperandVal(0)
21304 <<
Op.getOperand(1).getConstantOperandVal(1));
21305 else if (
Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
21308 Op.getOperand(1).getConstantOperandVal(0));
21312 if (Imm != 1ULL << (ShtAmt - 1))
21318 if (
N->getNumOperands() == 2 && IsRSHRN(N0) &&
21326 X.getValueType().getDoubleNumVectorElementsVT(*DCI.
DAG.
getContext());
21337 if (
N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
21344 return DAG.
getNode(AArch64ISD::ZIP1,
DL, VT, E0, E1);
21365 if (
N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
21368 MVT RHSTy =
RHS.getValueType().getSimpleVT();
21374 dbgs() <<
"aarch64-lower: concat_vectors bitcast simplification\n");
21378 return DAG.
getNode(ISD::BITCAST,
DL, VT,
21380 DAG.
getNode(ISD::BITCAST,
DL, RHSTy, N0),
21390 EVT VT =
N->getValueType(0);
21412 SDValue SubVec =
N->getOperand(1);
21413 uint64_t IdxVal =
N->getConstantOperandVal(2);
21430 if (IdxVal == 0 && Vec.
isUndef())
21436 (IdxVal != 0 && IdxVal != NumSubElts))
21481 EVT ResTy =
N->getValueType(0);
21492 VecResTy = MVT::v4f32;
21494 VecResTy = MVT::v2f64;
21519 MVT VT =
N.getSimpleValueType();
21521 N.getConstantOperandVal(1) == 0)
21522 N =
N.getOperand(0);
21524 switch (
N.getOpcode()) {
21525 case AArch64ISD::DUP:
21526 case AArch64ISD::DUPLANE8:
21527 case AArch64ISD::DUPLANE16:
21528 case AArch64ISD::DUPLANE32:
21529 case AArch64ISD::DUPLANE64:
21530 case AArch64ISD::MOVI:
21531 case AArch64ISD::MOVIshift:
21532 case AArch64ISD::MOVIedit:
21533 case AArch64ISD::MOVImsl:
21534 case AArch64ISD::MVNIshift:
21535 case AArch64ISD::MVNImsl:
21549 if (
N.getValueType().is64BitVector()) {
21560 if (
N.getOpcode() == ISD::BITCAST)
21561 N =
N.getOperand(0);
21564 if (
N.getOperand(0).getValueType().isScalableVector())
21566 return N.getConstantOperandAPInt(1) ==
21567 N.getOperand(0).getValueType().getVectorNumElements() / 2;
21616 if (
Op.getOpcode() != AArch64ISD::CSEL)
21632 if (!TValue || !FValue)
21636 if (!TValue->
isOne()) {
21683 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
21700 EVT VT =
Op->getValueType(0);
21707 EVT VT =
N->getValueType(0);
21720 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
21727 if (Op1.
getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
21728 Op2.
getOpcode() != AArch64ISD::UADDV ||
21738 DAG.
getNode(AArch64ISD::UADDV,
DL, ValVT, AddVal),
21746 EVT VT =
N->getValueType(0);
21754 if (
LHS.getOpcode() != AArch64ISD::CSEL &&
21755 LHS.getOpcode() != AArch64ISD::CSNEG) {
21757 if (
LHS.getOpcode() != AArch64ISD::CSEL &&
21758 LHS.getOpcode() != AArch64ISD::CSNEG) {
21763 if (!
LHS.hasOneUse())
21773 if (!CTVal || !CFVal)
21776 if (!(
LHS.getOpcode() == AArch64ISD::CSEL &&
21778 !(
LHS.getOpcode() == AArch64ISD::CSNEG &&
21783 if (
LHS.getOpcode() == AArch64ISD::CSEL && CTVal->
isOne() &&
21791 if (
LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->
isOne() &&
21806 assert(((
LHS.getOpcode() == AArch64ISD::CSEL && CFVal->
isOne()) ||
21807 (
LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->
isAllOnes())) &&
21808 "Unexpected constant value");
21814 return DAG.
getNode(AArch64ISD::CSINC,
DL, VT, NewNode,
RHS, CCVal, Cmp);
21819 EVT VT =
N->getValueType(0);
21826 auto isZeroDot = [](
SDValue Dot) {
21827 return (Dot.
getOpcode() == AArch64ISD::UDOT ||
21828 Dot.
getOpcode() == AArch64ISD::SDOT) &&
21831 if (!isZeroDot(Dot))
21833 if (!isZeroDot(Dot))
21894 MVT VT =
N->getSimpleValueType(0);
21906 LHS.getOpcode() !=
RHS.getOpcode())
21909 unsigned ExtType =
LHS.getOpcode();
21915 if (!
RHS.getNode())
21921 if (!
LHS.getNode())
21931 return Op.getOpcode() == AArch64ISD::SUBS &&
21932 !
Op.getNode()->hasAnyUseOfValue(0);
21938 if (
Op.getOpcode() != AArch64ISD::CSEL)
21939 return std::nullopt;
21942 return std::nullopt;
21948 return getInvertedCondCode(CC);
21950 return std::nullopt;
21974 Op->getOperand(0),
Op->getOperand(1),
21987 EVT VT =
N->getValueType(0);
21999 EVT VT =
N->getValueType(0);
22002 (VT == MVT::v4f16 || VT == MVT::v4bf16)) {
22003 SDValue Elt0 =
N->getOperand(0), Elt1 =
N->getOperand(1),
22004 Elt2 =
N->getOperand(2), Elt3 =
N->getOperand(3);
22018 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) {
22022 if (Elt2->isUndef() && Elt3->isUndef()) {
22028 Elt2->getConstantOperandVal(1) ==
22029 Elt3->getConstantOperandVal(1) &&
22030 Elt2->getOperand(0)->getOpcode() ==
22032 Elt3->getOperand(0)->getOpcode() ==
22037 Elt2->getOperand(0)->getOperand(0) ==
22038 Elt3->getOperand(0)->getOperand(0) &&
22039 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
22040 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
22043 DAG.
getNode(AArch64ISD::FCVTXN,
DL, MVT::v2f32, HighLanesSrcVec);
22046 SDValue DoubleToSingleSticky =
22047 DAG.
getNode(AArch64ISD::FCVTXN,
DL, MVT::v2f32, LowLanesSrcVec);
22049 DoubleToSingleSticky, HighLanes);
22057 if (VT == MVT::v2f64) {
22058 SDValue Elt0 =
N->getOperand(0), Elt1 =
N->getOperand(1);
22059 if (Elt0->
getOpcode() == ISD::FP_EXTEND &&
22060 Elt1->getOpcode() == ISD::FP_EXTEND &&
22069 Elt1->getOperand(0)->getConstantOperandVal(1) &&
22079 DAG.
getNode(ISD::FP_EXTEND,
DL, MVT::v4f32, SrcVec);
22083 HalfToSingle, SubvectorIdx);
22084 return DAG.
getNode(ISD::FP_EXTEND,
DL, VT, Extract);
22098 if (VT != MVT::v2i32)
22101 SDValue Elt0 =
N->getOperand(0), Elt1 =
N->getOperand(1);
22138 EVT DestVT =
N->getValueType(0);
22150 unsigned ShiftAmt = 0;
22152 case (1ULL << 15) - 1:
22153 ScalarType = MVT::i16;
22156 case (1ULL << 31) - 1:
22157 ScalarType = MVT::i32;
22169 if (!RightShiftVec)
22173 if (SExtValue != (ShiftAmt - 1))
22190 if (SExt0Type != SExt1Type || SExt0Type.
getScalarType() != ScalarType ||
22225 EVT VT =
N->getValueType(0);
22248 "Unexpected legalisation result!");
22250 EVT SrcVectorType =
Op.getValueType();
22253 assert((SrcVectorType == MVT::v2i64 || SrcVectorType == MVT::nxv2i64) &&
22254 "Unexpected legalisation result!");
22256 unsigned ExtractIndex =
22270 unsigned Opcode =
N.getOpcode();
22276 SrcVT =
N.getOperand(0).getValueType();
22278 return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
22284 return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
22296 auto IsOneUseExtend = [](
SDValue N) {
22307 if (SUB.getOpcode() !=
ISD::SUB || !SUB.hasOneUse())
22310 SDValue Shift = SUB.getOperand(0);
22311 if (!IsOneUseExtend(Shift))
22315 EVT VT =
N->getValueType(0);
22331 EVT VT =
N->getValueType(0);
22332 if (VT != MVT::i32 && VT != MVT::i64)
22355 RHSImm > 4 &&
LHS.hasOneUse())
22372 if (!
Add.hasOneUse())
22379 if (
M1.getOpcode() !=
ISD::MUL &&
M1.getOpcode() != AArch64ISD::SMULL &&
22380 M1.getOpcode() != AArch64ISD::UMULL)
22386 EVT VT =
N->getValueType(0);
22411 if (!
N->getValueType(0).isFixedLengthVector())
22422 if (MulValue.
getOpcode() != AArch64ISD::MUL_PRED)
22434 DAG.
getNode(
N->getOpcode(),
SDLoc(
N), ScalableVT, {ScaledOp, MulValue});
22438 if (
SDValue res = performOpt(
N->getOperand(0),
N->getOperand(1)))
22441 return performOpt(
N->getOperand(1),
N->getOperand(0));
22449 EVT VT =
N->getValueType(0);
22450 if (VT != MVT::i64 ||
22478 DAG.
getNode(
N->getOpcode(),
DL, MVT::v1i64, Op0, Op1),
22487 if (!Ld || !Ld->isSimple())
22518 B.getOperand(1).getNumOperands() != 4)
22522 int NumElts =
B.getValueType().getVectorNumElements();
22523 int NumSubElts = NumElts / 4;
22524 for (
int I = 0;
I < NumSubElts;
I++) {
22526 if (SV1->getMaskElt(
I) !=
I ||
22527 SV1->getMaskElt(
I + NumSubElts) !=
I + NumSubElts ||
22528 SV1->getMaskElt(
I + NumSubElts * 2) !=
I + NumSubElts * 2 ||
22529 SV1->getMaskElt(
I + NumSubElts * 3) !=
I + NumElts)
22532 if (SV2->getMaskElt(
I) !=
I ||
22533 SV2->getMaskElt(
I + NumSubElts) !=
I + NumSubElts ||
22534 SV2->getMaskElt(
I + NumSubElts * 2) !=
I + NumElts)
22541 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
22542 !Ld2->isSimple() || !Ld3->isSimple())
22555 unsigned &NumSubLoads) {
22562 if (NumSubLoads && Loads0.
size() != NumSubLoads)
22564 NumSubLoads = Loads0.
size();
22565 return Loads0.
size() == Loads1.
size() &&
22566 all_of(
zip(Loads0, Loads1), [&DAG](
auto L) {
22567 unsigned Size =
get<0>(L)->getValueType(0).getSizeInBits();
22568 return Size ==
get<1>(L)->getValueType(0).getSizeInBits() &&
22581 DAG, NumSubLoads) &&
22609 EVT VT =
N->getValueType(0);
22627 Other.getOperand(0).getValueType() ||
22634 unsigned NumSubLoads = 0;
22643 unsigned NumSubElts = NumElts / NumSubLoads;
22665 for (
const auto &[L0, L1] :
zip(Loads0, Loads1)) {
22667 L0->getBasePtr(), L0->getPointerInfo(),
22668 L0->getBaseAlign());
22678 Ops.push_back(GenCombinedTree(O0, O1, DAG));
22681 SDValue NewOp = GenCombinedTree(Op0, Op1, DAG);
22684 int Hi = NumSubElts,
Lo = 0;
22685 for (
unsigned i = 0; i < NumSubLoads; i++) {
22686 for (
unsigned j = 0; j < NumSubElts; j++) {
22687 LowMask[i * NumSubElts + j] =
Lo++;
22688 HighMask[i * NumSubElts + j] =
Hi++;
22723 return DAG.
getNode(
N->getOpcode(),
DL, VT, Ext0, NShift);
22734 EVT VT =
N->getValueType(0);
22735 if (VT != MVT::i32 && VT != MVT::i64)
22745 if (Flags.getOpcode() != AArch64ISD::SUBS)
22765 EVT VT =
N->getValueType(0);
22766 if (VT != MVT::v2i32 && VT != MVT::v4i16 && VT != MVT::v8i8)
22772 if (AShr.
getOpcode() != AArch64ISD::VASHR)
22774 if (AShr.
getOpcode() != AArch64ISD::VASHR ||
22775 LShr.
getOpcode() != AArch64ISD::VLSHR ||
22784 AArch64ISD::VLSHR,
DL, VT, Trunc,
22836 assert(
LHS.getValueType().is64BitVector() &&
22837 RHS.getValueType().is64BitVector() &&
22838 "unexpected shape for long operation");
22845 if (!
RHS.getNode())
22849 if (!
LHS.getNode())
22862 MVT ElemTy =
N->getSimpleValueType(0).getScalarType();
22863 unsigned ElemBits = ElemTy.getSizeInBits();
22865 int64_t ShiftAmount;
22867 APInt SplatValue, SplatUndef;
22868 unsigned SplatBitSize;
22871 HasAnyUndefs, ElemBits) ||
22872 SplatBitSize != ElemBits)
22877 ShiftAmount = CVN->getSExtValue();
22882 if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
22883 return N->getOperand(1);
22890 case Intrinsic::aarch64_neon_sqshl:
22891 Opcode = AArch64ISD::SQSHL_I;
22892 IsRightShift = false;
22894 case Intrinsic::aarch64_neon_uqshl:
22895 Opcode = AArch64ISD::UQSHL_I;
22896 IsRightShift = false;
22898 case Intrinsic::aarch64_neon_srshl:
22899 Opcode = AArch64ISD::SRSHR_I;
22900 IsRightShift = true;
22902 case Intrinsic::aarch64_neon_urshl:
22903 Opcode = AArch64ISD::URSHR_I;
22904 IsRightShift = true;
22906 case Intrinsic::aarch64_neon_sqshlu:
22907 Opcode = AArch64ISD::SQSHLU_I;
22908 IsRightShift = false;
22910 case Intrinsic::aarch64_neon_sshl:
22911 case Intrinsic::aarch64_neon_ushl:
22915 if (ShiftAmount < 0) {
22916 Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
22917 : AArch64ISD::VLSHR;
22918 ShiftAmount = -ShiftAmount;
22920 Opcode = AArch64ISD::VSHL;
22921 IsRightShift = false;
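// sshl/ushl with a constant negative shift amount are rewritten as immediate
// right shifts (VASHR/VLSHR); non-negative amounts stay as an immediate VSHL.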
22925 EVT VT =
N->getValueType(0);
22928 if (VT == MVT::i64) {
22933 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(
int)ElemBits) {
22936 if (
N->getValueType(0) == MVT::i64)
22940 }
else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
22943 if (
N->getValueType(0) == MVT::i64)
22965 N->getOperand(0),
N->getOperand(1), AndN.
getOperand(0));
22972 DAG.
getNode(
Opc,
DL,
N->getOperand(1).getSimpleValueType(),
22982 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
22983 ScalarTy = MVT::i32;
22995 SDValue Scalar =
N->getOperand(3);
22996 EVT ScalarTy = Scalar.getValueType();
22998 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
23001 SDValue Passthru =
N->getOperand(1);
23003 return DAG.
getNode(AArch64ISD::DUP_MERGE_PASSTHRU,
DL,
N->getValueType(0),
23004 Pred, Scalar, Passthru);
23010 EVT VT =
N->getValueType(0);
23030 return DAG.
getNode(ISD::BITCAST,
DL, VT, EXT);
23039 SDValue Comparator =
N->getOperand(3);
23040 if (Comparator.
getOpcode() == AArch64ISD::DUP ||
23043 EVT VT =
N->getValueType(0);
23044 EVT CmpVT =
N->getOperand(2).getValueType();
23055 case Intrinsic::aarch64_sve_cmpeq_wide:
23056 case Intrinsic::aarch64_sve_cmpne_wide:
23057 case Intrinsic::aarch64_sve_cmpge_wide:
23058 case Intrinsic::aarch64_sve_cmpgt_wide:
23059 case Intrinsic::aarch64_sve_cmplt_wide:
23060 case Intrinsic::aarch64_sve_cmple_wide: {
23062 int64_t ImmVal = CN->getSExtValue();
23063 if (ImmVal >= -16 && ImmVal <= 15)
23071 case Intrinsic::aarch64_sve_cmphs_wide:
23072 case Intrinsic::aarch64_sve_cmphi_wide:
23073 case Intrinsic::aarch64_sve_cmplo_wide:
23074 case Intrinsic::aarch64_sve_cmpls_wide: {
23076 uint64_t ImmVal = CN->getZExtValue();
23090 return DAG.
getNode(AArch64ISD::SETCC_MERGE_ZERO,
DL, VT, Pred,
23102 assert(Op.getValueType().isScalableVector() &&
23104 "Expected legal scalable vector type!");
23106 "Expected same type for PTEST operands");
23114 if (Op.getValueType() != MVT::nxv16i1) {
23117 Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pg);
23120 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Op);
23123 unsigned PTest = AArch64ISD::PTEST;
23125 PTest = AArch64ISD::PTEST_ANY;
23127 PTest = AArch64ISD::PTEST_FIRST;
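// Predicates narrower than nxv16i1 are reinterpreted to the full predicate
// width before the test; the PTEST_ANY and PTEST_FIRST variants are chosen
// when the caller reads the "any" or "first" condition rather than "last".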
23144 SDValue VecToReduce =
N->getOperand(2);
23163 SDValue VecToReduce =
N->getOperand(2);
23180 SDValue InitVal =
N->getOperand(2);
23181 SDValue VecToReduce =
N->getOperand(3);
23188 DAG.
getUNDEF(ReduceVT), InitVal, Zero);
23200 if (
N->getValueType(0) != MVT::i16)
23214 bool SwapOperands =
false) {
23216 assert(
N->getNumOperands() == 4 &&
"Expected 3 operand intrinsic!");
23218 SDValue Op1 =
N->getOperand(SwapOperands ? 3 : 2);
23219 SDValue Op2 =
N->getOperand(SwapOperands ? 2 : 3);
23235 EVT VT =
N->getValueType(0);
23243 case Intrinsic::aarch64_sve_bsl:
23244 return DAG.
getNode(AArch64ISD::BSP,
DL, VT, Op3, Op1, Op2);
23245 case Intrinsic::aarch64_sve_bsl1n:
23248 case Intrinsic::aarch64_sve_bsl2n:
23249 return DAG.
getNode(AArch64ISD::BSP,
DL, VT, Op3, Op1,
23251 case Intrinsic::aarch64_sve_nbsl:
23265 case Intrinsic::aarch64_neon_vcvtfxs2fp:
23266 case Intrinsic::aarch64_neon_vcvtfxu2fp:
23268 case Intrinsic::aarch64_neon_saddv:
23270 case Intrinsic::aarch64_neon_uaddv:
23272 case Intrinsic::aarch64_neon_sminv:
23274 case Intrinsic::aarch64_neon_uminv:
23276 case Intrinsic::aarch64_neon_smaxv:
23278 case Intrinsic::aarch64_neon_umaxv:
23280 case Intrinsic::aarch64_neon_fmax:
23282 N->getOperand(1),
N->getOperand(2));
23283 case Intrinsic::aarch64_neon_fmin:
23285 N->getOperand(1),
N->getOperand(2));
23286 case Intrinsic::aarch64_neon_fmaxnm:
23288 N->getOperand(1),
N->getOperand(2));
23289 case Intrinsic::aarch64_neon_fminnm:
23291 N->getOperand(1),
N->getOperand(2));
23292 case Intrinsic::aarch64_neon_smull:
23293 return DAG.
getNode(AArch64ISD::SMULL,
SDLoc(
N),
N->getValueType(0),
23294 N->getOperand(1),
N->getOperand(2));
23295 case Intrinsic::aarch64_neon_umull:
23296 return DAG.
getNode(AArch64ISD::UMULL,
SDLoc(
N),
N->getValueType(0),
23297 N->getOperand(1),
N->getOperand(2));
23298 case Intrinsic::aarch64_neon_pmull:
23299 return DAG.
getNode(AArch64ISD::PMULL,
SDLoc(
N),
N->getValueType(0),
23300 N->getOperand(1),
N->getOperand(2));
23301 case Intrinsic::aarch64_neon_sqdmull:
23303 case Intrinsic::aarch64_neon_sqshl:
23304 case Intrinsic::aarch64_neon_uqshl:
23305 case Intrinsic::aarch64_neon_sqshlu:
23306 case Intrinsic::aarch64_neon_srshl:
23307 case Intrinsic::aarch64_neon_urshl:
23308 case Intrinsic::aarch64_neon_sshl:
23309 case Intrinsic::aarch64_neon_ushl:
23311 case Intrinsic::aarch64_neon_sabd:
23313 N->getOperand(1),
N->getOperand(2));
23314 case Intrinsic::aarch64_neon_uabd:
23316 N->getOperand(1),
N->getOperand(2));
23317 case Intrinsic::aarch64_neon_fcvtzs:
23319 case Intrinsic::aarch64_neon_fcvtzu:
23321 case Intrinsic::aarch64_neon_fcvtas:
23323 case Intrinsic::aarch64_neon_fcvtau:
23325 case Intrinsic::aarch64_neon_fcvtms:
23327 case Intrinsic::aarch64_neon_fcvtmu:
23329 case Intrinsic::aarch64_neon_fcvtns:
23331 case Intrinsic::aarch64_neon_fcvtnu:
23333 case Intrinsic::aarch64_neon_fcvtps:
23335 case Intrinsic::aarch64_neon_fcvtpu:
23337 case Intrinsic::aarch64_crc32b:
23338 case Intrinsic::aarch64_crc32cb:
23340 case Intrinsic::aarch64_crc32h:
23341 case Intrinsic::aarch64_crc32ch:
23343 case Intrinsic::aarch64_sve_saddv:
23345 if (
N->getOperand(2).getValueType().getVectorElementType() == MVT::i64)
23349 case Intrinsic::aarch64_sve_uaddv:
23351 case Intrinsic::aarch64_sve_smaxv:
23353 case Intrinsic::aarch64_sve_umaxv:
23355 case Intrinsic::aarch64_sve_sminv:
23357 case Intrinsic::aarch64_sve_uminv:
23359 case Intrinsic::aarch64_sve_orv:
23361 case Intrinsic::aarch64_sve_eorv:
23363 case Intrinsic::aarch64_sve_andv:
23365 case Intrinsic::aarch64_sve_index:
23367 case Intrinsic::aarch64_sve_dup:
23369 case Intrinsic::aarch64_sve_dup_x:
23372 case Intrinsic::aarch64_sve_ext:
23374 case Intrinsic::aarch64_sve_mul_u:
23375 return DAG.
getNode(AArch64ISD::MUL_PRED,
SDLoc(
N),
N->getValueType(0),
23376 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23377 case Intrinsic::aarch64_sve_smulh_u:
23378 return DAG.
getNode(AArch64ISD::MULHS_PRED,
SDLoc(
N),
N->getValueType(0),
23379 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23380 case Intrinsic::aarch64_sve_umulh_u:
23381 return DAG.
getNode(AArch64ISD::MULHU_PRED,
SDLoc(
N),
N->getValueType(0),
23382 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23383 case Intrinsic::aarch64_sve_smin_u:
23384 return DAG.
getNode(AArch64ISD::SMIN_PRED,
SDLoc(
N),
N->getValueType(0),
23385 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23386 case Intrinsic::aarch64_sve_umin_u:
23387 return DAG.
getNode(AArch64ISD::UMIN_PRED,
SDLoc(
N),
N->getValueType(0),
23388 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23389 case Intrinsic::aarch64_sve_smax_u:
23390 return DAG.
getNode(AArch64ISD::SMAX_PRED,
SDLoc(
N),
N->getValueType(0),
23391 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23392 case Intrinsic::aarch64_sve_umax_u:
23393 return DAG.
getNode(AArch64ISD::UMAX_PRED,
SDLoc(
N),
N->getValueType(0),
23394 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23395 case Intrinsic::aarch64_sve_lsl_u:
23396 return DAG.
getNode(AArch64ISD::SHL_PRED,
SDLoc(
N),
N->getValueType(0),
23397 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23398 case Intrinsic::aarch64_sve_lsr_u:
23399 return DAG.
getNode(AArch64ISD::SRL_PRED,
SDLoc(
N),
N->getValueType(0),
23400 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23401 case Intrinsic::aarch64_sve_asr_u:
23402 return DAG.
getNode(AArch64ISD::SRA_PRED,
SDLoc(
N),
N->getValueType(0),
23403 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23404 case Intrinsic::aarch64_sve_fadd_u:
23405 return DAG.
getNode(AArch64ISD::FADD_PRED,
SDLoc(
N),
N->getValueType(0),
23406 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23407 case Intrinsic::aarch64_sve_fdiv_u:
23408 return DAG.
getNode(AArch64ISD::FDIV_PRED,
SDLoc(
N),
N->getValueType(0),
23409 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23410 case Intrinsic::aarch64_sve_fmax_u:
23411 return DAG.
getNode(AArch64ISD::FMAX_PRED,
SDLoc(
N),
N->getValueType(0),
23412 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23413 case Intrinsic::aarch64_sve_fmaxnm_u:
23414 return DAG.
getNode(AArch64ISD::FMAXNM_PRED,
SDLoc(
N),
N->getValueType(0),
23415 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23416 case Intrinsic::aarch64_sve_fmla_u:
23417 return DAG.
getNode(AArch64ISD::FMA_PRED,
SDLoc(
N),
N->getValueType(0),
23418 N->getOperand(1),
N->getOperand(3),
N->getOperand(4),
23420 case Intrinsic::aarch64_sve_fmin_u:
23421 return DAG.
getNode(AArch64ISD::FMIN_PRED,
SDLoc(
N),
N->getValueType(0),
23422 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23423 case Intrinsic::aarch64_sve_fminnm_u:
23424 return DAG.
getNode(AArch64ISD::FMINNM_PRED,
SDLoc(
N),
N->getValueType(0),
23425 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23426 case Intrinsic::aarch64_sve_fmul_u:
23427 return DAG.
getNode(AArch64ISD::FMUL_PRED,
SDLoc(
N),
N->getValueType(0),
23428 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23429 case Intrinsic::aarch64_sve_fsub_u:
23430 return DAG.
getNode(AArch64ISD::FSUB_PRED,
SDLoc(
N),
N->getValueType(0),
23431 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23432 case Intrinsic::aarch64_sve_add_u:
23435 case Intrinsic::aarch64_sve_sub_u:
23438 case Intrinsic::aarch64_sve_subr:
23440 case Intrinsic::aarch64_sve_and_u:
23443 case Intrinsic::aarch64_sve_bic_u:
23444 return DAG.
getNode(AArch64ISD::BIC,
SDLoc(
N),
N->getValueType(0),
23445 N->getOperand(2),
N->getOperand(3));
23446 case Intrinsic::aarch64_sve_saddwb:
23447 return DAG.getNode(AArch64ISD::SADDWB, SDLoc(N), N->getValueType(0),
23448 N->getOperand(1), N->getOperand(2));
23449 case Intrinsic::aarch64_sve_saddwt:
23450 return DAG.getNode(AArch64ISD::SADDWT, SDLoc(N), N->getValueType(0),
23451 N->getOperand(1), N->getOperand(2));
23452 case Intrinsic::aarch64_sve_uaddwb:
23453 return DAG.getNode(AArch64ISD::UADDWB, SDLoc(N), N->getValueType(0),
23454 N->getOperand(1), N->getOperand(2));
23455 case Intrinsic::aarch64_sve_uaddwt:
23456 return DAG.getNode(AArch64ISD::UADDWT, SDLoc(N), N->getValueType(0),
23457 N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_sve_eor_u:
  case Intrinsic::aarch64_sve_orr_u:
  case Intrinsic::aarch64_sve_sabd_u:
    return DAG.getNode(AArch64ISD::ABDS_PRED, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
  case Intrinsic::aarch64_sve_uabd_u:
    return DAG.getNode(AArch64ISD::ABDU_PRED, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
  case Intrinsic::aarch64_sve_sdiv_u:
    return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
  case Intrinsic::aarch64_sve_udiv_u:
    return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
23480 case Intrinsic::aarch64_sve_sqadd:
23482 case Intrinsic::aarch64_sve_sqsub_u:
23484 N->getOperand(2),
N->getOperand(3));
23485 case Intrinsic::aarch64_sve_uqadd:
23487 case Intrinsic::aarch64_sve_uqsub_u:
23489 N->getOperand(2),
N->getOperand(3));
23490 case Intrinsic::aarch64_sve_sqadd_x:
23492 N->getOperand(1),
N->getOperand(2));
23493 case Intrinsic::aarch64_sve_sqsub_x:
23495 N->getOperand(1),
N->getOperand(2));
23496 case Intrinsic::aarch64_sve_uqadd_x:
23498 N->getOperand(1),
N->getOperand(2));
23499 case Intrinsic::aarch64_sve_uqsub_x:
23501 N->getOperand(1),
N->getOperand(2));
  case Intrinsic::aarch64_sve_asrd:
    return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
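  // The SVE compare intrinsics that follow are canonicalised to
  // AArch64ISD::SETCC_MERGE_ZERO carrying an ISD::CondCode, so later combines
  // only need to reason about a single compare node.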
23505 case Intrinsic::aarch64_sve_cmphs:
23506 if (!
N->getOperand(2).getValueType().isFloatingPoint())
23508 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23511 case Intrinsic::aarch64_sve_cmphi:
23512 if (!
N->getOperand(2).getValueType().isFloatingPoint())
23514 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23517 case Intrinsic::aarch64_sve_fcmpge:
23518 case Intrinsic::aarch64_sve_cmpge:
23520 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23523 case Intrinsic::aarch64_sve_fcmpgt:
23524 case Intrinsic::aarch64_sve_cmpgt:
23526 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23529 case Intrinsic::aarch64_sve_fcmpeq:
23530 case Intrinsic::aarch64_sve_cmpeq:
23532 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23535 case Intrinsic::aarch64_sve_fcmpne:
23536 case Intrinsic::aarch64_sve_cmpne:
23538 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23541 case Intrinsic::aarch64_sve_fcmpuo:
23543 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23546 case Intrinsic::aarch64_sve_fadda:
23548 case Intrinsic::aarch64_sve_faddv:
23550 case Intrinsic::aarch64_sve_fmaxnmv:
23552 case Intrinsic::aarch64_sve_fmaxv:
23554 case Intrinsic::aarch64_sve_fminnmv:
23556 case Intrinsic::aarch64_sve_fminv:
23558 case Intrinsic::aarch64_sve_sel:
23560 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
23561 case Intrinsic::aarch64_sve_cmpeq_wide:
23563 case Intrinsic::aarch64_sve_cmpne_wide:
23565 case Intrinsic::aarch64_sve_cmpge_wide:
23567 case Intrinsic::aarch64_sve_cmpgt_wide:
23569 case Intrinsic::aarch64_sve_cmplt_wide:
23571 case Intrinsic::aarch64_sve_cmple_wide:
23573 case Intrinsic::aarch64_sve_cmphs_wide:
23575 case Intrinsic::aarch64_sve_cmphi_wide:
23577 case Intrinsic::aarch64_sve_cmplo_wide:
23579 case Intrinsic::aarch64_sve_cmpls_wide:
23581 case Intrinsic::aarch64_sve_ptest_any:
23582 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23584 case Intrinsic::aarch64_sve_ptest_first:
23585 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
23587 case Intrinsic::aarch64_sve_ptest_last:
23588 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
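  // whilelo computes exactly the generic active-lane mask, so it is rewritten
  // to ISD::GET_ACTIVE_LANE_MASK and lowered by the common code path.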
  case Intrinsic::aarch64_sve_whilelo:
    return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
23593 case Intrinsic::aarch64_sve_bsl:
23594 case Intrinsic::aarch64_sve_bsl1n:
23595 case Intrinsic::aarch64_sve_bsl2n:
23596 case Intrinsic::aarch64_sve_nbsl:
23603 unsigned OC =
N->getOpcode();
23604 return OC == ISD::LOAD || OC == ISD::MLOAD ||
23619 const SDValue SetCC =
N->getOperand(0);
23641 SDLoc(SetCC),
N->getValueType(0), Ext1, Ext2,
23653 EVT VT =
N->getValueType(0);
23654 if ((VT != MVT::v4i32 && VT != MVT::v8i16) ||
23659 unsigned ExtOffset =
N->getOperand(0).getConstantOperandVal(1);
23663 EVT InVT =
N->getOperand(0).getOperand(0).getValueType();
23676 bool IsUndefDeInterleave =
false;
23677 if (!IsDeInterleave)
23678 IsUndefDeInterleave =
23679 Shuffle->getOperand(1).isUndef() &&
23682 [](
int M) { return M < 0; }) &&
23687 if ((!IsDeInterleave && !IsUndefDeInterleave) || Idx >= 4)
23691 Shuffle->getOperand(IsUndefDeInterleave ? 1 : 0));
23693 Shuffle->getOperand(IsUndefDeInterleave ? 0 : 1));
23694 SDValue UZP = DAG.
getNode(Idx < 2 ? AArch64ISD::UZP1 : AArch64ISD::UZP2,
DL,
23696 if ((Idx & 1) == 1)
23712 EVT VT =
N->getValueType(0);
23714 (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16))
23718 unsigned ExtOffset = (
unsigned)-1;
23720 ExtOffset =
Op.getConstantOperandVal(1);
23721 Op =
Op.getOperand(0);
23724 unsigned Shift = 0;
23726 Op.getValueType().getScalarSizeInBits());
23728 if (
Op.getOpcode() == AArch64ISD::VLSHR) {
23729 Shift =
Op.getConstantOperandVal(1);
23730 Op =
Op.getOperand(0);
23731 Mask = Mask.lshr(Shift);
23735 Op =
Op.getOperand(0);
23737 }
else if (
Op.getOpcode() == AArch64ISD::BICi) {
23738 Mask =
~APInt(
Op.getValueType().getScalarSizeInBits(),
23739 Op.getConstantOperandVal(1) <<
Op.getConstantOperandVal(2));
23741 Op =
Op.getOperand(0);
23744 if (ExtOffset == (
unsigned)-1) {
23746 ExtOffset =
Op.getConstantOperandVal(1);
23747 Op =
Op.getOperand(0);
23754 if (
Op.getOpcode() != AArch64ISD::UZP1 &&
Op.getOpcode() != AArch64ISD::UZP2)
23756 if (
Op.getOpcode() == AArch64ISD::UZP2)
23761 Op.getOperand(ExtOffset == 0 ? 0 : 1));
23763 BC = DAG.
getNode(AArch64ISD::VLSHR,
DL, VT, BC,
23776 N->getOperand(0).getValueType().is64BitVector() &&
23777 (
N->getOperand(0).getOpcode() ==
ISD::ABDU ||
23778 N->getOperand(0).getOpcode() ==
ISD::ABDS)) {
23779 SDNode *ABDNode =
N->getOperand(0).getNode();
23793 if (
N->getValueType(0).isFixedLengthVector() &&
23809 (
N->getValueType(0) == MVT::i32 ||
N->getValueType(0) == MVT::i64)) {
23813 return DAG.
getNode(AArch64ISD::REV16,
SDLoc(
N),
N->getValueType(0),
23821 SDValue SplatVal,
unsigned NumVecElts) {
23840 if (BasePtr->getOpcode() ==
ISD::ADD &&
23843 BasePtr = BasePtr->getOperand(0);
23846 unsigned Offset = EltOffset;
23847 while (--NumVecElts) {
23863 assert(ContentTy.
isSimple() &&
"No SVE containers for extended types");
23874 return MVT::nxv2i64;
23879 return MVT::nxv4i32;
23883 case MVT::nxv8bf16:
23884 return MVT::nxv8i16;
23886 return MVT::nxv16i8;
23892 EVT VT =
N->getValueType(0);
23897 EVT ContainerVT = VT;
23910 if (ContainerVT.
isInteger() && (VT != ContainerVT))
23918 EVT VT =
N->getValueType(0);
23919 EVT PtrTy =
N->getOperand(3).getValueType();
23928 MINode->getOperand(3), DAG.
getUNDEF(PtrTy),
23930 MINode->getMemoryVT(), MINode->getMemOperand(),
23941template <
unsigned Opcode>
23943 static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
23944 Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
23945 "Unsupported opcode.");
23947 EVT VT =
N->getValueType(0);
23953 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(2),
N->getOperand(3)};
23958 Load = DAG.
getNode(ISD::BITCAST,
DL, VT, Load.getValue(0));
23966 EVT DataVT =
Data.getValueType();
23974 if (
Data.getValueType().isFloatingPoint())
23986 return DAG.
getNode(AArch64ISD::ST1_PRED,
DL,
N->getValueType(0),
Ops);
23993 EVT DataVT =
Data.getValueType();
23994 EVT PtrTy =
N->getOperand(4).getValueType();
24002 MINode->getMemoryVT(), MINode->getMemOperand(),
24032 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
24034 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
24060 for (
int I = 0;
I < NumVecElts; ++
I) {
24072 ZeroReg = AArch64::WZR;
24075 ZeroReg = AArch64::XZR;
24099 if (NumVecElts != 4 && NumVecElts != 2)
24110 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
24112 for (
unsigned I = 0;
I < NumVecElts; ++
I) {
24128 if (IndexVal >= NumVecElts)
24130 IndexNotInserted.reset(IndexVal);
24135 if (IndexNotInserted.any())
24159 return ReplacedZeroSplat;
24165 if (!Subtarget->isMisaligned128StoreSlow())
24190 return ReplacedSplat;
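// SVE shuffle-node combines: a SPLICE whose second input is undef is just its
// first input, and UUNPKLO/UUNPKHI of undef folds to undef.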
  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
  if (N->getOperand(2).isUndef())
    return N->getOperand(1);

  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
          N->getOpcode() == AArch64ISD::UUNPKLO) &&
         "Unexpected Opcode!");
  if (N->getOperand(0).isUndef())
    return DAG.getUNDEF(N->getValueType(0));

  if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
      N->getOpcode() == AArch64ISD::UUNPKLO) {
24242 SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
24246 unsigned PgPattern = Mask->getConstantOperandVal(0);
24247 EVT VT =
N->getValueType(0);
24272 if (
N->getOpcode() != AArch64ISD::UZP1)
24276 EVT DstVT =
N->getValueType(0);
24277 return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv16i8) ||
24278 (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv8i16) ||
24279 (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv4i32);
24286 assert(
N->getOpcode() == AArch64ISD::UZP1 &&
"Only UZP1 expected.");
24289 EVT ResVT =
N->getValueType(0);
24292 if (RshOpc != AArch64ISD::RSHRNB_I)
24303 if (
Lo.getOpcode() != AArch64ISD::UUNPKLO &&
24304 Hi.getOpcode() != AArch64ISD::UUNPKHI)
24307 if (OrigArg !=
Hi.getOperand(0))
24311 return DAG.
getNode(AArch64ISD::URSHR_I_PRED,
DL, ResVT,
  if (VT == MVT::nxv8i16)
    ResVT = MVT::nxv16i8;
  else if (VT == MVT::nxv4i32)
    ResVT = MVT::nxv8i16;
  else if (VT == MVT::nxv2i64)
    ResVT = MVT::nxv4i32;
24342 unsigned ShiftValue;
24347 AArch64ISD::RSHRNB_I,
DL, ResVT,
24349 return DAG.
getNode(AArch64ISD::NVCAST,
DL, VT, Rshrnb);
24353 if (V.getOpcode() != AArch64ISD::NVCAST)
24357 if (!
Op.getValueType().isVector() ||
24358 V.getValueType().getVectorElementCount() !=
24359 Op.getValueType().getVectorElementCount() * 2)
24370 EVT ResVT =
N->getValueType(0);
24381 if (ExtIdx0 == 0 && ExtIdx1 == NumElements / 2) {
24392 if (
N->getOpcode() == AArch64ISD::UZP2)
24397 EVT BCVT = MVT::Other, HalfVT = MVT::Other;
24403 HalfVT = MVT::v8i8;
24407 HalfVT = MVT::v4i16;
24411 HalfVT = MVT::v2i32;
24414 if (BCVT != MVT::Other) {
    Rshrnb = DAG.getNode(AArch64ISD::NVCAST, DL, ResVT, Rshrnb);
    return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Rshrnb, Op1);

    Rshrnb = DAG.getNode(AArch64ISD::NVCAST, DL, ResVT, Rshrnb);
    return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Rshrnb);

  if (PreCast.getOpcode() == AArch64ISD::UUNPKLO) {
    if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
      SDValue X = PreCast.getOperand(0).getOperand(0);
      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);

  if (PreCast.getOpcode() == AArch64ISD::UUNPKHI) {
    if (PreCast.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
      SDValue Z = PreCast.getOperand(0).getOperand(1);
      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
24476 if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
24485 if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) ||
24486 (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) {
24490 SourceOp0, SourceOp1);
24510 ResultTy = MVT::v4i32;
24513 ResultTy = MVT::v8i16;
24516 ResultTy = MVT::v16i8;
24527 EVT BitcastResultTy;
24531 BitcastResultTy = MVT::v2i64;
24534 BitcastResultTy = MVT::v4i32;
24537 BitcastResultTy = MVT::v8i16;
24544 DAG.
getNode(ISD::BITCAST,
DL, BitcastResultTy, UzpResult));
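// Gather-load combine: when the per-lane offsets are produced by a sign/zero
// extend from i32 and the gather opcode has a *SXTW/*UXTW addressing form, the
// explicit extend is dropped and folded into the addressing mode instead.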
  unsigned Opc = N->getOpcode();
  const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;

  EVT ResVT = N->getValueType(0);
  const auto OffsetOpc = Offset.getOpcode();
  const bool OffsetIsZExt =
      OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
  const bool OffsetIsSExt =
      OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;

  if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
    if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
      return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
                         {Chain, Pg, Base, UnextendedOffset, Ty});
  assert(N->getOpcode() == AArch64ISD::VASHR ||
         N->getOpcode() == AArch64ISD::VLSHR);
  unsigned OpScalarSize = Op.getScalarValueSizeInBits();
  unsigned ShiftImm = N->getConstantOperandVal(1);
  assert(OpScalarSize > ShiftImm && "Invalid shift imm");

  if (N->getOpcode() == AArch64ISD::VASHR &&
      Op.getOpcode() == AArch64ISD::VSHL &&
      N->getOperand(1) == Op.getOperand(1))
    return Op.getOperand(0);
24620 if (
N->getFlags().hasExact())
24624 APInt DemandedMask = ~ShiftedOutBits;
24637 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
24639 SDValue CC =
N->getOperand(0)->getOperand(0);
24658 EVT VT =
N->getValueType(0);
24664 unsigned LoadIdx = IsLaneOp ? 1 : 0;
24671 if (LD->isIndexed())
24677 Lane =
N->getOperand(2);
24693 for (
SDUse &U : LD->uses()) {
24694 if (U.getResNo() == 1)
24696 if (U.getUser() !=
N)
24703 if (
N->hasOneUse()) {
24704 unsigned UseOpc =
N->user_begin()->getOpcode();
24709 SDValue Addr = LD->getOperand(1);
24720 uint32_t IncVal = CInc->getZExtValue();
24722 if (IncVal != NumBytes)
24740 Ops.push_back(LD->getOperand(0));
24743 Ops.push_back(Lane);
24745 Ops.push_back(Addr);
24746 Ops.push_back(Inc);
24748 EVT Tys[3] = { VT, MVT::i64, MVT::Other };
24750 unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
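// TBI (top-byte ignore) simplification: the hardware ignores the top address
// bits (8 normally, only 4 when MTE or Darwin pointer tagging may occupy the
// upper nibble), so explicitly masking those bits off an address is redundant.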
24771static bool performTBISimplification(
SDValue Addr,
24777 unsigned NumIgnoreBits =
24778 Subtarget.hasMTE() || Subtarget.isTargetDarwin() ? 4 : 8;
24792 assert((
N->getOpcode() == ISD::STORE ||
N->getOpcode() == ISD::MSTORE) &&
24793 "Expected STORE dag node in input!");
24796 if (!
Store->isTruncatingStore() ||
Store->isIndexed())
24807 Store->getBasePtr(),
Store->getMemOperand());
24830 EVT MemVT =
LD->getMemoryVT();
24832 LD->getBaseAlign() >= 4)
24840 assert(
LD->getOffset().isUndef() &&
"undef offset expected");
24876 performTBISimplification(
N->getOperand(1), DCI, DAG);
24879 EVT RegVT =
LD->getValueType(0);
24880 EVT MemVT =
LD->getMemoryVT();
24885 unsigned AddrSpace =
LD->getAddressSpace();
24889 if (PtrVT !=
LD->getBasePtr().getSimpleValueType()) {
24893 Cast,
LD->getPointerInfo(), MemVT,
24894 LD->getBaseAlign(),
24895 LD->getMemOperand()->getFlags());
24902 if (
SDValue Res = combineV3I8LoadExt(LD, DAG))
24905 if (!
LD->isNonTemporal())
24926 for (
unsigned I = 0;
I < Num256Loads;
I++) {
24927 unsigned PtrOffset =
I * 32;
24932 NewVT,
DL, Chain, NewPtr,
LD->getPointerInfo().getWithOffset(PtrOffset),
24933 NewAlign,
LD->getMemOperand()->getFlags(),
LD->getAAInfo());
24943 unsigned PtrOffset = (MemVT.
getSizeInBits() - BitsRemaining) / 8;
24951 DAG.
getLoad(RemainingVT,
DL, Chain, NewPtr,
24952 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign,
24953 LD->getMemOperand()->getFlags(),
LD->getAAInfo());
24956 SDValue ExtendedRemainingLoad =
24958 {UndefVector, RemainingLoad, InsertIdx});
24959 LoadOps.
push_back(ExtendedRemainingLoad);
24976 EVT VecVT =
Op.getValueType();
24978 "Need boolean vector type.");
24985 return Op.getOperand(0).getValueType();
24989 for (
SDValue Operand :
Op->op_values()) {
24993 EVT OperandVT = tryGetOriginalBoolVectorType(Operand,
Depth + 1);
24995 BaseVT = OperandVT;
24996 else if (OperandVT != BaseVT)
25010 EVT VecVT = ComparisonResult.getValueType();
25014 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
25024 VecVT = tryGetOriginalBoolVectorType(ComparisonResult);
25026 unsigned BitsPerElement = std::max(64 / NumElts, 8u);
25044 VecVT == MVT::v16i8) {
25048 for (
unsigned Half = 0; Half < 2; ++Half) {
25049 for (
unsigned I = 0;
I < 8; ++
I) {
25052 unsigned MaskBit = IsLE ? (1u <<
I) : (1u << (7 -
I));
25060 SDValue UpperRepresentativeBits =
25061 DAG.
getNode(AArch64ISD::EXT,
DL, VecVT, RepresentativeBits,
25064 RepresentativeBits, UpperRepresentativeBits);
25065 Zipped = DAG.
getNode(ISD::BITCAST,
DL, MVT::v8i16, Zipped);
25066 return DAG.
getNode(ISD::VECREDUCE_ADD,
DL, MVT::i16, Zipped);
25071 for (
unsigned I = 0;
I < NumEl; ++
I) {
25072 unsigned MaskBit = IsLE ? (1u <<
I) : (1u << (NumEl - 1 -
I));
25081 return DAG.
getNode(ISD::VECREDUCE_ADD,
DL, ResultVT, RepresentativeBits);
25086 if (!
Store->isTruncatingStore())
25112 Store->getMemOperand());
25129 if (
Value.getValueType().isVector())
25133 while (
Value->isAssert())
25144 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
25148 EVT VT =
Value.getSimpleValueType();
25165 DCI.
CombineTo(
ST->getValue().getNode(), Extracted);
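// A "halving" truncate keeps the element count but halves the element width
// (e.g. nxv8i16 -> nxv8i8), the exact shape SVE truncating stores can absorb.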
bool isHalvingTruncateOfLegalScalableType(EVT SrcVT, EVT DstVT) {
  return (SrcVT == MVT::nxv8i16 && DstVT == MVT::nxv8i8) ||
         (SrcVT == MVT::nxv4i32 && DstVT == MVT::nxv4i16) ||
         (SrcVT == MVT::nxv2i64 && DstVT == MVT::nxv2i32);
}
25179 EVT ValueVT =
Value.getValueType();
25186 assert(
ST->getOffset().isUndef() &&
"undef offset expected");
25190 Value->getOperand(0).getValueType().getVectorElementType(), 4);
25196 ISD::BITCAST,
DL, WideVT.
getSizeInBits() == 64 ? MVT::v8i8 : MVT::v16i8,
25222static unsigned getFPSubregForVT(
EVT VT) {
25225 case MVT::aarch64mfp8:
25226 return AArch64::bsub;
25228 return AArch64::hsub;
25230 return AArch64::ssub;
25232 return AArch64::dsub;
25246 EVT ValueVT =
Value.getValueType();
25247 EVT MemVT =
ST->getMemoryVT();
25251 if (
SDValue Res = combineStoreValueFPToInt(ST, DCI, DAG, Subtarget))
25254 auto hasValidElementTypeForFPTruncStore = [](
EVT VT) {
25256 return EltVT == MVT::f32 || EltVT == MVT::f64;
25260 unsigned AddrSpace =
ST->getAddressSpace();
25267 ST->getBaseAlign(),
ST->getMemOperand()->getFlags(),
25272 if (
SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
25284 hasValidElementTypeForFPTruncStore(
Value.getOperand(0).getValueType()))
25286 ST->getMemOperand());
25292 performTBISimplification(
N->getOperand(2), DCI, DAG))
25295 if (
SDValue Store = foldTruncStoreOfExt(DAG,
N))
25298 if (
SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
25301 if (
ST->isTruncatingStore() &&
25302 isHalvingTruncateOfLegalScalableType(ValueVT, MemVT)) {
25306 MemVT,
ST->getMemOperand());
25330 if (ValueVT != MemVT && !
ST->isTruncatingStore())
25343 !ExtCst->isZero() &&
ST->getBasePtr().getOpcode() !=
ISD::ADD)
25346 if (MemVT == MVT::i64 || MemVT == MVT::i32) {
25350 for (
const auto &
Use :
Vector->uses()) {
25351 if (
Use.getResNo() !=
Vector.getResNo())
25362 if (!ExtCst || !ExtCst->isZero()) {
25368 DAG.
getUNDEF(VectorVT), Ext, Zero);
25371 EVT FPMemVT = MemVT == MVT::i8
25375 FPMemVT, ExtVector);
25377 return DAG.
getStore(
ST->getChain(),
DL, FPSubreg,
ST->getBasePtr(),
25378 ST->getMemOperand());
25389 unsigned NumParts =
N->getNumOperands();
25398 for (
unsigned I = 0;
I < NumParts;
I++)
25399 if (
N->getOperand(
I) !=
SDValue(InterleaveOp,
I))
25408 unsigned RequiredNumParts) {
25411 if (!isSequentialConcatOfVectorInterleave(WideMask.
getNode(),
25412 MaskInterleaveOps))
25415 if (MaskInterleaveOps.
size() != RequiredNumParts)
25422 return MaskInterleaveOps[0];
25429 assert(
EC.isKnownMultipleOf(RequiredNumParts) &&
25430 "Expected element count divisible by number of parts");
25431 EC =
EC.divideCoefficientBy(RequiredNumParts);
25436static SDValue performInterleavedMaskedStoreCombine(
25452 if (!isSequentialConcatOfVectorInterleave(WideValue.
getNode(),
25453 ValueInterleaveOps))
25456 unsigned NumParts = ValueInterleaveOps.
size();
25457 if (NumParts != 2 && NumParts != 4)
25462 EVT SubVecTy = ValueInterleaveOps[0].getValueType();
25470 getNarrowMaskForInterleavedOps(DAG,
DL, MST->
getMask(), NumParts);
25475 NumParts == 2 ? Intrinsic::aarch64_sve_st2 : Intrinsic::aarch64_sve_st4;
25478 NewStOps.
append(ValueInterleaveOps);
25492 if (
SDValue Res = performInterleavedMaskedStoreCombine(
N, DCI, DAG))
25500 Value.getValueType().isInteger()) {
25502 if (
Value.getOpcode() == ISD::BITCAST) {
25505 EVT InVT =
Value.getOperand(0).getValueType();
25509 unsigned PgPattern =
Mask->getConstantOperandVal(0);
25528 EVT ValueVT =
Value->getValueType(0);
25530 if (!isHalvingTruncateOfLegalScalableType(ValueVT, MemVT))
25547 EVT IndexVT = Index.getValueType();
25557 if (Index.getOpcode() ==
ISD::ADD) {
25572 if (Index.getOpcode() ==
ISD::SHL &&
25573 Index.getOperand(0).getOpcode() ==
ISD::ADD) {
25583 Add.getOperand(0), ShiftOp);
25605 EVT IndexVT = Index.getValueType();
25610 EVT DataVT =
N->getOperand(1).getValueType();
25623 int64_t Stride = 0;
25629 else if (Index.getOpcode() ==
ISD::SHL &&
25635 Stride = Step << Shift->getZExtValue();
25643 if (Stride < std::numeric_limits<int32_t>::min() ||
25644 Stride > std::numeric_limits<int32_t>::max())
25648 unsigned MaxVScale =
25650 int64_t LastElementOffset =
25653 if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
25654 LastElementOffset > std::numeric_limits<int32_t>::max())
25685 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
25698 SDValue Ops[] = {Chain, HG->getInc(), Mask, BasePtr,
25699 Index, Scale, HG->getIntID()};
25701 DL,
Ops, HG->getMemOperand(), IndexType);
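// Post-increment combine for NEON structured load/store intrinsics: when the
// address used by ld2/ld3/ld4/st2/... is also advanced by the size of the
// access, the pair is replaced by a single post-indexed LDxpost/STxpost node.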
  unsigned AddrOpIdx = N->getNumOperands() - 1;
  SDValue Addr = N->getOperand(AddrOpIdx);

  bool IsStore = false;
  bool IsLaneOp = false;
  bool IsDupOp = false;
  unsigned NewOpc = 0;
  unsigned NumVecs = 0;
  unsigned IntNo = N->getConstantOperandVal(1);
  case Intrinsic::aarch64_neon_ld2:       NewOpc = AArch64ISD::LD2post;
    NumVecs = 2; break;
  case Intrinsic::aarch64_neon_ld3:       NewOpc = AArch64ISD::LD3post;
    NumVecs = 3; break;
  case Intrinsic::aarch64_neon_ld4:       NewOpc = AArch64ISD::LD4post;
    NumVecs = 4; break;
  case Intrinsic::aarch64_neon_st2:       NewOpc = AArch64ISD::ST2post;
    NumVecs = 2; IsStore = true; break;
  case Intrinsic::aarch64_neon_st3:       NewOpc = AArch64ISD::ST3post;
    NumVecs = 3; IsStore = true; break;
  case Intrinsic::aarch64_neon_st4:       NewOpc = AArch64ISD::ST4post;
    NumVecs = 4; IsStore = true; break;
  case Intrinsic::aarch64_neon_ld1x2:     NewOpc = AArch64ISD::LD1x2post;
    NumVecs = 2; break;
  case Intrinsic::aarch64_neon_ld1x3:     NewOpc = AArch64ISD::LD1x3post;
    NumVecs = 3; break;
  case Intrinsic::aarch64_neon_ld1x4:     NewOpc = AArch64ISD::LD1x4post;
    NumVecs = 4; break;
  case Intrinsic::aarch64_neon_st1x2:     NewOpc = AArch64ISD::ST1x2post;
    NumVecs = 2; IsStore = true; break;
  case Intrinsic::aarch64_neon_st1x3:     NewOpc = AArch64ISD::ST1x3post;
    NumVecs = 3; IsStore = true; break;
  case Intrinsic::aarch64_neon_st1x4:     NewOpc = AArch64ISD::ST1x4post;
    NumVecs = 4; IsStore = true; break;
  case Intrinsic::aarch64_neon_ld2r:      NewOpc = AArch64ISD::LD2DUPpost;
    NumVecs = 2; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld3r:      NewOpc = AArch64ISD::LD3DUPpost;
    NumVecs = 3; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld4r:      NewOpc = AArch64ISD::LD4DUPpost;
    NumVecs = 4; IsDupOp = true; break;
  case Intrinsic::aarch64_neon_ld2lane:   NewOpc = AArch64ISD::LD2LANEpost;
    NumVecs = 2; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_ld3lane:   NewOpc = AArch64ISD::LD3LANEpost;
    NumVecs = 3; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_ld4lane:   NewOpc = AArch64ISD::LD4LANEpost;
    NumVecs = 4; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st2lane:   NewOpc = AArch64ISD::ST2LANEpost;
    NumVecs = 2; IsStore = true; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st3lane:   NewOpc = AArch64ISD::ST3LANEpost;
    NumVecs = 3; IsStore = true; IsLaneOp = true; break;
  case Intrinsic::aarch64_neon_st4lane:   NewOpc = AArch64ISD::ST4LANEpost;
    NumVecs = 4; IsStore = true; IsLaneOp = true; break;
25787 VecTy =
N->getOperand(2).getValueType();
25789 VecTy =
N->getValueType(0);
25794 uint32_t IncVal = CInc->getZExtValue();
25796 if (IsLaneOp || IsDupOp)
25798 if (IncVal != NumBytes)
25803 Ops.push_back(
N->getOperand(0));
25805 if (IsLaneOp || IsStore)
25806 for (
unsigned i = 2; i < AddrOpIdx; ++i)
25807 Ops.push_back(
N->getOperand(i));
25808 Ops.push_back(Addr);
25809 Ops.push_back(Inc);
25813 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
25815 for (n = 0; n < NumResultVecs; ++n)
25817 Tys[n++] = MVT::i64;
25818 Tys[n] = MVT::Other;
25827 std::vector<SDValue> NewResults;
25828 for (
unsigned i = 0; i < NumResultVecs; ++i) {
25831 NewResults.push_back(
SDValue(UpdN.
getNode(), NumResultVecs + 1));
25845 switch(V.getNode()->getOpcode()) {
25850 if ((LoadNode->
getMemoryVT() == MVT::i8 && width == 8)
25851 || (LoadNode->
getMemoryVT() == MVT::i16 && width == 16)) {
25859 if ((TypeNode->
getVT() == MVT::i8 && width == 8)
25860 || (TypeNode->
getVT() == MVT::i16 && width == 16)) {
25868 if ((TypeNode->
getVT() == MVT::i8 && width == 8)
25869 || (TypeNode->
getVT() == MVT::i16 && width == 16)) {
25878 1LL << (width - 1);
25948 int CompConstant) {
25952 int MaxUInt = (1 << width);
25960 AddConstant -= (1 << (width-1));
25965 if ((AddConstant == 0) ||
25966 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
25967 (AddConstant >= 0 && CompConstant < 0) ||
25968 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
25973 if ((AddConstant == 0) ||
25974 (AddConstant >= 0 && CompConstant <= 0) ||
25975 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
25980 if ((AddConstant >= 0 && CompConstant < 0) ||
25981 (AddConstant <= 0 && CompConstant >= -1 &&
25982 CompConstant < AddConstant + MaxUInt))
25987 if ((AddConstant == 0) ||
25988 (AddConstant > 0 && CompConstant <= 0) ||
25989 (AddConstant < 0 && CompConstant <= AddConstant))
25994 if ((AddConstant >= 0 && CompConstant <= 0) ||
25995 (AddConstant <= 0 && CompConstant >= 0 &&
25996 CompConstant <= AddConstant + MaxUInt))
26001 if ((AddConstant > 0 && CompConstant < 0) ||
26002 (AddConstant < 0 && CompConstant >= 0 &&
26003 CompConstant < AddConstant + MaxUInt) ||
26004 (AddConstant >= 0 && CompConstant >= 0 &&
26005 CompConstant >= AddConstant) ||
26006 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
26025 unsigned CCIndex,
unsigned CmpIndex,
26054 N->getOperand(CCIndex)->getValueType(0));
26062 assert((CCIndex == 2 && CmpIndex == 3) &&
26063 "Expected CCIndex to be 2 and CmpIndex to be 3.");
26064 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1), AArch64_CC,
26066 return DAG.
getNode(
N->getOpcode(),
N,
N->getVTList(),
Ops);
26073 unsigned CmpIndex) {
26075 SDNode *SubsNode =
N->getOperand(CmpIndex).getNode();
26076 unsigned CondOpcode = SubsNode->
getOpcode();
26086 unsigned MaskBits = 0;
26110 unsigned ShiftAmt = M.countl_zero();
26115 ShiftedC, ShiftedX);
26122 uint32_t CNV = CN->getZExtValue();
26125 else if (CNV == 65535)
26199 SDValue CSel = Cmp.getOperand(0);
26203 return DAG.
getNode(
N->getOpcode(),
DL,
N->getVTList(), Chain, Dest,
26209 unsigned CmpOpc = Cmp.getOpcode();
26210 if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
26215 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
26222 "Expected the value type to be the same for both operands!");
26223 if (
LHS.getValueType() != MVT::i32 &&
LHS.getValueType() != MVT::i64)
26239 BR = DAG.
getNode(AArch64ISD::CBZ,
SDLoc(
N), MVT::Other, Chain,
LHS, Dest);
26241 BR = DAG.
getNode(AArch64ISD::CBNZ,
SDLoc(
N), MVT::Other, Chain,
LHS, Dest);
26250 unsigned CC =
N->getConstantOperandVal(2);
26255 Zero =
N->getOperand(0);
26256 CTTZ =
N->getOperand(1);
26258 Zero =
N->getOperand(1);
26259 CTTZ =
N->getOperand(0);
26265 CTTZ.getOperand(0).getOpcode() !=
ISD::CTTZ))
26268 assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
26269 "Illegal type in CTTZ folding");
26275 ? CTTZ.getOperand(0).getOperand(0)
26276 : CTTZ.getOperand(0);
26282 ? CTTZ.getOperand(0).getValueSizeInBits()
26283 : CTTZ.getValueSizeInBits();
26310 if (CmpRHS.
getOpcode() == AArch64ISD::CSEL)
26312 else if (CmpLHS.
getOpcode() != AArch64ISD::CSEL)
26335 else if (CmpRHS !=
X)
26344 EVT VT =
Op->getValueType(0);
26347 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, L, R, CCValue,
Cond);
26356 SDValue SubsNode =
N->getOperand(3);
26362 EVT VT =
N->getValueType(0);
26364 unsigned ExpectedOpcode;
26372 CmpOpConst->getValueType(0));
26374 CmpOpConst->getValueType(0));
26377 ExpectedOp = CmpOpToMatch;
26378 SubsOp = CmpOpToMatch;
26383 if (
Op.getOpcode() != ExpectedOpcode)
26385 if (
Op.getOperand(0).getOpcode() !=
ISD::ADD ||
26386 !
Op.getOperand(0).hasOneUse())
26390 if (
X != CmpOpOther)
26392 if (
X != CmpOpOther)
26394 if (ExpectedOp !=
Op.getOperand(1))
26402 SDValue TReassocOp = GetReassociationOp(
N->getOperand(0), ExpectedOp);
26403 SDValue FReassocOp = GetReassociationOp(
N->getOperand(1), ExpectedOp);
26404 if (!TReassocOp && !FReassocOp)
26411 auto Reassociate = [&](
SDValue ReassocOp,
unsigned OpNum) {
26413 return N->getOperand(OpNum);
26420 SDValue TValReassoc = Reassociate(TReassocOp, 0);
26421 SDValue FValReassoc = Reassociate(FReassocOp, 1);
26422 return DAG.
getNode(AArch64ISD::CSEL,
SDLoc(
N), VT, TValReassoc, FValReassoc,
26430 if (
SDValue R = Fold(CC, ExpectedOp, SubsOp))
26452 auto CheckedFold = [&](
bool Check,
APInt NewCmpConst,
26455 CmpOpConst->getValueType(0));
26457 CmpOpConst->getValueType(0));
26458 return Check ? Fold(NewCC, ExpectedOp, SubsOp) :
SDValue();
26463 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
26467 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
26470 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
26473 return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
26476 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
26479 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
26482 return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
26485 return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
26500 if (PTest.
getOpcode() != AArch64ISD::PTEST_ANY)
26506 if (TruePred.
getOpcode() == AArch64ISD::REINTERPRET_CAST)
26509 if (AnyPred.
getOpcode() == AArch64ISD::REINTERPRET_CAST)
26530 if (
N->getOperand(0) ==
N->getOperand(1))
26531 return N->getOperand(0);
26550 Cond.hasOneUse() &&
Cond->hasNUsesOfValue(0, 0) &&
26552 {Cond.getOperand(1), Cond.getOperand(0)}) &&
26554 {Cond.getOperand(0), Cond.getOperand(1)}) &&
26562 Cond.getOperand(1),
Cond.getOperand(0));
26563 return DAG.
getNode(AArch64ISD::CSEL,
DL,
N->getVTList(),
N->getOperand(0),
26580 EVT Op0MVT =
Op->getOperand(0).getValueType();
26586 SDNode *FirstUse = *
Op->user_begin();
26593 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
26608 Op->getOperand(0));
26610 Op->getOperand(0));
26611 if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
26612 Op0ExtV =
SDValue(Op0SExt, 0);
26614 }
else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
26615 Op0ExtV =
SDValue(Op0ZExt, 0);
26621 Op0ExtV, Op1ExtV,
Op->getOperand(2));
26648 EVT VT =
N->getValueType(0);
26655 LHS->getOpcode() == AArch64ISD::CSEL &&
26657 LHS->hasOneUse()) {
26661 auto NewCond = getInvertedCondCode(OldCond);
26665 LHS.getOperand(0),
LHS.getOperand(1),
26673 LHS->hasOneUse()) {
26674 EVT TstVT =
LHS->getValueType(0);
26678 uint64_t TstImm = -1ULL <<
LHS->getConstantOperandVal(1);
26692 LHS->getOpcode() == ISD::BITCAST) {
26693 EVT ToVT =
LHS->getValueType(0);
26694 EVT FromVT =
LHS->getOperand(0).getValueType();
26698 LHS = DAG.
getNode(IsNull ? ISD::VECREDUCE_OR : ISD::VECREDUCE_AND,
26699 DL, MVT::i1,
LHS->getOperand(0));
26710 EVT CmpVT =
LHS.getValueType();
26717 SplatLHSVal.
isOne())
26727 unsigned GenericOpcode) {
26731 EVT VT =
N->getValueType(0);
26734 if (!
N->hasAnyUseOfValue(1)) {
26768 if (InnerSetCC->
getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
26776 if (Pred.getOpcode() == AArch64ISD::PTRUE &&
26777 InnerPred.
getOpcode() == AArch64ISD::PTRUE &&
26779 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
26780 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
26787 if (V.getOpcode() != AArch64ISD::VASHR ||
26788 V.getOperand(0).getOpcode() != AArch64ISD::VSHL)
26791 unsigned BitWidth = V->getValueType(0).getScalarSizeInBits();
26792 unsigned ShiftAmtR = V.getConstantOperandVal(1);
26793 unsigned ShiftAmtL = V.getOperand(0).getConstantOperandVal(1);
26794 return (ShiftAmtR == ShiftAmtL && ShiftAmtR == (
BitWidth - 1));
26799 assert(
N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
26800 "Unexpected opcode!");
26813 LHS->getOperand(0)->getValueType(0) ==
N->getValueType(0)) {
26817 if (
LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
26818 LHS->getOperand(0)->getOperand(0) == Pred)
26819 return LHS->getOperand(0);
26825 return LHS->getOperand(0);
26834 LHS->getOperand(0), Pred);
26851 LHS.getValueType(), L0, Shl, L2);
26852 return DAG.
getNode(AArch64ISD::SETCC_MERGE_ZERO,
DL,
N->getValueType(0),
26853 Pred, NewLHS,
RHS,
N->getOperand(3));
26867 if (!
Op->hasOneUse())
26883 Bit < Op->getOperand(0).getValueSizeInBits()) {
26887 if (
Op->getNumOperands() != 2)
26894 switch (
Op->getOpcode()) {
26900 if ((
C->getZExtValue() >> Bit) & 1)
26906 if (
C->getZExtValue() <= Bit &&
26907 (Bit -
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
26908 Bit = Bit -
C->getZExtValue();
26915 Bit = Bit +
C->getZExtValue();
26916 if (Bit >=
Op->getValueType(0).getSizeInBits())
26917 Bit =
Op->getValueType(0).getSizeInBits() - 1;
26922 if ((Bit +
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
26923 Bit = Bit +
C->getZExtValue();
26930 if ((
C->getZExtValue() >> Bit) & 1)
26940 unsigned Bit =
N->getConstantOperandVal(2);
26941 bool Invert =
false;
26942 SDValue TestSrc =
N->getOperand(1);
26945 if (TestSrc == NewTestSrc)
26948 unsigned NewOpc =
N->getOpcode();
26950 if (NewOpc == AArch64ISD::TBZ)
26951 NewOpc = AArch64ISD::TBNZ;
26953 assert(NewOpc == AArch64ISD::TBNZ);
26954 NewOpc = AArch64ISD::TBZ;
26959 return DAG.
getNode(NewOpc,
DL, MVT::Other,
N->getOperand(0), NewTestSrc,
26969 auto SelectA =
N->getOperand(1);
26970 auto SelectB =
N->getOperand(2);
26971 auto NTy =
N->getValueType(0);
26973 if (!NTy.isScalableVector())
26979 switch (SelectB.getOpcode()) {
26987 if (SelectA != SelectB.getOperand(0))
26993 auto InverseSetCC =
26998 {InverseSetCC, SelectB, SelectA});
27011 SDValue IfTrue =
N->getOperand(1);
27012 SDValue IfFalse =
N->getOperand(2);
27013 EVT ResVT =
N->getValueType(0);
27017 return N->getOperand(1);
27020 return N->getOperand(2);
27047 SDNode *SplatLHS =
N->getOperand(1).getNode();
27048 SDNode *SplatRHS =
N->getOperand(2).getNode();
27050 if (CmpLHS.
getValueType() ==
N->getOperand(1).getValueType() &&
27053 MVT::v2i32, MVT::v4i32, MVT::v2i64}),
27097 EVT ResVT =
N->getValueType(0);
27109 "Scalar-SETCC feeding SELECT has unexpected result type!");
27115 if (SrcVT == MVT::i1 ||
27123 if (!ResVT.
isVector() || NumMaskElts == 0)
27158 return DAG.
getSelect(
DL, ResVT, Mask,
N->getOperand(1),
N->getOperand(2));
27163 EVT VT =
N->getValueType(0);
27177 if (
N->getOpcode() == AArch64ISD::DUP) {
27188 EVT MemVT = LD->getMemoryVT();
27191 (MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) &&
27192 ElemVT != MemVT && LD->hasOneUse()) {
27208 if (
Op.getOpcode() == AArch64ISD::FCMEQ ||
27209 Op.getOpcode() == AArch64ISD::FCMGE ||
27210 Op.getOpcode() == AArch64ISD::FCMGT) {
27212 EVT ExpandedVT = VT;
27231 SDValue EXTRACT_VEC_ELT =
N->getOperand(0);
27249 if (
N->getValueType(0) ==
N->getOperand(0).getValueType())
27250 return N->getOperand(0);
27251 if (
N->getOperand(0).getOpcode() == AArch64ISD::NVCAST)
27252 return DAG.
getNode(AArch64ISD::NVCAST,
SDLoc(
N),
N->getValueType(0),
27253 N->getOperand(0).getOperand(0));
27278 MinOffset = std::min(MinOffset,
C->getZExtValue());
27297 if (
Offset >= (1 << 20))
27302 if (!
T->isSized() ||
27328 "This method is only for scalable vectors of offsets");
27344 unsigned ScalarSizeInBytes) {
27346 if (OffsetInBytes % ScalarSizeInBytes)
27350 if (OffsetInBytes / ScalarSizeInBytes > 31)
27364 unsigned ScalarSizeInBytes) {
27372 bool OnlyPackedOffsets =
true) {
27373 const SDValue Src =
N->getOperand(2);
27374 const EVT SrcVT = Src->getValueType(0);
27376 "Scatter stores are only possible for SVE vectors");
27388 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) &&
27389 ((Opcode != AArch64ISD::SST1Q_PRED &&
27390 Opcode != AArch64ISD::SST1Q_INDEX_PRED) ||
27391 ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16))))
27404 if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
27407 Opcode = AArch64ISD::SSTNT1_PRED;
27408 }
else if (Opcode == AArch64ISD::SST1Q_INDEX_PRED) {
27411 Opcode = AArch64ISD::SST1Q_PRED;
27419 if ((Opcode == AArch64ISD::SSTNT1_PRED || Opcode == AArch64ISD::SST1Q_PRED) &&
27420 Offset.getValueType().isVector())
27429 if (Opcode == AArch64ISD::SST1_IMM_PRED) {
27432 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
27433 Opcode = AArch64ISD::SST1_UXTW_PRED;
27435 Opcode = AArch64ISD::SST1_PRED;
27448 if (!OnlyPackedOffsets &&
27449 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
27468 if (Src.getValueType().isFloatingPoint())
27469 SrcNew = DAG.
getNode(ISD::BITCAST,
DL, HwSrcVt, Src);
27485 bool OnlyPackedOffsets =
true) {
27486 const EVT RetVT =
N->getValueType(0);
27488 "Gather loads are only possible for SVE vectors");
27506 if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
27509 Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
27510 }
else if (Opcode == AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) {
27513 Opcode = AArch64ISD::GLD1Q_MERGE_ZERO;
27521 if ((Opcode == AArch64ISD::GLDNT1_MERGE_ZERO ||
27522 Opcode == AArch64ISD::GLD1Q_MERGE_ZERO) &&
27523 Offset.getValueType().isVector())
27532 if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
27533 Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
27536 if (MVT::nxv4i32 ==
Base.getValueType().getSimpleVT().SimpleTy)
27537 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
27538 ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
27539 : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
27541 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
27542 ? AArch64ISD::GLD1_MERGE_ZERO
27543 : AArch64ISD::GLDFF1_MERGE_ZERO;
27556 if (!OnlyPackedOffsets &&
27557 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
27578 if (RetVT.
isInteger() && (RetVT != HwRetVt))
27584 Load = DAG.
getNode(ISD::BITCAST,
DL, RetVT, Load.getValue(0));
27594 unsigned Opc = Src->getOpcode();
27597 if (
Opc == AArch64ISD::UUNPKHI ||
Opc == AArch64ISD::UUNPKLO) {
27599 unsigned SOpc =
Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
27600 : AArch64ISD::SUNPKLO;
27615 assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
27616 "Sign extending from an invalid type");
27623 return DAG.
getNode(SOpc,
DL,
N->getValueType(0), Ext);
27627 if (
Opc == AArch64ISD::CSEL &&
27629 EVT VT =
N->getValueType(0);
27635 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT, TVal,
27641 return DAG.
getNode(AArch64ISD::CSEL,
DL, VT,
27655 unsigned MemVTOpNum = 4;
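  // Every zero-extending (contiguous or gather) load opcode below has a
  // sign-extending twin; switching to it lets a following sign_extend_inreg
  // fold into the load itself.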
27657 case AArch64ISD::LD1_MERGE_ZERO:
27658 NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
27661 case AArch64ISD::LDNF1_MERGE_ZERO:
27662 NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
27665 case AArch64ISD::LDFF1_MERGE_ZERO:
27666 NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
27669 case AArch64ISD::GLD1_MERGE_ZERO:
27670 NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
27672 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
27673 NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
27675 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
27676 NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
27678 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
27679 NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
27681 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
27682 NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
27684 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
27685 NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
27687 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
27688 NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
27690 case AArch64ISD::GLDFF1_MERGE_ZERO:
27691 NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
27693 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
27694 NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
27696 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
27697 NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
27699 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
27700 NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
27702 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
27703 NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
27705 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
27706 NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
27708 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
27709 NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
27711 case AArch64ISD::GLDNT1_MERGE_ZERO:
27712 NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
27721 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
27724 EVT DstVT =
N->getValueType(0);
27728 for (
unsigned I = 0;
I < Src->getNumOperands(); ++
I)
27729 Ops.push_back(Src->getOperand(
I));
27743 const unsigned OffsetPos = 4;
27747 if (
Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
27766 unsigned ScalarSizeInBytes) {
27767 const unsigned ImmPos = 4, OffsetPos = 3;
27787 switch (
Op.getOpcode()) {
27790 case AArch64ISD::ANDV_PRED:
27791 case AArch64ISD::EORV_PRED:
27792 case AArch64ISD::FADDA_PRED:
27793 case AArch64ISD::FADDV_PRED:
27794 case AArch64ISD::FMAXNMV_PRED:
27795 case AArch64ISD::FMAXV_PRED:
27796 case AArch64ISD::FMINNMV_PRED:
27797 case AArch64ISD::FMINV_PRED:
27798 case AArch64ISD::ORV_PRED:
27799 case AArch64ISD::SADDV_PRED:
27800 case AArch64ISD::SMAXV_PRED:
27801 case AArch64ISD::SMINV_PRED:
27802 case AArch64ISD::UADDV_PRED:
27803 case AArch64ISD::UMAXV_PRED:
27804 case AArch64ISD::UMINV_PRED:
27812 switch (
Op.getOpcode()) {
27815 case AArch64ISD::REINTERPRET_CAST:
27819 case AArch64ISD::PTRUE:
27820 return Op.getConstantOperandVal(0) == AArch64SVEPredPattern::all;
27826 SDValue InsertVec =
N->getOperand(0);
27827 SDValue InsertElt =
N->getOperand(1);
27828 SDValue InsertIdx =
N->getOperand(2);
27872 EVT VT =
N->getValueType(0);
27878 auto hasValidElementTypeForFPExtLoad = [](
EVT VT) {
27880 return EltVT == MVT::f32 || EltVT == MVT::f64;
27908 EVT VT =
N->getValueType(0);
27911 if (!VT.
isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
27927 EVT VT =
N->getValueType(0);
27929 SDValue Insert =
N->getOperand(0);
27933 if (!Insert.getOperand(0).isUndef())
27936 uint64_t IdxInsert = Insert.getConstantOperandVal(2);
27937 uint64_t IdxDupLane =
N->getConstantOperandVal(1);
27938 if (IdxInsert != 0 || IdxDupLane != 0)
27941 SDValue Bitcast = Insert.getOperand(1);
27942 if (Bitcast.getOpcode() != ISD::BITCAST)
27945 SDValue Subvec = Bitcast.getOperand(0);
27955 DAG.
getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
27956 SDValue NewDuplane128 = DAG.
getNode(AArch64ISD::DUPLANE128,
DL, NewSubvecVT,
27957 NewInsert,
N->getOperand(1));
27958 return DAG.
getNode(ISD::BITCAST,
DL, VT, NewDuplane128);
27981 if (
LHS.getOpcode() == ISD::BITCAST)
27982 ExtractHigh =
LHS.getOperand(0);
27988 if (
RHS.getOpcode() == ISD::BITCAST)
27989 ExtractHigh =
RHS.getOperand(0);
28000 if (TruncHighOp.
getOpcode() == AArch64ISD::DUP ||
28018 bool HasFoundMULLow =
true;
28020 if (ExtractHighSrcVec->
use_size() != 2)
28021 HasFoundMULLow =
false;
28030 HasFoundMULLow =
false;
28037 if (!ExtractLow || !ExtractLow->
hasOneUse())
28038 HasFoundMULLow =
false;
28041 if (HasFoundMULLow) {
28043 if (ExtractLowUser->
getOpcode() !=
N->getOpcode()) {
28044 HasFoundMULLow =
false;
28046 if (ExtractLowUser->
getOperand(0) == ExtractLow) {
28050 HasFoundMULLow =
false;
28055 HasFoundMULLow =
false;
28068 if (HasFoundMULLow && (TruncLowOp.
getOpcode() == AArch64ISD::DUP ||
28073 if (TruncHighOpVT != UZP1VT)
28074 TruncHighOp = DAG.
getNode(ISD::BITCAST,
DL, UZP1VT, TruncHighOp);
28075 if (TruncLowOpVT != UZP1VT)
28076 TruncLowOp = DAG.
getNode(ISD::BITCAST,
DL, UZP1VT, TruncLowOp);
28079 DAG.
getNode(AArch64ISD::UZP1,
DL, UZP1VT, TruncLowOp, TruncHighOp);
28086 if (HasFoundMULLow) {
28116 auto Mask =
N->getOperand(0);
28117 auto Pred =
N->getOperand(1);
28122 if (Pred->getOpcode() == AArch64ISD::REINTERPRET_CAST)
28123 Pred = Pred->getOperand(0);
28126 Pred = Pred->getOperand(0);
28127 Pred = DAG.
getNode(AArch64ISD::REINTERPRET_CAST,
DL, MVT::nxv16i1, Pred);
28128 return DAG.
getNode(AArch64ISD::PTEST_FIRST,
DL,
N->getValueType(0), Mask,
28160 EVT VT =
N->getValueType(0);
28161 if (VT != MVT::v1i64)
28177 if (UADDLV.
getOpcode() != AArch64ISD::UADDLV ||
28187 DAG.
getNode(AArch64ISD::NVCAST,
DL, MVT::v1i64, EXTRACT_SUBVEC);
28197 unsigned NumParts =
N->getNumOperands();
28198 if (NumParts != 2 && NumParts != 4)
28201 EVT SubVecTy =
N->getValueType(0);
28214 for (
unsigned I = 0;
I < NumParts;
I++) {
28230 if (!MaskedLoad || !MaskedLoad->hasNUsesOfValue(NumParts, 0) ||
28232 !MaskedLoad->getOffset().isUndef() ||
28233 (!MaskedLoad->getPassThru()->isUndef() &&
28240 getNarrowMaskForInterleavedOps(DAG,
DL, MaskedLoad->getMask(), NumParts);
28244 const Intrinsic::ID IID = NumParts == 2 ? Intrinsic::aarch64_sve_ld2_sret
28245 : Intrinsic::aarch64_sve_ld4_sret;
28246 SDValue NewLdOps[] = {MaskedLoad->getChain(),
28248 MaskedLoad->getBasePtr()};
28252 {SubVecTy, SubVecTy, MVT::Other}, NewLdOps);
28255 {SubVecTy, SubVecTy, SubVecTy, SubVecTy, MVT::Other},
28260 for (
unsigned Idx = 0; Idx < NumParts; Idx++)
28291 if (
N->hasOneUse()) {
28292 unsigned UseOpc =
N->user_begin()->getOpcode();
28294 UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS)
28299 EVT VT =
N->getValueType(0);
28314 unsigned IntrinsicID =
N->getConstantOperandVal(1);
28316 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
28317 : AArch64SysReg::RNDRRS);
28342 EVT VT =
N->getValueType(0);
28343 EVT MaskVT = Mask.getValueType();
28362 switch (
N->getOpcode()) {
28368 case ISD::VECREDUCE_AND:
28369 case ISD::VECREDUCE_OR:
28370 case ISD::VECREDUCE_XOR:
28381 case AArch64ISD::ANDS:
28383 case AArch64ISD::ADC:
28387 case AArch64ISD::SBC:
28389 case AArch64ISD::ADCS:
28393 case AArch64ISD::SBCS:
28397 case AArch64ISD::ADDS:
28399 case AArch64ISD::SUBS:
28401 case AArch64ISD::BICi: {
28404 APInt DemandedElts =
28452 return performLOADCombine(
N, DCI, DAG, Subtarget);
28454 return performSTORECombine(
N, DCI, DAG, Subtarget);
28456 return performMSTORECombine(
N, DCI, DAG, Subtarget);
28458 case ISD::MSCATTER:
28459 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
28461 case ISD::FP_EXTEND:
28463 case AArch64ISD::BRCOND:
28465 case AArch64ISD::TBNZ:
28466 case AArch64ISD::TBZ:
28468 case AArch64ISD::CSEL:
28470 case AArch64ISD::DUP:
28471 case AArch64ISD::DUPLANE8:
28472 case AArch64ISD::DUPLANE16:
28473 case AArch64ISD::DUPLANE32:
28474 case AArch64ISD::DUPLANE64:
28476 case AArch64ISD::DUPLANE128:
28478 case AArch64ISD::NVCAST:
28480 case AArch64ISD::SPLICE:
28482 case AArch64ISD::UUNPKLO:
28483 case AArch64ISD::UUNPKHI:
28485 case AArch64ISD::UZP1:
28486 case AArch64ISD::UZP2:
28488 case AArch64ISD::SETCC_MERGE_ZERO:
28490 case AArch64ISD::REINTERPRET_CAST:
28492 case AArch64ISD::GLD1_MERGE_ZERO:
28493 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
28494 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
28495 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
28496 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
28497 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
28498 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
28499 case AArch64ISD::GLD1S_MERGE_ZERO:
28500 case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
28501 case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
28502 case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
28503 case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
28504 case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
28505 case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
28507 case AArch64ISD::VASHR:
28508 case AArch64ISD::VLSHR:
28510 case AArch64ISD::SUNPKLO:
28512 case AArch64ISD::BSP:
28518 case ISD::VECREDUCE_ADD:
28520 case ISD::GET_ACTIVE_LANE_MASK:
28522 case AArch64ISD::UADDV:
28524 case AArch64ISD::SMULL:
28525 case AArch64ISD::UMULL:
28526 case AArch64ISD::PMULL:
28528 case AArch64ISD::PTEST_FIRST:
28532 switch (
N->getConstantOperandVal(1)) {
28533 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
28535 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
28537 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
28539 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
28541 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
28542 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
28543 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
28544 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
28545 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
28546 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
28547 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
28548 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
28550 case Intrinsic::aarch64_neon_ld2:
28551 case Intrinsic::aarch64_neon_ld3:
28552 case Intrinsic::aarch64_neon_ld4:
28553 case Intrinsic::aarch64_neon_ld1x2:
28554 case Intrinsic::aarch64_neon_ld1x3:
28555 case Intrinsic::aarch64_neon_ld1x4:
28556 case Intrinsic::aarch64_neon_ld2lane:
28557 case Intrinsic::aarch64_neon_ld3lane:
28558 case Intrinsic::aarch64_neon_ld4lane:
28559 case Intrinsic::aarch64_neon_ld2r:
28560 case Intrinsic::aarch64_neon_ld3r:
28561 case Intrinsic::aarch64_neon_ld4r:
28562 case Intrinsic::aarch64_neon_st2:
28563 case Intrinsic::aarch64_neon_st3:
28564 case Intrinsic::aarch64_neon_st4:
28565 case Intrinsic::aarch64_neon_st1x2:
28566 case Intrinsic::aarch64_neon_st1x3:
28567 case Intrinsic::aarch64_neon_st1x4:
28568 case Intrinsic::aarch64_neon_st2lane:
28569 case Intrinsic::aarch64_neon_st3lane:
28570 case Intrinsic::aarch64_neon_st4lane:
28572 case Intrinsic::aarch64_sve_ldnt1:
28574 case Intrinsic::aarch64_sve_ld1rq:
28576 case Intrinsic::aarch64_sve_ld1ro:
28578 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
28580 case Intrinsic::aarch64_sve_ldnt1_gather:
28582 case Intrinsic::aarch64_sve_ldnt1_gather_index:
28584 AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
28585 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
28587 case Intrinsic::aarch64_sve_ld1:
28589 case Intrinsic::aarch64_sve_ldnf1:
28591 case Intrinsic::aarch64_sve_ldff1:
28593 case Intrinsic::aarch64_sve_st1:
28595 case Intrinsic::aarch64_sve_stnt1:
28597 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
28599 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
28601 case Intrinsic::aarch64_sve_stnt1_scatter:
28603 case Intrinsic::aarch64_sve_stnt1_scatter_index:
28605 case Intrinsic::aarch64_sve_ld1_gather:
28607 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
28608 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
28610 case Intrinsic::aarch64_sve_ld1q_gather_index:
28612 AArch64ISD::GLD1Q_INDEX_MERGE_ZERO);
28613 case Intrinsic::aarch64_sve_ld1_gather_index:
28615 AArch64ISD::GLD1_SCALED_MERGE_ZERO);
28616 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
28619 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
28622 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
28624 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
28626 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
28628 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
28630 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
28632 case Intrinsic::aarch64_sve_ldff1_gather:
28634 case Intrinsic::aarch64_sve_ldff1_gather_index:
28636 AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
28637 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
28639 AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
28641 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
28643 AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
28645 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
28647 AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
28649 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
28651 AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
28653 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
28655 AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
28656 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
28657 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
28659 case Intrinsic::aarch64_sve_st1q_scatter_index:
28661 case Intrinsic::aarch64_sve_st1_scatter:
28663 case Intrinsic::aarch64_sve_st1_scatter_index:
28665 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
28668 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
28671 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
28673 AArch64ISD::SST1_SXTW_SCALED_PRED,
28675 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
28677 AArch64ISD::SST1_UXTW_SCALED_PRED,
28679 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
28681 case Intrinsic::aarch64_rndr:
28682 case Intrinsic::aarch64_rndrrs:
28684 case Intrinsic::aarch64_sme_ldr_zt:
28686 DAG.
getVTList(MVT::Other),
N->getOperand(0),
28687 N->getOperand(2),
N->getOperand(3));
28688 case Intrinsic::aarch64_sme_str_zt:
28690 DAG.
getVTList(MVT::Other),
N->getOperand(0),
28691 N->getOperand(2),
N->getOperand(3));
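// Tail-call hook: a node is "used by return only" when its single use feeds
// the CopyToReg chain that terminates in RET_GLUE, allowing the producing call
// to be emitted as a tail call.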
28714bool AArch64TargetLowering::isUsedByReturnOnly(
SDNode *
N,
28716 if (
N->getNumValues() != 1)
28718 if (!
N->hasNUsesOfValue(1, 0))
28722 SDNode *Copy = *
N->user_begin();
28726 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
28729 TCChain = Copy->getOperand(0);
28730 }
else if (Copy->getOpcode() != ISD::FP_EXTEND)
28733 bool HasRet =
false;
28735 if (
Node->getOpcode() != AArch64ISD::RET_GLUE)
28751bool AArch64TargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
28759 if (!CstOffset || CstOffset->isZero())
28765 return isInt<9>(CstOffset->getSExtValue());
28768bool AArch64TargetLowering::getIndexedAddressParts(
SDNode *
N,
SDNode *
Op,
28776 SDNode *ValOnlyUser =
nullptr;
28777 for (SDUse &U :
N->uses()) {
28778 if (
U.getResNo() == 1)
28780 if (ValOnlyUser ==
nullptr)
28781 ValOnlyUser =
U.getUser();
28783 ValOnlyUser =
nullptr;
28788 auto IsUndefOrZero = [](
SDValue V) {
28796 (ValOnlyUser->
getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU &&
28797 IsUndefOrZero(ValOnlyUser->
getOperand(2)))))
28800 Base =
Op->getOperand(0);
28804 int64_t RHSC =
RHS->getSExtValue();
28806 RHSC = -(uint64_t)RHSC;
28812 if (!Subtarget->isLittleEndian() && MemType.
isVector() &&
28830 VT =
LD->getMemoryVT();
28831 Ptr =
LD->getBasePtr();
28833 VT =
ST->getMemoryVT();
28834 Ptr =
ST->getBasePtr();
28844bool AArch64TargetLowering::getPostIndexedAddressParts(
28850 VT =
LD->getMemoryVT();
28851 Ptr =
LD->getBasePtr();
28853 VT =
ST->getMemoryVT();
28854 Ptr =
ST->getBasePtr();
28873 EVT VT =
N->getValueType(0);
28874 [[maybe_unused]]
EVT SrcVT =
Op.getValueType();
28876 "Must be bool vector.");
28882 bool AllUndef =
true;
28883 for (
unsigned I = 1;
I <
Op.getNumOperands(); ++
I)
28884 AllUndef &=
Op.getOperand(
I).isUndef();
28887 Op =
Op.getOperand(0);
28890 SDValue VectorBits = vectorToScalarBitmask(
Op.getNode(), DAG);
28901 EVT VT =
N->getValueType(0);
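// Result-type legalisation for BITCAST: small illegal vectors (v2i16, v4i8,
// v2i8) and f16/bf16 <-> i16 casts are widened or moved through FP registers.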
28911void AArch64TargetLowering::ReplaceBITCASTResults(
28915 EVT VT =
N->getValueType(0);
28916 EVT SrcVT =
Op.getValueType();
28918 if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
28923 if (VT == MVT::v4i8 && SrcVT == MVT::i32) {
28928 if (VT == MVT::v2i8 && SrcVT == MVT::i16) {
28935 "Expected fp->int bitcast!");
28954 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
28966 EVT VT =
N->getValueType(0);
28969 !
N->getFlags().hasAllowReassociation()) ||
28970 (VT.
getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
28978 X =
N->getOperand(1);
28983 if (Shuf->getOperand(0) !=
X || !Shuf->getOperand(1)->isUndef())
28988 for (
int I = 0,
E = Mask.size();
I <
E;
I++)
28989 if (Mask[
I] != (
I % 2 == 0 ?
I + 1 :
I - 1))
28994 assert(LoHi.first.getValueType() == LoHi.second.getValueType());
28995 SDValue Addp = DAG.
getNode(AArch64ISD::ADDP,
N, LoHi.first.getValueType(),
28996 LoHi.first, LoHi.second);
29007 DAG.
getUNDEF(LoHi.first.getValueType())),
29014 unsigned AcrossOp) {
29025void AArch64TargetLowering::ReplaceExtractSubVectorResults(
29028 EVT InVT =
In.getValueType();
29035 EVT VT =
N->getValueType(0);
29048 unsigned Index = CIndex->getZExtValue();
29053 : (unsigned)AArch64ISD::UUNPKHI;
29060void AArch64TargetLowering::ReplaceGetActiveLaneMaskResults(
29062 assert((Subtarget->hasSVE2p1() ||
29063 (Subtarget->hasSME2() && Subtarget->isStreaming())) &&
29064 "Custom lower of get.active.lane.mask missing required feature.");
29066 assert(N->getValueType(0) == MVT::nxv32i1 &&
29067 "Unexpected result type for get.active.lane.mask");
29074 "Unexpected operand type for get.active.lane.mask");
29088 {WideMask.getValue(0), WideMask.getValue(1)}));
29094 auto [VLo, VHi] = DAG.SplitScalar(V, DL, MVT::i64, MVT::i64);
29101 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
29110 assert(N->getValueType(0) == MVT::i128 &&
29111 "AtomicCmpSwap on types less than 128 should be legal");
29114 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
29125 switch (MemOp->getMergedOrdering()) {
29127 Opcode = AArch64::CASPX;
29130 Opcode = AArch64::CASPAX;
29133 Opcode = AArch64::CASPLX;
29137 Opcode = AArch64::CASPALX;
29147 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
29161 switch (MemOp->getMergedOrdering()) {
29163 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
29166 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
29169 Opcode = AArch64::CMP_SWAP_128_RELEASE;
29173 Opcode = AArch64::CMP_SWAP_128;
29180 auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
29181 auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
29182 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
29183 New.first, New.second, N->getOperand(0)};
29185 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
29200 assert(ISDOpcode != ISD::ATOMIC_LOAD_CLR &&
29201 "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
29202 assert(ISDOpcode != ISD::ATOMIC_LOAD_ADD && "There is no 128 bit LDADD");
29203 assert(ISDOpcode != ISD::ATOMIC_LOAD_SUB && "There is no 128 bit LDSUB");
29205 if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
29207 switch (Ordering) {
29209 return AArch64::LDCLRP;
29212 return AArch64::LDCLRPA;
29215 return AArch64::LDCLRPL;
29219 return AArch64::LDCLRPAL;
29226 if (ISDOpcode == ISD::ATOMIC_LOAD_OR) {
29227 switch (Ordering) {
29229 return AArch64::LDSETP;
29232 return AArch64::LDSETPA;
29235 return AArch64::LDSETPL;
29239 return AArch64::LDSETPAL;
29246 if (ISDOpcode == ISD::ATOMIC_SWAP) {
29247 switch (Ordering) {
29249 return AArch64::SWPP;
29252 return AArch64::SWPPA;
29255 return AArch64::SWPPL;
29259 return AArch64::SWPPAL;
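The switches above all follow the same shape: the merged memory ordering selects between the plain, acquire (A), release (L), and acquire-release (AL) variants of an LSE/LSE128 instruction. A standalone sketch of that selection in plain C++ (the enum and function names are illustrative assumptions, not LLVM API):

#include <atomic>

enum class Variant { Plain, Acquire, Release, AcqRel };

// Maps a memory order to the instruction-variant suffix, mirroring the
// monotonic / acquire / release / acq_rel-or-seq_cst cases handled above.
static Variant pickVariant(std::memory_order MO) {
  switch (MO) {
  case std::memory_order_relaxed: return Variant::Plain;
  case std::memory_order_consume:
  case std::memory_order_acquire: return Variant::Acquire;
  case std::memory_order_release: return Variant::Release;
  default:                        return Variant::AcqRel; // acq_rel, seq_cst
  }
}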
29281 assert(N->getValueType(0) == MVT::i128 &&
29282 "AtomicLoadXXX on types less than 128 should be legal");
29284 if (!Subtarget->hasLSE128())
29288 const SDValue &Chain = N->getOperand(0);
29289 const SDValue &Ptr = N->getOperand(1);
29290 const SDValue &Val128 = N->getOperand(2);
29291 std::pair<SDValue, SDValue> Val2x64 =
29294 const unsigned ISDOpcode = N->getOpcode();
29295 const unsigned MachineOpcode =
29298 if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
29308 SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
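LDCLRP atomically clears the bits that are set in its operand, so the path above lowers a 128-bit atomic AND by complementing the value first: and(Old, Mask) == clear(Old, ~Mask). A plain C++ sketch of that identity on one 64-bit half (illustrative only):

#include <cassert>
#include <cstdint>

// "clear" primitive: returns Old with every bit set in Bits cleared.
static uint64_t clearBits(uint64_t Old, uint64_t Bits) { return Old & ~Bits; }

static void checkAndViaClear(uint64_t Old, uint64_t Mask) {
  // Clearing the complemented mask gives the same result as AND-ing the mask.
  assert(clearBits(Old, ~Mask) == (Old & Mask));
}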
29326void AArch64TargetLowering::ReplaceNodeResults(
29328 switch (N->getOpcode()) {
29332 ReplaceBITCASTResults(N, Results, DAG);
29334 case ISD::VECREDUCE_ADD:
29335 case ISD::VECREDUCE_SMAX:
29336 case ISD::VECREDUCE_SMIN:
29337 case ISD::VECREDUCE_UMAX:
29338 case ISD::VECREDUCE_UMIN:
29355 case AArch64ISD::SADDV:
29358 case AArch64ISD::UADDV:
29361 case AArch64ISD::SMINV:
29364 case AArch64ISD::UMINV:
29367 case AArch64ISD::SMAXV:
29370 case AArch64ISD::UMAXV:
29376 LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHS_PRED));
29381 LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
29387 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
29390 case ISD::ATOMIC_CMP_SWAP:
29393 case ISD::ATOMIC_LOAD_CLR:
29394 assert(N->getValueType(0) != MVT::i128 &&
29395 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
29397 case ISD::ATOMIC_LOAD_AND:
29398 case ISD::ATOMIC_LOAD_OR:
29399 case ISD::ATOMIC_SWAP: {
29401 "Expected 128-bit atomicrmw.");
29406 case ISD::ADDRSPACECAST: {
29411 case ISD::ATOMIC_LOAD:
29417 if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() &&
29426 AArch64ISD::LDNP, SDLoc(N),
29427 DAG.getVTList({MVT::v2i64, MVT::v2i64, MVT::Other}),
29428 {LoadNode->getChain(), LoadNode->getBasePtr()},
29447 bool isLoadAcquire =
29449 unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;
29452 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
29455 Opcode, SDLoc(N), DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
29456 {LoadNode->getChain(), LoadNode->getBasePtr()},
29463 Result.getValue(FirstRes), Result.getValue(1 - FirstRes));
29469 ReplaceExtractSubVectorResults(N, Results, DAG);
29477 case ISD::GET_ACTIVE_LANE_MASK:
29478 ReplaceGetActiveLaneMaskResults(N, Results, DAG);
29481 EVT VT = N->getValueType(0);
29488 case Intrinsic::aarch64_sve_clasta_n: {
29489 assert((VT == MVT::i8 || VT == MVT::i16) &&
29490 "custom lowering for unexpected type");
29493 auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
29494 N->getOperand(1), Op2, N->getOperand(3));
29498 case Intrinsic::aarch64_sve_clastb_n: {
29499 assert((VT == MVT::i8 || VT == MVT::i16) &&
29500 "custom lowering for unexpected type");
29503 auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
29504 N->getOperand(1), Op2, N->getOperand(3));
29508 case Intrinsic::aarch64_sve_lasta: {
29509 assert((VT == MVT::i8 || VT == MVT::i16) &&
29510 "custom lowering for unexpected type");
29512 auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
29513 N->getOperand(1), N->getOperand(2));
29517 case Intrinsic::aarch64_sve_lastb: {
29518 assert((VT == MVT::i8 || VT == MVT::i16) &&
29519 "custom lowering for unexpected type");
29521 auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
29522 N->getOperand(1), N->getOperand(2));
29526 case Intrinsic::aarch64_sme_in_streaming_mode: {
29531 getRuntimePStateSM(DAG, Chain, DL, N->getValueType(0));
29536 case Intrinsic::experimental_vector_match: {
29554 assert(N->getValueType(0) == MVT::i128 &&
29555 "READ_REGISTER custom lowering is only for 128-bit sysregs");
29560 AArch64ISD::MRRS, DL, DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
29561 Chain, SysRegName);
29575 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
29590 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
29600 if (!Subtarget->hasLSE2())
29604 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
29605 LI->getAlign() >= Align(16);
29608 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29615 if (!Subtarget->hasLSE128())
29621 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29622 SI->getAlign() >= Align(16) &&
29627 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29628 RMW->getAlign() >= Align(16) &&
29637 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
29641 return LI->getType()->getPrimitiveSizeInBits() == 128 &&
29642 LI->getAlign() >= Align(16) &&
29646 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
29647 SI->getAlign() >= Align(16) &&
29670 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
29673 switch (I->getOpcode()) {
29676 case Instruction::AtomicCmpXchg:
29679 case Instruction::AtomicRMW:
29682 case Instruction::Store:
29693 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
29746 return !Subtarget.hasFPARMv8();
29762 unsigned Size = Ty->getPrimitiveSizeInBits();
29763 assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
29765 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
29782 if (Subtarget->hasLSE()) {
29800 if (Subtarget->outlineAtomics()) {
29838 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
29860 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
29866 if (ValueTy->getPrimitiveSizeInBits() == 128) {
29868 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
29871 Builder.CreateIntrinsic(Int, Addr, nullptr, "lohi");
29873 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
29874 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
29877 Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
29878 Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
29880 Value *Or = Builder.CreateOr(
29881 Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
29882 return Builder.CreateBitCast(Or, ValueTy);
29887 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
29891 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
29893 Attribute::ElementType, IntEltTy));
29894 Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
29896 return Builder.CreateBitCast(Trunc, ValueTy);
29901 Builder.CreateIntrinsic(Intrinsic::aarch64_clrex, {});
29907 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
29915 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
29920 Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
29922 Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
29924 Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
29925 return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
29929 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
29935 Val = Builder.CreateBitCast(Val, IntValTy);
29937 CallInst *CI = Builder.CreateCall(
29938 Stxr, {Builder.CreateZExtOrBitCast(
29942 Attribute::ElementType, Val->getType()));
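Both exclusive-pair paths above move a 128-bit value through two 64-bit registers: the store side truncates and shifts right by 64, the load side zero-extends, shifts left by 64, and ORs. A plain C++ sketch of that round trip (unsigned __int128 is a GCC/Clang extension used purely for illustration):

#include <cstdint>

struct Pair64 { uint64_t Lo, Hi; };

// Store side: trunc for the low half, lshr 64 + trunc for the high half.
static Pair64 split128(unsigned __int128 V) {
  return {static_cast<uint64_t>(V), static_cast<uint64_t>(V >> 64)};
}

// Load side: zext both halves, shift the high half left by 64 and OR them.
static unsigned __int128 join128(Pair64 P) {
  return (static_cast<unsigned __int128>(P.Hi) << 64) | P.Lo;
}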
29949 if (!Ty->isArrayTy()) {
29950 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
29960bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
29968 M, Intrinsic::thread_pointer, IRB.getPtrTy());
29979 if (Subtarget->isTargetAndroid())
29984 if (Subtarget->isTargetFuchsia())
29992 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
29995 RTLIB::LibcallImpl SecurityCookieVar =
29997 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
29998 SecurityCookieVar != RTLIB::Unsupported) {
30010 F->addParamAttr(0, Attribute::AttrKind::InReg);
30022 if (Subtarget->isTargetAndroid())
30031 const Constant *PersonalityFn) const {
30033 return AArch64::X0;
30039 const Constant *PersonalityFn) const {
30041 return AArch64::X1;
30054 return Mask->getValue().isPowerOf2();
30060 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
30064 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
30067 return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
30074 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
30090 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
30099 if (AArch64::GPR64RegClass.contains(*I))
30100 RC = &AArch64::GPR64RegClass;
30101 else if (AArch64::FPR64RegClass.contains(*I))
30102 RC = &AArch64::FPR64RegClass;
30112 assert(Entry->getParent()->getFunction().hasFnAttribute(
30113 Attribute::NoUnwind) &&
30114 "Function should be nounwind in insertCopiesSplitCSR!");
30115 Entry->addLiveIn(*I);
30120 for (auto *Exit : Exits)
30122 TII->get(TargetOpcode::COPY), *I)
30135 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
30163 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
30165 if (FPVT == MVT::v8bf16)
30181 "Invalid call instruction for a KCFI check");
30183 switch (MBBI->getOpcode()) {
30185 case AArch64::BLRNoIP:
30186 case AArch64::TCRETURNri:
30187 case AArch64::TCRETURNrix16x17:
30188 case AArch64::TCRETURNrix17:
30189 case AArch64::TCRETURNrinotx16:
30196 assert(Target.isReg() && "Invalid target operand for an indirect call");
30197 Target.setIsRenamable(false);
30211 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
30217void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
30243bool AArch64TargetLowering::shouldLocalize(
30245 auto &MF = *MI.getMF();
30247 auto maxUses = [](unsigned RematCost) {
30249 if (RematCost == 1)
30250 return std::numeric_limits<unsigned>::max();
30251 if (RematCost == 2)
30260 unsigned Opc = MI.getOpcode();
30262 case TargetOpcode::G_GLOBAL_VALUE: {
30271 case TargetOpcode::G_FCONSTANT:
30272 case TargetOpcode::G_CONSTANT: {
30273 const ConstantInt *CI;
30274 unsigned AdditionalCost = 0;
30276 if (Opc == TargetOpcode::G_CONSTANT)
30277 CI = MI.getOperand(1).getCImm();
30279 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
30284 auto APF = MI.getOperand(1).getFPImm()->getValueAPF();
30292 AdditionalCost = 1;
30300 RematCost += AdditionalCost;
30302 unsigned MaxUses = maxUses(RematCost);
30304 if (MaxUses == std::numeric_limits<unsigned>::max())
30306 return MRI.hasAtMostUserInstrs(Reg, MaxUses);
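shouldLocalize caps how many users a rematerializable constant may have before it is worth localizing; only the cost-1 case (unlimited uses) is fully visible above. A standalone sketch with the same shape, where the cap below the unlimited case is an illustrative assumption:

#include <limits>

// Illustrative policy only: cheap constants can be duplicated freely, more
// expensive ones only for a small (assumed) number of user instructions.
static unsigned maxUsesForRematCost(unsigned RematCost) {
  if (RematCost == 1)
    return std::numeric_limits<unsigned>::max(); // free to rematerialize
  return 2; // assumed cap for costlier constants
}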
30310 case AArch64::ADRP:
30311 case AArch64::G_ADD_LOW:
30313 case TargetOpcode::G_PTR_ADD:
30335 if (AI->getAllocatedType()->isScalableTy())
30355 "Expected legal fixed length vector!");
30360 return EVT(MVT::nxv16i8);
30362 return EVT(MVT::nxv8i16);
30364 return EVT(MVT::nxv4i32);
30366 return EVT(MVT::nxv2i64);
30368 return EVT(MVT::nxv8bf16);
30370 return EVT(MVT::nxv8f16);
30372 return EVT(MVT::nxv4f32);
30374 return EVT(MVT::nxv2f64);
30383 "Expected legal fixed length vector!");
30385 std::optional<unsigned> PgPattern =
30387 assert(PgPattern && "Unexpected element count for SVE predicate");
30395 if (MaxSVESize && MinSVESize == MaxSVESize &&
30397 PgPattern = AArch64SVEPredPattern::all;
30404 MaskVT = MVT::nxv16i1;
30409 MaskVT = MVT::nxv8i1;
30413 MaskVT = MVT::nxv4i1;
30417 MaskVT = MVT::nxv2i1;
30421 return getPTrue(DAG, DL, MaskVT, *PgPattern);
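getPredicateForFixedLengthVector derives an SVE predicate pattern from the fixed vector's element count and only widens it to an all-lanes pattern when the SVE register size is exactly known and the fixed vector fills it. A standalone sketch of that decision (the enum and values here are illustrative assumptions, not AArch64SVEPredPattern):

enum class PredPattern { VL1, VL2, VL4, VL8, VL16, All, Unknown };

static PredPattern pickPredPattern(unsigned NumElts, unsigned FixedVTBits,
                                   unsigned MinSVEBits, unsigned MaxSVEBits) {
  PredPattern P;
  switch (NumElts) { // pattern matching the element count
  case 1:  P = PredPattern::VL1;  break;
  case 2:  P = PredPattern::VL2;  break;
  case 4:  P = PredPattern::VL4;  break;
  case 8:  P = PredPattern::VL8;  break;
  case 16: P = PredPattern::VL16; break;
  default: P = PredPattern::Unknown; break;
  }
  // If the register size is exactly known and the fixed vector fills it,
  // an all-lanes predicate is equivalent.
  if (MaxSVEBits && MinSVEBits == MaxSVEBits && FixedVTBits == MaxSVEBits)
    P = PredPattern::All;
  return P;
}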
30427 "Expected legal scalable vector!");
30429 return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
30442 "Expected to convert into a scalable vector!");
30443 assert(V.getValueType().isFixedLengthVector() &&
30444 "Expected a fixed length vector operand!");
30453 "Expected to convert into a fixed length vector!");
30454 assert(V.getValueType().isScalableVector() &&
30455 "Expected a scalable vector operand!");
30462SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
30467 EVT VT = Op.getValueType();
30469 EVT LoadVT = ContainerVT;
30470 EVT MemVT = Load->getMemoryVT();
30480 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
30482 Load->getAddressingMode(), Load->getExtensionType());
30487 Load->getMemoryVT().getVectorElementType());
30489 Result = getSVESafeBitCast(ExtendVT, Result, DAG);
30490 Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
30491 Pg, Result, DAG.getUNDEF(ContainerVT));
30504 EVT InVT = Mask.getValueType();
30511 bool InvertCond = false;
30514 Mask = Mask.getOperand(0);
30535 {Pg, Op1, Op2, DAG.getCondCode(CC)});
30539SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
30544 EVT VT = Op.getValueType();
30552 "Incorrect mask type");
30558 bool IsPassThruZeroOrUndef = false;
30560 if (Load->getPassThru()->isUndef()) {
30561 PassThru = DAG.getUNDEF(ContainerVT);
30562 IsPassThruZeroOrUndef = true;
30569 IsPassThruZeroOrUndef = true;
30573 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
30574 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
30575 Load->getAddressingMode(), Load->getExtensionType());
30578 if (!IsPassThruZeroOrUndef) {
30590SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
30595 EVT VT = Store->getValue().getValueType();
30597 EVT MemVT = Store->getMemoryVT();
30604 Store->getMemoryVT().getVectorElementType());
30606 NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
30618 Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
30619 Store->getMemOperand(), Store->getAddressingMode(),
30620 Store->isTruncatingStore());
30627 EVT VT = Store->getValue().getValueType();
30629 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
30631 if (!Store->isCompressingStore())
30634 EVT MaskVT = Store->getMask().getValueType();
30642 DAG.getNode(ISD::VECREDUCE_ADD, DL, MaskReduceVT, MaskExt);
30643 if (MaskReduceVT != MVT::i64)
30650 DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT, Zero, CntActive);
30654 CompressedMask, Store->getMemoryVT(),
30655 Store->getMemOperand(), Store->getAddressingMode(),
30656 Store->isTruncatingStore(),
30660SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
30665 EVT VT = Store->getValue().getValueType();
30673 Mask, Store->getMemoryVT(), Store->getMemOperand(),
30674 Store->getAddressingMode(), Store->isTruncatingStore(),
30675 Store->isCompressingStore());
30678SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
30681 EVT VT = Op.getValueType();
30685 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
30698 DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
30707 if (EltVT == MVT::i32 || EltVT == MVT::i64)
30708 return LowerToPredicatedOp(Op, DAG, PredOpcode);
30724 auto HalveAndExtendVector = [&DAG, &DL, &HalfVT, &PromVT,
30728 DAG.getConstant(HalfVT.getVectorNumElements(), DL, MVT::i64);
30731 return std::pair<SDValue, SDValue>(
30737 auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(Op.getOperand(0));
30738 auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(Op.getOperand(1));
30746SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
30748 EVT VT = Op.getValueType();
30757 unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
30764 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
30769 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
30774 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
30782SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
30784 EVT VT = Op.getValueType();
30797 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
30798 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
30803 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
30804 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
30809 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
30810 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
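Each bitcast/UZP1 pair above halves the element width: the vector is reinterpreted at the narrower type and only the even-numbered lanes are kept, which on little-endian are the low halves of the wider elements. A plain C++ sketch of one such step (i32 -> i16, little-endian assumed, std::vector used for illustration):

#include <cstdint>
#include <cstring>
#include <vector>

// Keep the low 16 bits of each 32-bit element by reinterpreting the buffer
// as i16 and taking the even lanes.
static std::vector<uint16_t> truncStep(const std::vector<uint32_t> &In) {
  std::vector<uint16_t> Halves(In.size() * 2);
  std::memcpy(Halves.data(), In.data(), In.size() * sizeof(uint32_t));
  std::vector<uint16_t> Out;
  for (size_t I = 0; I < Halves.size(); I += 2) // even lanes == low halves
    Out.push_back(Halves[I]);
  return Out;
}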
30818SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
30820 EVT VT = Op.getValueType();
30821 EVT InVT = Op.getOperand(0).getValueType();
30831SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
30833 EVT VT = Op.getValueType();
30837 EVT InVT = Op.getOperand(0).getValueType();
30842 Op.getOperand(1), Op.getOperand(2));
30852 unsigned NewOp) const {
30853 EVT VT = Op.getValueType();
30863 for (const SDValue &V : Op->op_values()) {
30870 EVT VTArg = VTNode->getVT().getVectorElementType();
30877 "Expected only legal fixed-width types");
30884 auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
30891 for (const SDValue &V : Op->op_values()) {
30892 assert((!V.getValueType().isVector() ||
30893 V.getValueType().isScalableVector()) &&
30894 "Only scalable vectors are supported!");
30901 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
30909 EVT VT = Op.getValueType();
30911 "Only expected to lower fixed length vector operation!");
30916 for (const SDValue &V : Op->op_values()) {
30920 if (!V.getValueType().isVector()) {
30926 assert(V.getValueType().isFixedLengthVector() &&
30928 "Only fixed length vectors are supported!");
30932 auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
30936SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
30938 SDLoc DL(ScalarOp);
30944 EVT ContainerVT = SrcVT;
30955 DAG.getUNDEF(ContainerVT), AccOp, Zero);
30964SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
30966 SDLoc DL(ReduceOp);
30968 EVT OpVT = Op.getValueType();
30979 case ISD::VECREDUCE_OR:
30986 case ISD::VECREDUCE_AND: {
30990 case ISD::VECREDUCE_XOR: {
30993 if (OpVT == MVT::nxv1i1) {
30995 Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Pg);
30996 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Op);
31007SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
31010 SDLoc DL(ScalarOp);
31016 Subtarget->useSVEForFixedLengthVectors())) {
31022 if (ScalarOp.getOpcode() == ISD::VECREDUCE_ADD &&
31036 EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
31055AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
31057 EVT VT = Op.getValueType();
31060 EVT InVT = Op.getOperand(1).getValueType();
31067 EVT MaskVT = Op.getOperand(0).getValueType();
31079SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
31082 EVT InVT = Op.getOperand(0).getValueType();
31086 "Only expected to lower fixed length vector operation!");
31088 "Expected integer result of the same bit length as the inputs!");
31095 auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
31096 {Pg, Op1, Op2, Op.getOperand(2)});
31104AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
31107 auto SrcOp = Op.getOperand(0);
31108 EVT VT = Op.getValueType();
31110 EVT ContainerSrcVT =
31114 Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
31118SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
31124 "Unexpected number of operands in CONCAT_VECTORS");
31126 auto SrcOp1 = Op.getOperand(0);
31127 auto SrcOp2 = Op.getOperand(1);
31128 EVT VT = Op.getValueType();
31129 EVT SrcVT = SrcOp1.getValueType();
31135 DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT,
31141 if (NumOperands > 2) {
31144 for (unsigned I = 0; I < NumOperands; I += 2)
31146 Op->getOperand(I), Op->getOperand(I + 1)));
31157 Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
31163AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
31165 EVT VT = Op.getValueType();
31180 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
31181 Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
31182 Pg, Val, DAG.getUNDEF(ContainerVT));
31188AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
31190 EVT VT = Op.getValueType();
31202 Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
31208 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
31212AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
31214 EVT VT = Op.getValueType();
31218 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
31219 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
31237 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
31247 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
31251 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
31256AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
31259 EVT OpVT = Op.getValueType();
31261 "Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
31272 for (unsigned I = 0; I < 3; ++I) {
31275 SDValue V = getSVESafeBitCast(PackedVT, Op.getOperand(I), DAG);
31287 Ops.push_back(StackPtr);
31290 SDVTList VTs = DAG.getVTList(PackedVT, PackedVT, PackedVT, MVT::Other);
31301 if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
31308 IntID = Intrinsic::aarch64_sve_uzp_x2;
31311 if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
31314 IntID = Intrinsic::aarch64_sve_uzp_x4;
31320 Ops.append(Op->op_values().begin(), Op->op_values().end());
31337 EVT OpVT = Op.getValueType();
31339 "Expected scalable vector in LowerVECTOR_INTERLEAVE.");
31346 InVecs.push_back(getSVESafeBitCast(PackedVT, V, DAG));
31358 Ops.append(InVecs);
31360 Ops.push_back(StackPtr);
31367 for (unsigned I = 0; I < 3; ++I) {
31371 Results.push_back(getSVESafeBitCast(OpVT, L, DAG));
31378 if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
31385 IntID = Intrinsic::aarch64_sve_zip_x2;
31388 if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
31391 IntID = Intrinsic::aarch64_sve_zip_x4;
31397 Ops.append(Op->op_values().begin(), Op->op_values().end());
31428 "Unexpected histogram update operation");
31430 EVT IndexVT = Index.getValueType();
31437 bool ExtTrunc = IncSplatVT != MemVT;
31452 DAG.getVTList(IncSplatVT, MVT::Other), MemVT, DL, Ops, GMMO, IndexType,
31472 ScatterOps, SMMO, IndexType, ExtTrunc);
31485AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op,
31492 EVT ResultVT = Op.getValueType();
31493 EVT OrigResultVT = ResultVT;
31494 EVT OpVT = LHS.getValueType();
31496 bool ConvertToScalable =
31502 if (!ConvertToScalable && ResultVT == MVT::v2i32 && OpVT == MVT::v16i8) {
31507 SDValue Reduced = DAG.getNode(AArch64ISD::ADDP, DL, MVT::v4i32, Wide, Wide);
31511 if (ConvertToScalable) {
31517 Op = DAG.getNode(Op.getOpcode(), DL, ResultVT, {Acc, LHS, RHS});
31531 bool IsUnsigned = Op.getOpcode() == ISD::PARTIAL_REDUCE_UMLA;
31532 if (Subtarget->hasSVE2() || Subtarget->isStreamingSVEAvailable()) {
31533 unsigned LoOpcode = IsUnsigned ? AArch64ISD::UADDWB : AArch64ISD::SADDWB;
31534 unsigned HiOpcode = IsUnsigned ? AArch64ISD::UADDWT : AArch64ISD::SADDWT;
31536 Res = DAG.getNode(HiOpcode, DL, ResultVT, Lo, DotNode);
31539 auto [DotNodeLo, DotNodeHi] = DAG.SplitVector(DotNode, DL);
31556AArch64TargetLowering::LowerGET_ACTIVE_LANE_MASK(SDValue Op,
31558 EVT VT = Op.getValueType();
31561 assert(Subtarget->isSVEorStreamingSVEAvailable() &&
31562 "Lowering fixed length get_active_lane_mask requires SVE!");
31572 Op.getOperand(0), Op.getOperand(1));
31579AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
31581 EVT VT = Op.getValueType();
31585 unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
31586 : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
31603 Val = getSVESafeBitCast(CvtVT, Val, DAG);
31604 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
31636 if (!IsSingleOp && !Subtarget.hasSVE2())
31639 EVT VTOp1 = Op.getOperand(0).getValueType();
31641 unsigned IndexLen = MinSVESize / BitsPerElt;
31646 bool MinMaxEqual = (MinSVESize == MaxSVESize);
31647 assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
31648 "Incorrectly legalised shuffle operation");
31657 if (!IsSingleOp && !MinMaxEqual && BitsPerElt == 8)
31660 for (int Index : ShuffleMask) {
31668 if ((unsigned)Index >= ElementsPerVectorReg) {
31670 Index += IndexLen - ElementsPerVectorReg;
31672 Index = Index - ElementsPerVectorReg;
31675 } else if (!MinMaxEqual)
31680 if ((unsigned)Index >= MaxOffset)
31689 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) {
31706 else if (Subtarget.hasSVE2()) {
31707 if (!MinMaxEqual) {
31709 SDValue VScale = (BitsPerElt == 64)
31730 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
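GenerateFixedLengthSVETBL rewrites the shuffle mask into TBL indices: lanes taken from the second source are shifted up so they land in the second table register, which begins IndexLen elements in rather than immediately after the fixed-length data. A standalone sketch of that index adjustment in plain C++ (the undef and out-of-range handling from the loop above is omitted, and the helper name is an assumption):

#include <vector>

static std::vector<unsigned> buildTBLIndices(const std::vector<int> &Mask,
                                             unsigned ElementsPerVectorReg,
                                             unsigned IndexLen) {
  std::vector<unsigned> TBLMask;
  for (int Index : Mask) {
    unsigned I = static_cast<unsigned>(Index);
    if (I >= ElementsPerVectorReg)
      I += IndexLen - ElementsPerVectorReg; // select from the second table reg
    TBLMask.push_back(I);
  }
  return TBLMask;
}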
31733SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
31735 EVT VT = Op.getValueType();
31739 auto ShuffleMask = SVN->getMask();
31749 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT {
31750 if (ScalarTy == MVT::i8 || ScalarTy == MVT::i16)
31764 bool ReverseEXT = false;
31766 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
31774 Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
31779 for (unsigned BlockSize : {64U, 32U, 16U}) {
31783 RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
31784 else if (EltSize == 16)
31785 RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
31787 RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
31795 DAG.getNode(ISD::BITCAST, DL, ContainerVT, BlockedRev);
31800 if (Subtarget->hasSVE2p1() && EltSize == 64 &&
31803 SDValue Revd = DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, ContainerVT,
31804 Pg, Op1, DAG.getUNDEF(ContainerVT));
31808 unsigned WhichResult;
31809 unsigned OperandOrder;
31812 WhichResult == 0) {
31814 OperandOrder == 0 ? Op1 : Op2,
31815 OperandOrder == 0 ? Op2 : Op1);
31821 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
31823 DAG.getNode(Opc, DL, ContainerVT, OperandOrder == 0 ? Op1 : Op2,
31824 OperandOrder == 0 ? Op2 : Op1);
31830 DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
31833 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
31835 DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
31856 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
31857 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
31858 if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
31867 WhichResult != 0) {
31869 OperandOrder == 0 ? Op1 : Op2,
31870 OperandOrder == 0 ? Op2 : Op1);
31875 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
31877 DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
31882 DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
31885 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
31887 DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
31890 if ((Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()) &&
31891 Subtarget->isSVEorStreamingSVEAvailable()) {
31893 "Unsupported SVE vector size");
31897 if (std::optional<unsigned> Lane =
31898 isDUPQMask(ShuffleMask, Segments, SegmentElts)) {
31905 DAG.getConstant(*Lane, DL, MVT::i64,
31919 if (MinSVESize || !Subtarget->isNeonAvailable())
31929 EVT InVT = Op.getValueType();
31933 "Only expect to cast between legal scalable vector types!");
31936 "For predicate bitcasts, use getSVEPredicateBitCast");
31952 VT == PackedVT || InVT == PackedInVT) &&
31953 "Unexpected bitcast!");
31956 if (InVT != PackedInVT)
31957 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
31959 if (Subtarget->isLittleEndian() ||
31970 Op = DAG.getNode(AArch64ISD::NVCAST, DL, PackedVTAsInt, Op);
31977 if (VT != PackedVT)
31978 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
31985 return ::isAllActivePredicate(DAG, N);
31989 return ::getPromotedVTForPredicate(VT);
31992bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
31994 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
31995 unsigned Depth) const {
31997 unsigned Opc = Op.getOpcode();
31999 case AArch64ISD::VSHL: {
32003 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
32014 if (ShiftRBits != ShiftLBits)
32017 unsigned ScalarSize = Op.getScalarValueSizeInBits();
32018 assert(ScalarSize > ShiftLBits && "Invalid shift imm");
32021 APInt UnusedBits = ~OriginalDemandedBits;
32023 if ((ZeroBits & UnusedBits) != ZeroBits)
32030 case AArch64ISD::BICi: {
32034 TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1);
32036 APInt BitsToClear =
32037 (Op->getConstantOperandAPInt(1) << Op->getConstantOperandAPInt(2))
32039 APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero;
32040 if (BitsToClear.isSubsetOf(AlreadyZeroedBitsToClear))
32041 return TLO.CombineTo(Op, Op0);
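The BICi case folds the instruction away when every bit its shifted immediate would clear is already known to be zero in the first operand. The same test on plain 64-bit masks (a sketch that mirrors only the visible part of the logic; Shift is assumed to be less than 64):

#include <cstdint>

// True when clearing (Imm << Shift) from a value whose known-zero bits are
// KnownZeroOfOp0 cannot change anything, i.e. the BIC is a no-op.
static bool bicIsNoOp(uint64_t KnownZeroOfOp0, uint64_t Imm, unsigned Shift) {
  uint64_t BitsToClear = Imm << Shift;
  return (BitsToClear & ~KnownZeroOfOp0) == 0; // BitsToClear subset of known-zero
}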
32050 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
32051 if (!MaxSVEVectorSizeInBits)
32053 unsigned VscaleMax = MaxSVEVectorSizeInBits / 128;
32054 unsigned MaxValue = MaxCount->getKnownMinValue() * VscaleMax;
32069 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
32072bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
32077 switch (Op.getOpcode()) {
32078 case AArch64ISD::MOVI:
32079 case AArch64ISD::MOVIedit:
32080 case AArch64ISD::MOVImsl:
32081 case AArch64ISD::MOVIshift:
32082 case AArch64ISD::MVNImsl:
32083 case AArch64ISD::MVNIshift:
32084 case AArch64ISD::VASHR:
32085 case AArch64ISD::VLSHR:
32086 case AArch64ISD::VSHL:
32093bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
32094 return Op.getOpcode() == AArch64ISD::DUP ||
32095 Op.getOpcode() == AArch64ISD::MOVI ||
32096 Op.getOpcode() == AArch64ISD::MOVIshift ||
32097 Op.getOpcode() == AArch64ISD::MOVImsl ||
32098 Op.getOpcode() == AArch64ISD::MOVIedit ||
32099 Op.getOpcode() == AArch64ISD::MVNIshift ||
32100 Op.getOpcode() == AArch64ISD::MVNImsl ||
32104 (Op.getOpcode() == ISD::FNEG &&
32105 Op.getOperand(0).getOpcode() == AArch64ISD::MOVIedit &&
32106 Op.getOperand(0).getConstantOperandVal(0) == 0) ||
32108 Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
32113 return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
32114 Subtarget->hasComplxNum();
32125 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
32129 unsigned NumElements = VTy->getElementCount().getKnownMinValue();
32135 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
32136 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
32140 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
32144 return ScalarWidth == 32 || ScalarWidth == 64;
32145 return 8 <= ScalarWidth && ScalarWidth <= 64;
32152 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
32153 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
32163 bool IsScalable = Ty->isScalableTy();
32164 bool IsInt = Ty->getElementType()->isIntegerTy();
32167 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
32170 "Vector type must be either 64 or a power of 2 that is at least 128");
32172 if (TyWidth > 128) {
32173 int Stride = Ty->getElementCount().getKnownMinValue() / 2;
32175 ->getElementCount()
32176 .getKnownMinValue() /
32179 auto *LowerSplitA = B.CreateExtractVector(HalfTy, InputA, uint64_t(0));
32180 auto *LowerSplitB = B.CreateExtractVector(HalfTy, InputB, uint64_t(0));
32181 auto *UpperSplitA = B.CreateExtractVector(HalfTy, InputA, Stride);
32182 auto *UpperSplitB = B.CreateExtractVector(HalfTy, InputB, Stride);
32183 Value *LowerSplitAcc = nullptr;
32184 Value *UpperSplitAcc = nullptr;
32190 UpperSplitAcc = B.CreateExtractVector(HalfAccTy, Accumulator, AccStride);
32192 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
32194 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
32198 return B.CreateInsertVector(FullTy, Result, UpperSplitInt, AccStride);
32204 return B.CreateIntrinsic(
32205 Intrinsic::aarch64_sve_cmla_x, Ty,
32206 {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
32208 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
32209 return B.CreateIntrinsic(
32210 Intrinsic::aarch64_sve_fcmla, Ty,
32211 {Mask, Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
32214 Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
32215 Intrinsic::aarch64_neon_vcmla_rot90,
32216 Intrinsic::aarch64_neon_vcmla_rot180,
32217 Intrinsic::aarch64_neon_vcmla_rot270};
32220 return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
32229 return B.CreateIntrinsic(
32230 Intrinsic::aarch64_sve_cadd_x, Ty,
32231 {InputA, InputB, B.getInt32((int)Rotation * 90)});
32233 auto *Mask = B.getAllOnesMask(Ty->getElementCount());
32234 return B.CreateIntrinsic(
32235 Intrinsic::aarch64_sve_fcadd, Ty,
32236 {Mask, InputA, InputB, B.getInt32((int)Rotation * 90)});
32243 IntId = Intrinsic::aarch64_neon_vcadd_rot90;
32245 IntId = Intrinsic::aarch64_neon_vcadd_rot270;
32250 return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
32255 return B.CreateIntrinsic(
32256 Intrinsic::aarch64_sve_cdot, Accumulator->getType(),
32257 {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
32263bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
32264 unsigned Opc = N->getOpcode();
32267 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
32274 return Subtarget->getMinimumJumpTableEntries();
32280 bool NonUnitFixedLengthVector =
32282 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
32287 unsigned NumIntermediates;
32295 bool NonUnitFixedLengthVector =
32297 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors())
32302 unsigned NumIntermediates;
32304 NumIntermediates, VT2);
32309 unsigned &NumIntermediates, MVT &RegisterVT) const {
32311 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
32316 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!");
32317 assert(IntermediateVT == RegisterVT && "Unexpected VT mismatch!");
32328 IntermediateVT = NewVT;
32331 return NumIntermediates;
32338 NumIntermediates *= NumSubRegs;
32339 NumRegs *= NumSubRegs;
32345 IntermediateVT = RegisterVT = MVT::v16i8;
32348 IntermediateVT = RegisterVT = MVT::v8i16;
32351 IntermediateVT = RegisterVT = MVT::v4i32;
32354 IntermediateVT = RegisterVT = MVT::v2i64;
32357 IntermediateVT = RegisterVT = MVT::v8f16;
32360 IntermediateVT = RegisterVT = MVT::v4f32;
32363 IntermediateVT = RegisterVT = MVT::v2f64;
32366 IntermediateVT = RegisterVT = MVT::v8bf16;
32375 return !Subtarget->isTargetWindows() &&
32384 if (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32)
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST, APInt &DefBits)
static SDValue tryLowerSmallVectorExtLoad(LoadSDNode *Load, SelectionDAG &DAG)
Helper function to optimize loads of extended small vectors.
static void CustomNonLegalBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, EVT ExtendVT, EVT CastVT)
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG)
static SDValue performLastTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG)
static std::optional< PredicateConstraint > parsePredicateConstraint(StringRef Constraint)
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static void analyzeCallOperands(const AArch64TargetLowering &TLI, const AArch64Subtarget *Subtarget, const TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo)
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
static bool isLegalArithImmed(uint64_t C)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performVectorDeinterleaveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static ScalableVectorType * getSVEContainerIRType(FixedVectorType *VTy)
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
static SDValue performMulVectorCmpZeroCombine(SDNode *N, SelectionDAG &DAG)
static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG)
static bool isZeroingInactiveLanes(SDValue Op)
static SDValue performPTestFirstCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG)
static SDValue tryCombineMULLWithUZP1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG)
static bool isZerosVector(const SDNode *N)
isZerosVector - Check whether SDNode N is a zero-filled vector.
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performNVCASTCombine(SDNode *N, SelectionDAG &DAG)
Get rid of unnecessary NVCASTs (that don't change the type).
static const TargetRegisterClass * getReducedGprRegisterClass(ReducedGprConstraint Constraint, EVT VT)
static const MachineInstr * stripVRegCopies(const MachineRegisterInfo &MRI, Register Reg)
static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG, bool Invert)
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, SDLoc DL, unsigned BitWidth)
static bool isPredicateCCSettingOp(SDValue N)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
bool isVectorizedBinOp(unsigned Opcode)
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG)
static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG)
static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2, ArrayRef< int > ShuffleMask, EVT VT, EVT ContainerVT, SelectionDAG &DAG)
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static MVT getSVEContainerType(EVT ContentTy)
static bool isMergePassthruOpcode(unsigned Opc)
static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG, SDLoc DL, bool &IsMLA)
static SDValue performFADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
static SDValue emitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &DL, SelectionDAG &DAG)
Emit vector comparison for floating-point values, producing a mask.
static SDValue performVectorExtCombine(SDNode *N, SelectionDAG &DAG)
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
static bool isCheapToExtend(const SDValue &N)
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
static SDValue performExtractLastActiveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG)
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
static bool shouldLowerTailCallStackArg(const MachineFunction &MF, const CCValAssign &VA, SDValue Arg, ISD::ArgFlagsTy Flags, int CallOffset)
Check whether a stack argument requires lowering in a tail call.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static std::optional< ElementCount > getMaxValueForSVECntIntrinsic(SDValue Op)
static unsigned getDUPLANEOp(EVT EltType)
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool canLowerSRLToRoundingShiftForVT(SDValue Shift, EVT ResVT, SelectionDAG &DAG, unsigned &ShiftValue, SDValue &RShOperand)
static bool isExtendOrShiftOperand(SDValue N)
static bool isLanes1toNKnownZero(SDValue Op)
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, SelectionDAG &DAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static EVT getPackedSVEVectorVT(EVT VT)
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, AArch64PACKey::ID KeyC, SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG)
static SDValue performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performFlagSettingCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned GenericOpcode)
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
static SDValue constructDup(SDValue V, int Lane, SDLoc DL, EVT VT, unsigned Opcode, SelectionDAG &DAG)
static bool isCMP(SDValue Op)
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget, const AtomicRMWInst *RMW)
static Function * getStructuredLoadFunction(Module *M, unsigned Factor, bool Scalable, Type *LDVTy, Type *PtrTy)
unsigned numberOfInstrToLoadImm(APInt C)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG)
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, SelectionDAG &DAG, bool UnpredOp=false, bool SwapOperands=false)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue convertFromSVEContainerType(SDLoc DL, SDValue Vec, EVT VecVT, SelectionDAG &DAG)
SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated)
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian)
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint)
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static bool callConvSupportsVarArgs(CallingConv::ID CC)
Return true if the call convention supports varargs Currently only those that pass varargs like the C...
static const MCPhysReg GPRArgRegs[]
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
static SDValue performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isPassedInFPR(EVT VT)
static unsigned getIntrinsicID(const SDNode *N)
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert)
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG)
static bool IsSVECntIntrinsic(SDValue S)
static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG)
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N, SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue emitRestoreZALazySave(SDValue Chain, SDLoc DL, const AArch64TargetLowering &TLI, const AArch64RegisterInfo &TRI, AArch64FunctionInfo &FuncInfo, SelectionDAG &DAG)
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instructions with a larger element width than the vect...
constexpr MVT FlagsVT
Value type used for NZCV flags.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static cl::opt< bool > EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden, cl::desc("Combine ext and trunc to TBL"), cl::init(true))
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG)
static std::optional< std::pair< unsigned, const TargetRegisterClass * > > parseSVERegAsConstraint(StringRef Constraint)
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG)
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue removeRedundantInsertVectorElt(SDNode *N)
static std::optional< AArch64CC::CondCode > getCSETCondCode(SDValue Op)
static bool isLane0KnownActive(SDValue Op)
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG)
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
static bool isNegatedInteger(SDValue Op)
static SDValue performFirstTrueTestVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
constexpr MVT CondCodeVT
Value type used for condition codes.
static bool isLoadOrMultipleLoads(SDValue B, SmallVector< LoadSDNode * > &Loads)
static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc)
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
static SDValue performSMINCombine(SDNode *N, SelectionDAG &DAG)
SDValue LowerVectorMatch(SDValue Op, SelectionDAG &DAG)
static Function * getStructuredStoreFunction(Module *M, unsigned Factor, bool Scalable, Type *STVTy, Type *PtrTy)
static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorShiftCombine(SDNode *N, const AArch64TargetLowering &TLI, TargetLowering::DAGCombinerInfo &DCI)
Optimize a vector shift instruction and its operand if shifted out bits are not used.
static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG)
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
static bool isOrXorChain(SDValue N, unsigned &Num, SmallVector< std::pair< SDValue, SDValue >, 16 > &WorkList)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd)
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG)
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
bool isLegalCmpImmed(APInt C)
static bool isSafeSignedCMN(SDValue Op, SelectionDAG &DAG)
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performCTPOPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG)
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue foldCSELofLASTB(SDNode *Op, SelectionDAG &DAG)
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &DL)
static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG)
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG)
Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern into sext/zext(buildvecto...
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static Value * createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op, FixedVectorType *ZExtTy, FixedVectorType *DstTy, bool IsLittleEndian)
static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG)
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC, SDValue RHS={})
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
static const MCPhysReg FPRArgRegs[]
static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL, SelectionDAG &DAG)
Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR, WZR, invert(<cond>)'.
static SDValue performAddTruncShiftCombine(SDNode *N, SelectionDAG &DAG)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryCombineNeonFcvtFP16ToI16(SDNode *N, unsigned Opcode, SelectionDAG &DAG)
static void replaceBoolVectorBitcast(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *ST)
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
static std::optional< ReducedGprConstraint > parseReducedGprConstraint(StringRef Constraint)
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SMECallAttrs getSMECallAttrs(const Function &Caller, const RTLIB::RuntimeLibcallsInfo &RTLCI, const TargetLowering::CallLoweringInfo &CLI)
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG)
Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) making use of the vector SExt/ZE...
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
static EVT calculatePreExtendType(SDValue Extend)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtInReg(const SDValue &V)
static EVT getPromotedVTForPredicate(EVT VT)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
static SDValue performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue tryToReplaceScalarFPConversionWithSVE(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Tries to replace scalar FP <-> INT conversions with SVE in streaming functions; this can help to redu...
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static Value * UseTlsOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST)
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG)
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue getCondCode(SelectionDAG &DAG, AArch64CC::CondCode CC)
Like SelectionDAG::getCondCode(), but for AArch64 condition codes.
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG)
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG)
static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG, bool IsSigned, bool IsEqual)
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG)
static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode, AtomicOrdering Ordering)
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
cl::opt< bool > EnableSVEGISel("aarch64-enable-gisel-sve", cl::Hidden, cl::desc("Enable / disable SVE scalable vectors in Global ISel"), cl::init(false))
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, SelectionDAG &DAG)
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
std::pair< SDValue, uint64_t > lookThroughSignExtension(SDValue Val)
static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG)
bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL)
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool &PreferFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale, SDLoc DL, SelectionDAG &DAG)
static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, ISD::CondCode CC, bool NoNaNs, const SDLoc &DL, SelectionDAG &DAG)
For SELECT_CC, when the true/false values are (-1, 0) and the compared values are scalars,...
static SDValue getZT0FrameIndex(MachineFrameInfo &MFI, AArch64FunctionInfo &FuncInfo, SelectionDAG &DAG)
static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG)
static SDValue performOrXorChainCombine(SDNode *N, SelectionDAG &DAG)
static SDValue convertToSVEContainerType(SDLoc DL, SDValue Vec, EVT ContainerVT, SelectionDAG &DAG)
static SDValue performAddCombineForShiftedOperands(SDNode *N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool shouldBeAdjustedToZero(SDValue LHS, APInt C, ISD::CondCode &CC)
static SDValue combineSVEBitSel(unsigned IID, SDNode *N, SelectionDAG &DAG)
static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode, bool IsSigned)
static bool isPackedVectorType(EVT VT, SelectionDAG &DAG)
Returns true if VT's elements occupy the lowest bit positions of its associated register class withou...
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v,...
static AArch64SME::ToggleCondition getSMToggleCondition(const SMECallAttrs &CallAttrs)
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performMaskedGatherScatterCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBuildVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG)
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue isNVCastToHalfWidthElements(SDValue V)
static bool isHalvingTruncateAndConcatOfLegalIntScalableType(SDNode *N)
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
static SDValue performUADDVZextCombine(SDValue A, SelectionDAG &DAG)
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG)
Perform the scalar expression combine in the form of: CSEL(c, 1, cc) + b => CSINC(b+c,...
static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isEligibleForSmallVectorLoadOpt(LoadSDNode *LD, const AArch64Subtarget &Subtarget)
Helper function to check if a small vector load can be optimized.
static std::optional< uint64_t > getConstantLaneNumOfExtractHalfOperand(SDValue &Op)
static void ReplaceATOMIC_LOAD_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
static bool areLoadedOffsetButOtherwiseSame(SDValue Op0, SDValue Op1, SelectionDAG &DAG, unsigned &NumSubLoads)
static SDValue performMulRdsvlCombine(SDNode *Mul, SelectionDAG &DAG)
static bool isEssentiallyExtractHighSubvector(SDValue N)
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static Value * createTblShuffleForSExt(IRBuilderBase &Builder, Value *Op, FixedVectorType *DstTy, bool IsLittleEndian)
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
static cl::opt< unsigned > MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors"))
static SDValue performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue performMULLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performReinterpretCastCombine(SDNode *N)
static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI, SelectionDAG &DAG, AArch64FunctionInfo *Info, SDLoc DL, SDValue Chain, bool IsSave)
SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG)
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, const SDLoc DL)
static SDValue tryCombineExtendRShTrunc(SDNode *N, SelectionDAG &DAG)
static bool isAllInactivePredicate(SDValue N)
static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT, SDLoc DL, SelectionDAG &DAG)
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
static SDValue performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static cl::opt< bool > UseFEATCPACodegen("aarch64-use-featcpa-codegen", cl::Hidden, cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in " "SelectionDAG for FEAT_CPA"), cl::init(false))
static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth, unsigned NumElts, bool IsLittleEndian, SmallVectorImpl< int > &Mask)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z, SelectionDAG &DAG)
static SDValue performANDSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static const TargetRegisterClass * getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT)
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode, SDNode *AndNode, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex, unsigned CC)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
#define FALKOR_STRIDED_ACCESS_MD
assert(UImm && (UImm != ~static_cast< T >(0)) && "Invalid immediate!")
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static bool isSigned(unsigned int Opcode)
Module.h This file contains the declarations for the Module class.
This defines the Use class.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
This file provides utility analysis objects describing memory locations.
This file defines ARC utility functions which are used by various parts of the compiler.
Contains matchers for matching SelectionDAG nodes and values.
static LLVM_ATTRIBUTE_ALWAYS_INLINE MVT::SimpleValueType getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex)
getSimpleVT - Decode a value in MatcherTable, if it's a VBR encoded value, use GetVBR to decode it.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool branchTargetEnforcement() const
unsigned getVarArgsFPRSize() const
void setVarArgsStackOffset(unsigned Offset)
void setVarArgsStackIndex(int Index)
void setEarlyAllocSMESaveBuffer(Register Ptr)
int getZT0SpillSlotIndex() const
TPIDR2Object & getTPIDR2Obj()
void setTailCallReservedStack(unsigned bytes)
bool hasELFSignedGOT() const
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setIsSplitCSR(bool s)
int getVarArgsFPRIndex() const
void incNumLocalDynamicTLSAccesses()
void setBytesInStackArgArea(unsigned bytes)
int getVarArgsStackIndex() const
void setVarArgsGPRIndex(int Index)
int getVarArgsGPRIndex() const
void setPStateSMReg(Register Reg)
void setVarArgsFPRSize(unsigned Size)
unsigned getVarArgsStackOffset() const
SMEAttrs getSMEFnAttrs() const
unsigned getVarArgsGPRSize() const
void setZT0SpillSlotIndex(int FI)
unsigned getSRetReturnReg() const
Register getPStateSMReg() const
bool hasZT0SpillSlotIndex() const
void setSMESaveBufferUsed(bool Used=true)
void setSRetReturnReg(unsigned Reg)
void setSMESaveBufferAddr(Register Reg)
unsigned getBytesInStackArgArea() const
unsigned isSMESaveBufferUsed() const
void setVarArgsFPRIndex(int Index)
void setVarArgsGPRSize(unsigned Size)
void setArgumentStackToRestore(unsigned bytes)
void setHasStreamingModeChanges(bool HasChanges)
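A hedged sketch (the helper itself is hypothetical) of how lowering code records the varargs GPR save area through the AArch64FunctionInfo accessors listed above; it assumes the backend-private header AArch64MachineFunctionInfo.h.

#include "AArch64MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

static void recordGPRSaveAreaExample(llvm::MachineFunction &MF, int FrameIndex,
                                     unsigned SizeInBytes) {
  auto *FuncInfo = MF.getInfo<llvm::AArch64FunctionInfo>();
  FuncInfo->setVarArgsGPRIndex(FrameIndex); // frame index of the save area
  FuncInfo->setVarArgsGPRSize(SizeInBytes); // bytes of GPR varargs saved
}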
bool isTargetWindows() const
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
unsigned getMaximumJumpTableSize() const
Align getPrefLoopAlignment() const
Align getPrefFunctionAlignment() const
bool isTargetMachO() const
unsigned getMaxBytesForLoopAlignment() const
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
bool isStreamingCompatible() const
Returns true if the function has a streaming-compatible body.
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
bool useSVEForFixedLengthVectors() const
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isLittleEndian() const
bool isStreaming() const
Returns true if the function has a streaming body.
unsigned getMaxSVEVectorSizeInBits() const
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
unsigned getMinSVEVectorSizeInBits() const
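Illustrative only (not the backend's actual policy): subtarget queries like the ones above typically gate vector-lowering decisions. The sketch assumes the backend-private header AArch64Subtarget.h.

#include "AArch64Subtarget.h"

static bool preferSVEForWideVectorsExample(const llvm::AArch64Subtarget &ST,
                                           unsigned VectorSizeInBits) {
  if (!ST.isSVEorStreamingSVEAvailable())
    return false;
  // Only worthwhile once the guaranteed SVE register width covers the vector
  // (getMinSVEVectorSizeInBits() returns 0 when no minimum is known).
  return ST.getMinSVEVectorSizeInBits() >= VectorSizeInBits;
}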
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, SDValue Chain, SDValue InGlue, unsigned Condition, bool InsertVectorLengthCheck=false) const
If a change in streaming mode is required on entry to/return from a function call it emits and return...
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset) const override
Return true if it is profitable to reduce a load to a smaller type.
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
EVT getPromotedVTForPredicate(EVT VT) const
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
MachineBasicBlock * EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const override
Return the preferred common base offset.
bool shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert a trailing fence without reducing the ordering f...
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
MachineBasicBlock * EmitInitTPIDR2Object(MachineInstr &MI, MachineBasicBlock *BB) const
bool lowerInterleavedStore(Instruction *Store, Value *Mask, ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved store into a stN intrinsic.
MachineBasicBlock * EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool preferSelectsOverBooleanArithmetic(EVT VT) const override
Should we prefer selects to doing arithmetic on boolean types.
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const override
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool shouldRemoveRedundantExtend(SDValue Op) const override
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool isOpSuitableForLSE128(const Instruction *I) const
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
void fixupPtrauthDiscriminator(MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp, MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const
Replace (0, vreg) discriminator components with the operands of blend or with (immediate,...
bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved load into a ldN intrinsic.
bool fallBackToDAGISel(const Instruction &Inst) const override
bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
bool isLegalAddScalableImmediate(int64_t) const override
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
MachineBasicBlock * EmitCheckMatchingVL(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const
Returns true if VecTy is a legal interleaved access type.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const override
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
MachineBasicBlock * EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist an instruction in then/else to if.
bool isOpSuitableForRCPC3(const Instruction *I) const
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
MachineBasicBlock * EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, bool Op0IsDef) const
MachineBasicBlock * EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override
Return true if the @llvm.experimental.vector.match intrinsic should be expanded for vector type ‘VT’ ...
MachineBasicBlock * EmitEntryPStateSM(MachineInstr &MI, MachineBasicBlock *BB) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y),...
bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override
In AArch64, true if FEAT_CPA is present.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
MachineBasicBlock * EmitAllocateSMESaveBuffer(MachineInstr &MI, MachineBasicBlock *BB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
MachineBasicBlock * EmitAllocateZABuffer(MachineInstr &MI, MachineBasicBlock *BB) const
const AArch64TargetMachine & getTM() const
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const override
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
bool isOpSuitableForLDPSTP(const Instruction *I) const
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
MachineBasicBlock * EmitGetSMESaveSize(MachineInstr &MI, MachineBasicBlock *BB) const
bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
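A simplified sketch of the encoding rule behind the hook above: AArch64 ADD/SUB take a 12-bit unsigned immediate, optionally shifted left by 12. (The real hook also accepts negative values by flipping ADD to SUB; the helper name is illustrative.)

#include <cstdint>

static bool fitsAddSubImmediateExample(uint64_t Imm) {
  return (Imm >> 12) == 0 ||                          // #imm12
         ((Imm & 0xfffULL) == 0 && (Imm >> 24) == 0); // #imm12, LSL #12
}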
bool shouldConsiderGEPOffsetSplit() const override
bool isVectorClearMaskLegal(ArrayRef< int > M, EVT VT) const override
Similar to isShuffleMaskLegal.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
bool lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask, ArrayRef< Value * > InterleaveValues) const override
Lower an interleave intrinsic to a target specific store intrinsic.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
MachineBasicBlock * EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override
Return true if the @llvm.get.active.lane.mask intrinsic should be expanded using generic code in Sele...
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON=false) const
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
bool useNewSMEABILowering() const
Returns true if the new SME ABI lowering should be used.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
uint64_t getZExtValue() const
Get zero extended value.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI APInt getHiBits(unsigned numBits) const
Compute an APInt containing numBits highbits from this APInt.
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
bool sle(const APInt &RHS) const
Signed less or equal comparison.
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isMask(unsigned numBits) const
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
bool isOne() const
Determine if this is a value of 1.
int64_t getSExtValue() const
Get sign extended value.
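A small, self-contained demonstration (not code from this file) of several of the APInt helpers listed above.

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;

  APInt Mask = APInt::getLowBitsSet(/*numBits=*/32, /*loBitsSet=*/8); // 0xFF
  assert(Mask.isMask(8) && Mask.popcount() == 8);

  APInt SignBit = APInt::getSignMask(32); // only bit 31 set
  assert(SignBit.isNegative() && SignBit.isPowerOf2());
  assert(SignBit.countr_zero() == 31);

  APInt Wide = Mask.zext(64); // zero-extend to 64 bits
  assert(Wide.getBitWidth() == 64 && Wide.getZExtValue() == 0xFF);

  assert(Mask.isSubsetOf(APInt::getAllOnes(32)));
  return 0;
}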
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getCompareOperand()
an instruction that atomically reads a memory location, combines it with another value,...
@ FMinimum
*p = minimum(old, v); minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ FMaximum
*p = maximum(old, v); maximum matches the behavior of llvm.maximum.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v); minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v); maxnum matches the behavior of llvm.maxnum.
bool isFloatingPointOperation() const
BinOp getOperation() const
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const BlockAddress * getBlockAddress() const
Function * getFunction() const
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantFPSDNode * getConstantFPSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant FP or null if this is not a constant FP splat.
LLVM_ABI std::optional< std::pair< APInt, APInt > > isConstantSequence() const
If this BuildVector is constant and represents the numerical series "<a, a+n, a+2n,...
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
LLVM_ABI int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
LLVM_ABI bool isConstant() const
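A hedged sketch of the usual isConstantSplat query pattern on BuildVectorSDNode, in the spirit of helpers such as isAllConstantBuildVector above (this is not that function's exact body).

#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool getSplatConstantExample(llvm::SDValue V, uint64_t &ConstVal) {
  using namespace llvm;
  auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
  if (!BV)
    return false;
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return false;
  ConstVal = SplatValue.getZExtValue();
  return true;
}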
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
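A hedged sketch (values are illustrative) of reserving outgoing stack space through CCState, matching the AllocateStack/getStackSize entries above.

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Support/Alignment.h"

static int64_t reserveOutgoingSlotExample(llvm::CCState &CCInfo) {
  // Reserve an 8-byte, 8-byte-aligned slot; the returned offset is relative
  // to the start of the outgoing argument area.
  int64_t Offset = CCInfo.AllocateStack(/*Size=*/8, llvm::Align(8));
  (void)CCInfo.getStackSize(); // running size of the allocated area
  return Offset;
}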
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
uint64_t getNumOperands() const
A parsed version of the target data layout string, and methods for querying it.
bool isLittleEndian() const
Layout endianness...
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Class to represent fixed width SIMD vectors.
static FixedVectorType * getInteger(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
FunctionType * getFunctionType() const
Returns the FunctionType for me.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
const Argument * const_arg_iterator
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
int64_t getOffset() const
const GlobalValue * getGlobal() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Type * getValueType() const
Common base class shared among various IRBuilders.
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
BasicBlock * GetInsertBlock() const
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
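A self-contained sketch (unrelated to this file) exercising a few of the IRBuilderBase helpers above: it builds "void @f(ptr)", which stores an i8 zero through a constant GEP of the argument.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  using namespace llvm;
  LLVMContext Ctx;
  Module M("demo", Ctx);
  IRBuilder<> B(Ctx);

  FunctionType *FTy =
      FunctionType::get(B.getVoidTy(), {B.getPtrTy()}, /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  // %gep = getelementptr i8, ptr %arg, i32 4 ; store i8 0, ptr %gep
  Value *GEP = B.CreateConstGEP1_32(B.getInt8Ty(), F->getArg(0), 4, "gep");
  B.CreateStore(B.getInt8(0), GEP);
  B.CreateRetVoid();

  M.print(outs(), nullptr);
  return 0;
}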
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool isScalableVT() const
Return true if the type is a scalable type.
static auto all_valuetypes()
SimpleValueType Iteration.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto scalable_vector_valuetypes()
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static auto fp_fixedlen_vector_valuetypes()
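Illustrative values only: composing and querying MVTs with the helpers listed above. The header path assumes a recent LLVM tree (older releases kept MachineValueType.h under llvm/Support).

#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>

static void mvtExamples() {
  using llvm::MVT;
  MVT V4i32 = MVT::getVectorVT(MVT::i32, 4);           // 128-bit fixed vector
  MVT NxV4i32 = MVT::getScalableVectorVT(MVT::i32, 4); // <vscale x 4 x i32>

  assert(V4i32.is128BitVector() && V4i32.isFixedLengthVector());
  assert(NxV4i32.isScalableVector());
  assert(V4i32.getHalfNumVectorElementsVT() == MVT::v2i32);
  assert(V4i32.getScalarType() == MVT::i32);
}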
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MachineInstr * remove_instr(MachineInstr *I)
Remove the possibly bundled instruction from the instruction list without deleting it.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
void setAdjustsStack(bool V)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
@ SSPLK_None
Did not trigger a stack protector.
void setFrameAddressIsTaken(bool T)
bool hasScalableStackID(int ObjectIdx) const
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
int getStackProtectorIndex() const
Return the index for the stack protector object.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
void setHasTailCall(bool V=true)
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
LLVM_ABI int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca)
Notify the MachineFrameInfo object that a variable sized object has been created.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackProtectorIndex() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
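A hedged sketch (the wrapper function is hypothetical): creating a spill slot and setting its alignment via the MachineFrameInfo entries above.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

static int createSpillSlotExample(llvm::MachineFunction &MF, uint64_t Size,
                                  llvm::Align Alignment) {
  llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
  int FI = MFI.CreateSpillStackObject(Size, Alignment);
  MFI.setObjectAlignment(FI, Alignment); // redundant here; shown for the API
  return FI; // later query with getObjectSize(FI) / getObjectOffset(FI)
}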
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
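These MachineInstrBuilder calls are normally chained off BuildMI when expanding a pseudo instruction. A hedged sketch, assuming TII, MRI, MBB, MI, DL, FrameIdx and Offset are already available, and using AArch64::ADDXri purely as an illustrative opcode:

  Register Dst = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  BuildMI(*MBB, MI, DL, TII->get(AArch64::ADDXri), Dst)
      .addFrameIndex(FrameIdx)   // base address: a stack object
      .addImm(Offset)            // unsigned 12-bit immediate
      .addImm(0);                // no left shift of the immediate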
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
This is a base class used to represent MGATHER and MSCATTER nodes.
const SDValue & getIndex() const
bool isIndexScaled() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
bool isIndexSigned() const
ISD::MemIndexType getIndexType() const
How Index is applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
AtomicOrdering getMergedOrdering() const
Return a single atomic ordering that is at least as strong as both the success and failure orderings ...
const SDValue & getChain() const
bool isNonTemporal() const
bool isAtomic() const
Return true if the memory operation ordering is Unordered or higher.
EVT getMemoryVT() const
Return the type of the in-memory value.
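A hedged sketch of how these masked-memory accessors are usually queried from a DAG combine, assuming an SDNode *N handed in by the combiner:

  if (auto *MLD = dyn_cast<MaskedLoadSDNode>(N)) {
    // Only the simple form: no extension, no pre/post-increment addressing.
    if (MLD->getExtensionType() == ISD::NON_EXTLOAD && MLD->isUnindexed() &&
        !MLD->isNonTemporal()) {
      SDValue Mask = MLD->getMask();
      SDValue PassThru = MLD->getPassThru();
      EVT MemVT = MLD->getMemoryVT();
      // ... rewrite the load here ...
      (void)Mask; (void)PassThru; (void)MemVT;
    }
  }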
A Module instance is used to store all the information related to an LLVM module.
bool getRtLibUseGOT() const
Returns true if PLT should be avoided for RTLib calls.
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
void dropFlags(unsigned Mask)
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
op_iterator op_end() const
bool isAssert() const
Test if this node is an assert operation.
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
void setNode(SDNode *N)
set the SDNode
unsigned getOpcode() const
unsigned getNumOperands() const
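A small sketch of the use/operand walking these SDNode and SDValue helpers enable, assuming an SDValue Val produced during lowering:

  SDNode *N = Val.getNode();
  bool OnlyStoreUsers = true;
  for (SDNode *User : N->users())
    if (User->getOpcode() != ISD::STORE)
      OnlyStoreUsers = false;
  if (Val.hasOneUse() && OnlyStoreUsers && N->getNumOperands() == 2 &&
      Val.getValueType() == MVT::i64) {
    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
    // ... safe to fold Val into its single store user ...
    (void)LHS; (void)RHS;
  }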
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingInterface() const
bool hasStreamingCompatibleInterface() const
bool hasAgnosticZAInterface() const
bool hasStreamingInterfaceOrBody() const
bool hasNonStreamingInterface() const
bool hasStreamingBody() const
bool hasSharedZAInterface() const
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresEnablingZAAfterCall() const
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresDisablingZABeforeCall() const
bool requiresPreservingAllZAState() const
Class to represent scalable SIMD vectors.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
void addCalledGlobal(const SDNode *Node, const GlobalValue *GV, unsigned OpFlags)
Set CalledGlobal to be associated with Node.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getDeactivationSymbol(const GlobalValue *GV)
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
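Most of these SelectionDAG entry points are combined inside custom lowering routines. A minimal sketch, assuming a two-operand integer Op being lowered in a function that returns an SDValue (the node sequence is illustrative only):

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue Sum  = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue IsNZ = DAG.getSetCC(DL, MVT::i1, Sum, Zero, ISD::SETNE);
  return DAG.getSelect(DL, VT, IsNZ, Sum, DAG.getAllOnesConstant(DL, VT));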
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
int getSplatIndex() const
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
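These container helpers show up constantly when rebuilding operand lists; a small sketch, assuming an SDNode *N whose operands are being collected and de-duplicated:

  SmallVector<SDValue, 8> Ops;
  Ops.reserve(N->getNumOperands());
  SmallPtrSet<SDNode *, 8> Seen;
  for (const SDValue &Op : N->op_values()) {
    Ops.push_back(Op);
    Seen.insert(Op.getNode());       // insert() ignores duplicates
  }
  bool UsesItself = Seen.count(N);   // 1 if N appears among its own operands
  (void)UsesItself;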
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
constexpr size_t size() const
size - Get the string size.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
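A hedged sketch of the usual StringRef parsing pattern for constraints of the form "v<N>" (parseVIndex is a hypothetical helper, not a function from this file):

  static bool parseVIndex(StringRef Constraint, unsigned &RegIdx) {
    if (!Constraint.starts_with("v") || Constraint.size() < 2)
      return false;
    // getAsInteger returns true on parse failure, hence the negation.
    return !Constraint.drop_front(1).getAsInteger(/*Radix=*/10, RegIdx);
  }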
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool EnableExtLdPromotion
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is a legal add immediate, that is, the target has an add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
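Most of the TargetLoweringBase hooks above are called once, from the target's TargetLowering constructor, to describe what the target supports. A minimal sketch of that kind of setup, with illustrative (not necessarily AArch64-accurate) choices of types and actions:

  addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand); // no combined divrem
  setOperationAction(ISD::CTPOP,   MVT::i64, Custom); // lowered by hand
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setBooleanContents(ZeroOrOneBooleanContent);
  setSchedulingPreference(Sched::Hybrid);
  computeRegisterProperties(Subtarget->getRegisterInfo());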
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt128Ty(LLVMContext &C)
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
@ HalfTyID
16-bit floating point type
@ FloatTyID
32-bit floating point type
@ BFloatTyID
16-bit floating point type (7-bit significand)
@ DoubleTyID
64-bit floating point type
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeID getTypeID() const
Return the type id for the type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< use_iterator > uses()
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
Base class of all SIMD vector types.
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static VectorType * getTruncatedElementVectorType(VectorType *VTy)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Type * getIndexedType() const
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isValidCBCond(AArch64CC::CondCode Code)
True if a given condition code can be used in fused compare-and-branch instructions,...
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
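A small sketch of how the logical-immediate helpers are typically used to decide whether a 64-bit constant can be folded directly into an ORR/AND-class instruction (the constant is just an example of an encodable repeating pattern):

  uint64_t Imm = 0x00FF00FF00FF00FFULL;
  if (AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64)) {
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, /*regSize=*/64);
    // Enc packs the N:immr:imms fields expected by ORRXri / ANDXri.
    (void)Enc;
  } else {
    // Otherwise fall back to a MOVZ/MOVK sequence, e.g. via AArch64_IMM::expandMOVImm.
  }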
ArrayRef< MCPhysReg > getFPRArgRegs()
int getSMEPseudoMap(uint16_t Opcode)
static constexpr unsigned SVEMaxBitsPerVector
const unsigned RoundingBitsPos
const uint64_t ReservedFPControlBits
static constexpr unsigned SVEBitsPerBlock
ArrayRef< MCPhysReg > getGPRArgRegs()
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows using arbitrary numbers as calling convention identifiers.
@ ARM64EC_Thunk_Native
Calling convention used in the ARM64EC ABI to implement calls between ARM64 code and thunks.
@ AArch64_VectorCall
Used between AArch64 Advanced SIMD functions.
@ Swift
Calling convention for Swift.
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
@ PreserveMost
Used for runtime calls that preserves most registers.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2
Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ CXX_FAST_TLS
Used for access functions.
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
Preserve X0-X13, X19-X29, SP, Z0-Z31, P0-P15.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1
Preserve X1-X15, X19-X29, SP, Z0-Z31, P0-P15.
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ PreserveNone
Used for runtime calls that preserve no general registers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ ARM64EC_Thunk_X64
Calling convention used in the ARM64EC ABI to implement calls between x64 code and thunks.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNormalMaskedLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed masked load.
bool isNormalMaskedStore(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed masked store.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
@ LOOP_DEPENDENCE_RAW_MASK
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
@ SSUBO
Same as [SU]ADDO, but for subtraction.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ PtrAuthGlobalAddress
A ptrauth constant.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ SMULO
Same as [SU]ADDO, but for multiplication.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
@ STRICT_FADD
Constrained versions of the binary floating point operators.
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
@ LOOP_DEPENDENCE_WAR_MASK
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed)
Returns true if the specified node is a vector where all elements can be truncated to the specified e...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
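For instance, a combine that canonicalises a comparison might invert the condition code and then swap its operands, roughly as sketched below (the value type is only used to select integer vs. floating-point inversion rules):
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"

// SETLT inverts to SETGE; swapping the operands of SETGE then gives SETLE.
llvm::ISD::CondCode invertThenSwap(llvm::ISD::CondCode CC, llvm::EVT VT) {
  llvm::ISD::CondCode Inv = llvm::ISD::getSetCCInverse(CC, VT);
  return llvm::ISD::getSetCCSwappedOperands(Inv);
}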
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
static const int LAST_INDEXED_MODE
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
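These predicates are typically used as early guards in DAG combines; a hedged sketch (the helper name is hypothetical):
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Accept only plain loads: unindexed, non-extending, and neither volatile
// nor atomic. Similar guards exist for masked loads via isNormalMaskedLoad.
bool isPlainLoadCandidate(const llvm::SDNode *N) {
  return llvm::ISD::isNormalLoad(N) &&
         llvm::cast<llvm::LoadSDNode>(N)->isSimple();
}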
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
bool match(Val *V, const Pattern &P)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CastInst_match< OpTy, UIToFPInst > m_UIToFP(const OpTy &Op)
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
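A short sketch of these matchers in use at the IR level (the helper name and the matched shape are hypothetical):
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

// Recognise (trunc (add (zext X), (zext Y))) and capture X and Y.
bool matchNarrowableAdd(llvm::Value *V, llvm::Value *&X, llvm::Value *&Y) {
  using namespace llvm::PatternMatch;
  return match(V, m_Trunc(m_Add(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))));
}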
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
const unsigned VectorBits
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
bool attachedCallOpBundleNeedsMarker(const CallBase *CB)
This function determines whether the clang_arc_attachedcall should be emitted with or without the mar...
bool hasAttachedCallOpBundle(const CallBase *CB)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool isPackedVectorType(EVT SomeVT)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool CC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
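These range wrappers show up throughout the lowering code when inspecting shuffle masks and operand lists; a small illustrative sketch (the helper names are hypothetical):
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// True if every mask index refers to the first operand; negative indices
// denote undef lanes and trivially satisfy the check.
bool usesOnlyFirstOperand(llvm::ArrayRef<int> Mask, int NumElts) {
  return llvm::all_of(Mask, [NumElts](int Idx) { return Idx < NumElts; });
}

// Count how many lanes are explicitly undef.
int countUndefLanes(llvm::ArrayRef<int> Mask) {
  return llvm::count_if(Mask, [](int Idx) { return Idx < 0; });
}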
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
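A hedged sketch of how these mask classifiers can be combined. They are AArch64-backend utilities (the declaring header, assumed to be AArch64PerfectShuffle.h, must be included); the classify function and its return strings are hypothetical:
#include "llvm/ADT/ArrayRef.h"
// plus the AArch64 backend header declaring isZIPMask/isUZPMask/isTRNMask

// Classify a shuffle mask as one of the AArch64 permute patterns.
const char *classifyPermuteMask(llvm::ArrayRef<int> M, unsigned NumElts) {
  unsigned WhichResult = 0, OperandOrder = 0;
  if (llvm::isZIPMask(M, NumElts, WhichResult, OperandOrder))
    return WhichResult == 0 ? "zip1" : "zip2";
  if (llvm::isUZPMask(M, NumElts, WhichResult))
    return WhichResult == 0 ? "uzp1" : "uzp2";
  if (llvm::isTRNMask(M, NumElts, WhichResult, OperandOrder))
    return WhichResult == 0 ? "trn1" : "trn2";
  return "none";
}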
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
bool CC_AArch64_Win64PCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
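A hedged sketch of how such an assignment function is driven through CCState when analysing outgoing call operands (CC_AArch64_AAPCS is declared above; its header path in the AArch64 backend is an assumption):
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetCallingConv.h"
// plus the AArch64 backend header declaring CC_AArch64_AAPCS

// Assign argument locations using the AAPCS rules.
void assignOutgoingLocations(
    llvm::CCState &CCInfo,
    const llvm::SmallVectorImpl<llvm::ISD::OutputArg> &Outs) {
  CCInfo.AnalyzeCallOperands(Outs, llvm::CC_AArch64_AAPCS);
}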
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
testing::Matcher< const detail::ErrorHolder & > Failed()
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
std::optional< unsigned > getSVEPredPatternFromNumElements(unsigned MinNumElts)
Return specific VL predicate pattern based on the number of elements.
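As a hedged sketch (these two helpers are assumed to live in the AArch64 backend's AArch64BaseInfo.h; the round-trip function is hypothetical):
#include <optional>
// plus the AArch64 backend header declaring the two helpers below

// Map a fixed element count to an SVE VL predicate pattern (e.g. 8 -> VL8)
// and back again; returns std::nullopt when no exact VL pattern exists.
std::optional<unsigned> roundTripVLPattern(unsigned MinNumElts) {
  if (std::optional<unsigned> P =
          llvm::getSVEPredPatternFromNumElements(MinNumElts))
    return llvm::getNumElementsFromSVEPredPattern(*P);
  return std::nullopt;
}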
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
bool CC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
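These bit-manipulation helpers from MathExtras.h and bit.h appear throughout the immediate-selection code; a few representative uses:
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

void bitHelperExamples() {
  assert(llvm::isPowerOf2_64(4096));         // 4096 == 2^12
  assert(llvm::Log2_64(4096) == 12);         // floor log2
  assert(llvm::countr_zero(0x00F0u) == 4);   // trailing zero count
  assert(llvm::isShiftedMask_64(0x0FF0));    // contiguous run of ones
  assert(llvm::isMask_64(0x00FF));           // ones starting at bit 0
  assert(llvm::maxUIntN(8) == 255);          // largest 8-bit unsigned value
}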
unsigned M1(unsigned Val)
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool RetCC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
ComplexDeinterleavingOperation
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
constexpr int PoisonMaskElem
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
ComplexDeinterleavingRotation
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
FunctionAddr VTableAddr uintptr_t uintptr_t Data
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
gep_type_iterator gep_type_begin(const User *GEP)
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
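A small sketch of the alignment arithmetic these helpers provide:
#include "llvm/Support/Alignment.h"
#include <cassert>

void alignmentExamples() {
  // Round 20 bytes up to a 16-byte boundary.
  assert(llvm::alignTo(/*Size=*/20, llvm::Align(16)) == 32);
  // An 8-byte-aligned base plus an offset of 4 is only 4-byte aligned.
  assert(llvm::commonAlignment(llvm::Align(8), /*Offset=*/4).value() == 4);
}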
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
static const MachineMemOperand::Flags MOStridedAccess
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
@ Default
The result values are uniform if and only if all operands are uniform.
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
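For example, the mask <2, 3, 4, 5, -1, -1> (four sequential indices starting at 2, padded with two undef lanes) could be built as follows:
#include "llvm/Analysis/VectorUtils.h"

llvm::SmallVector<int, 16> makeSliceMask() {
  return llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
}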
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
bool CC_AArch64_Arm64EC_Thunk_Native(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_AArch64_Arm64EC_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool CC_AArch64_Preserve_None(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const unsigned PerfectShuffleTable[6561+1]
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper structure to keep track of a SET_CC lowered into AArch64 code.
Helper structure to keep track of ISD::SET_CC operands.
Helper structure to be able to read SetCC information.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Represent subnormal handling kind for floating point instruction inputs and outputs.
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
uint64_t getScalarStoreSize() const
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
ElementCount getVectorElementCount() const
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
EVT changeElementType(EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
bool isScalableVT() const
Return true if the type is a scalable type.
bool isFixedLengthVector() const
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
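A short sketch of these EVT queries on a scalable vector type such as nxv4i32:
#include "llvm/CodeGen/ValueTypes.h"

void evtExamples(llvm::LLVMContext &Ctx) {
  llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, /*NumElements=*/4,
                                        /*IsScalable=*/true);
  bool Scalable = VT.isScalableVector();               // true
  unsigned MinElts = VT.getVectorMinNumElements();     // 4
  llvm::EVT EltVT = VT.getVectorElementType();         // i32
  llvm::EVT Half = VT.getHalfNumVectorElementsVT(Ctx); // nxv2i32
  (void)Scalable; (void)MinElts; (void)EltVT; (void)Half;
}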
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
bool isZero() const
Returns true if value is all zero.
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
APInt getSignedMinValue() const
Return the minimal signed value possible given these KnownBits.
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
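A brief sketch of composing these KnownBits transfer functions: starting from a known constant, add one and shift left by two:
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

llvm::KnownBits knownBitsExample() {
  llvm::KnownBits X = llvm::KnownBits::makeConstant(llvm::APInt(32, 4));
  llvm::KnownBits One = llvm::KnownBits::makeConstant(llvm::APInt(32, 1));
  llvm::KnownBits Sum = llvm::KnownBits::add(X, One);   // known to be 5
  llvm::KnownBits Two = llvm::KnownBits::makeConstant(llvm::APInt(32, 2));
  return llvm::KnownBits::shl(Sum, Two);                // known to be 20
}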
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint a...
A simple container for information about the supported runtime calls.
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
bool isBeforeLegalizeOps() const
bool isAfterLegalizeDAG() const
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Helper structure to keep track of SetCC information.