1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
91#include "llvm/MC/MCSchedule.h"
98#include "llvm/Support/Debug.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <optional>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118
119#define DEBUG_TYPE "arm-isel"
120
121STATISTIC(NumTailCalls, "Number of tail calls");
122STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
123STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
124STATISTIC(NumConstpoolPromoted,
125 "Number of constants with their storage promoted into constant pools");
126
127static cl::opt<bool>
128ARMInterworking("arm-interworking", cl::Hidden,
129 cl::desc("Enable / disable ARM interworking (for debugging only)"),
130 cl::init(true));
131
133 "arm-promote-constant", cl::Hidden,
134 cl::desc("Enable / disable promotion of unnamed_addr constants into "
135 "constant pools"),
136 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
138 "arm-promote-constant-max-size", cl::Hidden,
139 cl::desc("Maximum size of constant to promote into a constant pool"),
140 cl::init(64));
142 "arm-promote-constant-max-total", cl::Hidden,
143 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
144 cl::init(128));
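// The constant-promotion knobs above are ordinary cl::opt flags (hidden from
// -help but still settable), so they can be toggled from any tool that parses
// LLVM command-line options. An illustrative invocation, using the flag names
// declared above:
//   llc -mtriple=armv7a-none-eabi -arm-promote-constant \
//       -arm-promote-constant-max-size=32 input.ll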
145
147MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
148 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
149 cl::init(2));
150
152 "arm-max-base-updates-to-check", cl::Hidden,
153 cl::desc("Maximum number of base-updates to check generating postindex."),
154 cl::init(64));
155
156/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
157constexpr MVT FlagsVT = MVT::i32;
158
159// The APCS parameter registers.
160static const MCPhysReg GPRArgRegs[] = {
161 ARM::R0, ARM::R1, ARM::R2, ARM::R3
162};
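// R0-R3 carry the first four 32-bit argument words under both APCS and AAPCS;
// any further arguments are passed on the stack by the calling-convention
// lowering later in this file.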
163
165 SelectionDAG &DAG, const SDLoc &DL) {
167 assert(Arg.ArgVT.bitsLT(MVT::i32));
168 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);
169 SDValue Ext =
171 MVT::i32, Trunc);
172 return Ext;
173}
174
175void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
176 if (VT != PromotedLdStVT) {
177 setOperationAction(ISD::LOAD, VT, Promote);
178 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
179
180 setOperationAction(ISD::STORE, VT, Promote);
181 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
182 }
183
184 MVT ElemTy = VT.getVectorElementType();
185 if (ElemTy != MVT::f64)
189 if (ElemTy == MVT::i32) {
194 } else {
199 }
208 if (VT.isInteger()) {
212 }
213
214 // Neon does not support vector divide/remainder operations.
223
224 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
225 for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
227 setOperationAction(Opcode, VT, Legal);
228 if (!VT.isFloatingPoint())
229 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
230 setOperationAction(Opcode, VT, Legal);
231}
232
233void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
234 addRegisterClass(VT, &ARM::DPRRegClass);
235 addTypeForNEON(VT, MVT::f64);
236}
237
238void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
239 addRegisterClass(VT, &ARM::DPairRegClass);
240 addTypeForNEON(VT, MVT::v2f64);
241}
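// The two helpers above register NEON vector types: addDRTypeForNEON places
// 64-bit vectors in the D registers (promoting their loads/stores to f64),
// while addQRTypeForNEON places 128-bit vectors in D-register pairs
// (promoting loads/stores to v2f64).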
242
243void ARMTargetLowering::setAllExpand(MVT VT) {
244 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
246
247 // We support these really simple operations even on types where all
248 // the actual arithmetic has to be broken down into simpler
249 // operations or turned into library calls.
250 setOperationAction(ISD::BITCAST, VT, Legal);
251 setOperationAction(ISD::LOAD, VT, Legal);
252 setOperationAction(ISD::STORE, VT, Legal);
254}
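// setAllExpand marks every ISD opcode as Expand for VT and then re-legalizes
// only BITCAST/LOAD/STORE, so values of the type can still be held in
// registers and moved around even though no arithmetic on them is directly
// selectable.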
255
256void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
257 LegalizeAction Action) {
258 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
259 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
260 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
261}
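// addAllExtLoads applies a single LegalizeAction to all three extending-load
// flavours (EXTLOAD, ZEXTLOAD and SEXTLOAD) for the given type pair.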
262
263void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
264 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
265
266 for (auto VT : IntTypes) {
267 addRegisterClass(VT, &ARM::MQPRRegClass);
281 setOperationAction(ISD::MLOAD, VT, Custom);
282 setOperationAction(ISD::MSTORE, VT, Legal);
297
298 // No native support for these.
308
309 // Vector reductions
310 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
311 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
312 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
313 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
314 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
315 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
316 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
317 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
318 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
319
320 if (!HasMVEFP) {
325 } else {
328 }
329
330 // Pre and Post inc are supported on loads and stores
331 for (unsigned im = (unsigned)ISD::PRE_INC;
332 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
337 }
338 }
339
340 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
341 for (auto VT : FloatTypes) {
342 addRegisterClass(VT, &ARM::MQPRRegClass);
343 if (!HasMVEFP)
344 setAllExpand(VT);
345
346 // These are legal or custom whether we have MVE.fp or not
355 setOperationAction(ISD::MLOAD, VT, Custom);
356 setOperationAction(ISD::MSTORE, VT, Legal);
359
360 // Pre and Post inc are supported on loads and stores
361 for (unsigned im = (unsigned)ISD::PRE_INC;
362 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
367 }
368
369 if (HasMVEFP) {
370 setOperationAction(ISD::FMINNUM, VT, Legal);
371 setOperationAction(ISD::FMAXNUM, VT, Legal);
372 setOperationAction(ISD::FROUND, VT, Legal);
373 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
374 setOperationAction(ISD::FRINT, VT, Legal);
375 setOperationAction(ISD::FTRUNC, VT, Legal);
376 setOperationAction(ISD::FFLOOR, VT, Legal);
377 setOperationAction(ISD::FCEIL, VT, Legal);
378 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
379 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
380 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
381 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
382
383 // No native support for these.
386 setOperationAction(ISD::FSQRT, VT, Expand);
387 setOperationAction(ISD::FSIN, VT, Expand);
388 setOperationAction(ISD::FCOS, VT, Expand);
389 setOperationAction(ISD::FTAN, VT, Expand);
390 setOperationAction(ISD::FPOW, VT, Expand);
391 setOperationAction(ISD::FLOG, VT, Expand);
392 setOperationAction(ISD::FLOG2, VT, Expand);
393 setOperationAction(ISD::FLOG10, VT, Expand);
394 setOperationAction(ISD::FEXP, VT, Expand);
395 setOperationAction(ISD::FEXP2, VT, Expand);
396 setOperationAction(ISD::FEXP10, VT, Expand);
397 setOperationAction(ISD::FNEARBYINT, VT, Expand);
398 }
399 }
400
 401 // Custom-expand vector reductions that are smaller than legal to prevent
 402 // false zero items from being added.
403 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
404 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
405 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
406 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
407 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
408 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
409 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
410 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
411
412 // We 'support' these types up to bitcast/load/store level, regardless of
 413 // MVE integer-only / float support. Only FP data processing on the FP
 414 // vector types is inhibited at the integer-only level.
415 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
416 for (auto VT : LongTypes) {
417 addRegisterClass(VT, &ARM::MQPRRegClass);
418 setAllExpand(VT);
424 }
426
427 // We can do bitwise operations on v2i64 vectors
428 setOperationAction(ISD::AND, MVT::v2i64, Legal);
429 setOperationAction(ISD::OR, MVT::v2i64, Legal);
430 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
431
432 // It is legal to extload from v4i8 to v4i16 or v4i32.
433 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
434 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
435 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
436
437 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
443
444 // Some truncating stores are legal too.
445 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
446 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
447 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
448
449 // Pre and Post inc on these are legal, given the correct extends
450 for (unsigned im = (unsigned)ISD::PRE_INC;
451 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
452 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
457 }
458 }
459
460 // Predicate types
461 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
462 for (auto VT : pTypes) {
463 addRegisterClass(VT, &ARM::VCCRRegClass);
472 setOperationAction(ISD::LOAD, VT, Custom);
473 setOperationAction(ISD::STORE, VT, Custom);
478
479 if (!HasMVEFP) {
484 }
485 }
489 setOperationAction(ISD::OR, MVT::v2i1, Expand);
495
504}
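// Summary of the MVE setup above: v16i8/v8i16/v4i32 (and, with MVE.fp,
// v8f16/v4f32) live in the MQPR (Q) register class with most operations legal
// or custom; v2i64/v2f64 are register-class legal only for bitcast/load/store
// (with AND/OR/XOR additionally legal on v2i64); and the v16i1/v8i1/v4i1/v2i1
// predicate types live in VCCR.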
505
507 return static_cast<const ARMBaseTargetMachine &>(getTargetMachine());
508}
509
511 const ARMSubtarget &STI)
512 : TargetLowering(TM_), Subtarget(&STI),
513 RegInfo(Subtarget->getRegisterInfo()),
514 Itins(Subtarget->getInstrItineraryData()) {
515 const auto &TM = static_cast<const ARMBaseTargetMachine &>(TM_);
516
519
520 const Triple &TT = TM.getTargetTriple();
521
522 if (TT.isOSBinFormatMachO()) {
523 // Uses VFP for Thumb libfuncs if available.
524 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
525 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
526 // clang-format off
527 static const struct {
528 const RTLIB::Libcall Op;
529 const RTLIB::LibcallImpl Impl;
530 } LibraryCalls[] = {
531 // Single-precision floating-point arithmetic.
532 { RTLIB::ADD_F32, RTLIB::impl___addsf3vfp },
533 { RTLIB::SUB_F32, RTLIB::impl___subsf3vfp },
534 { RTLIB::MUL_F32, RTLIB::impl___mulsf3vfp },
535 { RTLIB::DIV_F32, RTLIB::impl___divsf3vfp },
536
537 // Double-precision floating-point arithmetic.
538 { RTLIB::ADD_F64, RTLIB::impl___adddf3vfp },
539 { RTLIB::SUB_F64, RTLIB::impl___subdf3vfp },
540 { RTLIB::MUL_F64, RTLIB::impl___muldf3vfp },
541 { RTLIB::DIV_F64, RTLIB::impl___divdf3vfp },
542
543 // Single-precision comparisons.
544 { RTLIB::OEQ_F32, RTLIB::impl___eqsf2vfp },
545 { RTLIB::UNE_F32, RTLIB::impl___nesf2vfp },
546 { RTLIB::OLT_F32, RTLIB::impl___ltsf2vfp },
547 { RTLIB::OLE_F32, RTLIB::impl___lesf2vfp },
548 { RTLIB::OGE_F32, RTLIB::impl___gesf2vfp },
549 { RTLIB::OGT_F32, RTLIB::impl___gtsf2vfp },
550 { RTLIB::UO_F32, RTLIB::impl___unordsf2vfp },
551
552 // Double-precision comparisons.
553 { RTLIB::OEQ_F64, RTLIB::impl___eqdf2vfp },
554 { RTLIB::UNE_F64, RTLIB::impl___nedf2vfp },
555 { RTLIB::OLT_F64, RTLIB::impl___ltdf2vfp },
556 { RTLIB::OLE_F64, RTLIB::impl___ledf2vfp },
557 { RTLIB::OGE_F64, RTLIB::impl___gedf2vfp },
558 { RTLIB::OGT_F64, RTLIB::impl___gtdf2vfp },
559 { RTLIB::UO_F64, RTLIB::impl___unorddf2vfp },
560
561 // Floating-point to integer conversions.
562 // i64 conversions are done via library routines even when generating VFP
563 // instructions, so use the same ones.
564 { RTLIB::FPTOSINT_F64_I32, RTLIB::impl___fixdfsivfp },
565 { RTLIB::FPTOUINT_F64_I32, RTLIB::impl___fixunsdfsivfp },
566 { RTLIB::FPTOSINT_F32_I32, RTLIB::impl___fixsfsivfp },
567 { RTLIB::FPTOUINT_F32_I32, RTLIB::impl___fixunssfsivfp },
568
569 // Conversions between floating types.
570 { RTLIB::FPROUND_F64_F32, RTLIB::impl___truncdfsf2vfp },
571 { RTLIB::FPEXT_F32_F64, RTLIB::impl___extendsfdf2vfp },
572
573 // Integer to floating-point conversions.
574 // i64 conversions are done via library routines even when generating VFP
575 // instructions, so use the same ones.
576 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
577 // e.g., __floatunsidf vs. __floatunssidfvfp.
578 { RTLIB::SINTTOFP_I32_F64, RTLIB::impl___floatsidfvfp },
579 { RTLIB::UINTTOFP_I32_F64, RTLIB::impl___floatunssidfvfp },
580 { RTLIB::SINTTOFP_I32_F32, RTLIB::impl___floatsisfvfp },
581 { RTLIB::UINTTOFP_I32_F32, RTLIB::impl___floatunssisfvfp },
582 };
583 // clang-format on
584
585 for (const auto &LC : LibraryCalls)
586 setLibcallImpl(LC.Op, LC.Impl);
587 }
588 }
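// The __*vfp libcall variants selected above take and return their
// floating-point operands in VFP registers rather than core registers, which
// is why they are only installed when VFP2 and ARM-mode instructions are
// actually available.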
589
590 if (Subtarget->isThumb1Only())
591 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
592 else
593 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
594
595 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
596 Subtarget->hasFPRegs()) {
597 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
598 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
599
604
605 if (!Subtarget->hasVFP2Base())
606 setAllExpand(MVT::f32);
607 if (!Subtarget->hasFP64())
608 setAllExpand(MVT::f64);
609 }
610
611 if (Subtarget->hasFullFP16()) {
612 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
613 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
614 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
615
616 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
617 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
618 }
619
620 if (Subtarget->hasBF16()) {
621 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
622 setAllExpand(MVT::bf16);
623 if (!Subtarget->hasFullFP16())
624 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
625 } else {
626 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
627 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);
628 setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
629 setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);
630 }
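// With +bf16 the type gets its own register class (shared with f16) and
// everything except bitcast/load/store is expanded; without it, bf16 values
// are only converted: BF16_TO_FP is expanded and FP_TO_BF16 is custom-lowered
// for f32/f64.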
631
633 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
634 setTruncStoreAction(VT, InnerVT, Expand);
635 addAllExtLoads(VT, InnerVT, Expand);
636 }
637
640
642 }
643
644 if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
646
647 if (!Subtarget->hasV8_1MMainlineOps())
649
652
655
656 if (Subtarget->hasMVEIntegerOps())
657 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
658
659 // Combine low-overhead loop intrinsics so that we can lower i1 types.
660 if (Subtarget->hasLOB()) {
661 setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
662 }
663
664 if (Subtarget->hasNEON()) {
665 addDRTypeForNEON(MVT::v2f32);
666 addDRTypeForNEON(MVT::v8i8);
667 addDRTypeForNEON(MVT::v4i16);
668 addDRTypeForNEON(MVT::v2i32);
669 addDRTypeForNEON(MVT::v1i64);
670
671 addQRTypeForNEON(MVT::v4f32);
672 addQRTypeForNEON(MVT::v2f64);
673 addQRTypeForNEON(MVT::v16i8);
674 addQRTypeForNEON(MVT::v8i16);
675 addQRTypeForNEON(MVT::v4i32);
676 addQRTypeForNEON(MVT::v2i64);
677
678 if (Subtarget->hasFullFP16()) {
679 addQRTypeForNEON(MVT::v8f16);
680 addDRTypeForNEON(MVT::v4f16);
681 }
682
683 if (Subtarget->hasBF16()) {
684 addQRTypeForNEON(MVT::v8bf16);
685 addDRTypeForNEON(MVT::v4bf16);
686 }
687 }
688
689 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
690 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
691 // none of Neon, MVE or VFP supports any arithmetic operations on it.
692 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
693 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
694 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
695 // FIXME: Code duplication: FDIV and FREM are expanded always, see
696 // ARMTargetLowering::addTypeForNEON method for details.
697 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
698 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
699 // FIXME: Create unittest.
 700 // In other words, find a way to exercise the case when "copysign" appears in
 701 // the DAG with vector operands.
703 // FIXME: Code duplication: SETCC has custom operation action, see
704 // ARMTargetLowering::addTypeForNEON method for details.
706 // FIXME: Create unittest for FNEG and for FABS.
707 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
708 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
709 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
710 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
711 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
712 setOperationAction(ISD::FTAN, MVT::v2f64, Expand);
713 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
714 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
715 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
716 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
717 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
718 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
719 setOperationAction(ISD::FEXP10, MVT::v2f64, Expand);
720 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
721 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
722 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
723 setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Expand);
724 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
725 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
726 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
727 }
728
729 if (Subtarget->hasNEON()) {
730 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
731 // supported for v4f32.
732 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
733 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
734 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
735 setOperationAction(ISD::FTAN, MVT::v4f32, Expand);
736 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
737 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
738 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
739 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
740 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
741 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
742 setOperationAction(ISD::FEXP10, MVT::v4f32, Expand);
743 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
744 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
745 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
746 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Expand);
747 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
748 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
749
750 // Mark v2f32 intrinsics.
751 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
752 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
753 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
754 setOperationAction(ISD::FTAN, MVT::v2f32, Expand);
755 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
756 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
757 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
758 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
759 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
760 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
761 setOperationAction(ISD::FEXP10, MVT::v2f32, Expand);
762 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
763 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
764 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
765 setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Expand);
766 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
767 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
768
769 for (ISD::NodeType Op : {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
770 ISD::FRINT, ISD::FTRUNC, ISD::FROUNDEVEN}) {
771 setOperationAction(Op, MVT::v4f16, Expand);
772 setOperationAction(Op, MVT::v8f16, Expand);
773 }
774
775 // Neon does not support some operations on v1i64 and v2i64 types.
776 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
777 // Custom handling for some quad-vector types to detect VMULL.
778 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
779 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
780 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
781 // Custom handling for some vector types to avoid expensive expansions
782 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
784 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
 786 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
 787 // a destination type that is wider than the source, nor does
788 // it have a FP_TO_[SU]INT instruction with a narrower destination than
789 // source.
798
800 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
801
 802 // NEON does not have a single-instruction CTPOP for vectors with element
 803 // types wider than 8 bits. However, custom lowering can leverage the
804 // v8i8/v16i8 vcnt instruction.
811
812 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
813 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
814
 815 // NEON does not have a single-instruction CTTZ for vectors.
817 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
818 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
819 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
820
821 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
822 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
823 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
824 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
825
830
835
839 }
840
841 // NEON only has FMA instructions as of VFP4.
842 if (!Subtarget->hasVFP4Base()) {
843 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
844 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
845 }
846
848 ISD::FP_TO_UINT, ISD::FMUL, ISD::LOAD});
849
850 // It is legal to extload from v4i8 to v4i16 or v4i32.
851 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
852 MVT::v2i32}) {
857 }
858 }
859
860 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
861 MVT::v4i32}) {
862 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
863 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
864 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
865 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
866 }
867 }
868
869 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
875 ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
876 }
877 if (Subtarget->hasMVEIntegerOps()) {
879 ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
880 ISD::SETCC});
881 }
882 if (Subtarget->hasMVEFloatOps()) {
884 }
885
886 if (!Subtarget->hasFP64()) {
887 // When targeting a floating-point unit with only single-precision
888 // operations, f64 is legal for the few double-precision instructions which
 889 // are present. However, no double-precision operations other than moves,
890 // loads and stores are provided by the hardware.
899 setOperationAction(ISD::FNEG, MVT::f64, Expand);
900 setOperationAction(ISD::FABS, MVT::f64, Expand);
901 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
902 setOperationAction(ISD::FSIN, MVT::f64, Expand);
903 setOperationAction(ISD::FCOS, MVT::f64, Expand);
904 setOperationAction(ISD::FPOW, MVT::f64, Expand);
905 setOperationAction(ISD::FLOG, MVT::f64, Expand);
906 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
907 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
908 setOperationAction(ISD::FEXP, MVT::f64, Expand);
909 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
910 setOperationAction(ISD::FEXP10, MVT::f64, Expand);
911 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
912 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
913 setOperationAction(ISD::FRINT, MVT::f64, Expand);
914 setOperationAction(ISD::FROUNDEVEN, MVT::f64, Expand);
915 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
916 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
929 }
930
931 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
932 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
934 if (Subtarget->hasFullFP16()) {
937 }
938 }
939
940 if (!Subtarget->hasFP16()) {
941 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
943 }
944
945 computeRegisterProperties(Subtarget->getRegisterInfo());
946
947 // ARM does not have floating-point extending loads.
948 for (MVT VT : MVT::fp_valuetypes()) {
949 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
950 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
951 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
952 }
953
954 // ... or truncating stores
955 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
956 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
957 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
958 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
959 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
960
 961 // ARM does not have an i1 sign-extending load.
962 for (MVT VT : MVT::integer_valuetypes())
964
965 // ARM supports all 4 flavors of integer indexed load / store.
966 if (!Subtarget->isThumb1Only()) {
967 for (unsigned im = (unsigned)ISD::PRE_INC;
969 setIndexedLoadAction(im, MVT::i1, Legal);
970 setIndexedLoadAction(im, MVT::i8, Legal);
971 setIndexedLoadAction(im, MVT::i16, Legal);
972 setIndexedLoadAction(im, MVT::i32, Legal);
973 setIndexedStoreAction(im, MVT::i1, Legal);
974 setIndexedStoreAction(im, MVT::i8, Legal);
975 setIndexedStoreAction(im, MVT::i16, Legal);
976 setIndexedStoreAction(im, MVT::i32, Legal);
977 }
978 } else {
979 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
982 }
983
988
991 if (Subtarget->hasDSP()) {
1000 }
1001 if (Subtarget->hasBaseDSP()) {
1004 }
1005
1006 // i64 operation support.
1009 if (Subtarget->isThumb1Only()) {
1012 }
1013 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1014 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1016
1024 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1025 setOperationAction(ISD::STORE, MVT::i64, Custom);
1026
1027 // MVE lowers 64 bit shifts to lsll and lsrl
1028 // assuming that ISD::SRL and SRA of i64 are already marked custom
1029 if (Subtarget->hasMVEIntegerOps())
1031
1032 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1033 if (Subtarget->isThumb1Only()) {
1037 }
1038
1039 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1041
1042 // ARM does not have ROTL.
1047 }
1049 // TODO: These two should be set to LibCall, but this currently breaks
1050 // the Linux kernel build. See #101786.
1053 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1056 }
1057
1058 // @llvm.readcyclecounter requires the Performance Monitors extension.
1059 // Default to the 0 expansion on unsupported platforms.
1060 // FIXME: Technically there are older ARM CPUs that have
1061 // implementation-specific ways of obtaining this information.
1062 if (Subtarget->hasPerfMon())
1063 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1064
 1065 // Only ARMv6 and later have BSWAP.
1066 if (!Subtarget->hasV6Ops())
1068
1069 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1070 : Subtarget->hasDivideInARMMode();
1071 if (!hasDivide) {
1072 // These are expanded into libcalls if the cpu doesn't have HW divider.
1075 }
1076
1077 if (TT.isOSWindows() && !Subtarget->hasDivideInThumbMode()) {
1080
1083 }
1084
1087
1088 // Register based DivRem for AEABI (RTABI 4.2)
1089 if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||
1090 TT.isTargetMuslAEABI() || TT.isOSWindows()) {
1093 HasStandaloneRem = false;
1094
1099 } else {
1102 }
1103
1108
1109 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1110 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1111
1112 // Use the default implementation.
1113 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1114 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1115 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1116 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1117 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1118 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1119
1120 if (TT.isOSWindows())
1121 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1122 else
1123 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
1124
1125 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1126 // the default expansion.
1127 InsertFencesForAtomic = false;
1128 if (Subtarget->hasAnyDataBarrier() &&
1129 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1130 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1131 // to ldrex/strex loops already.
1132 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1133 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1134 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1135
1136 // On v8, we have particularly efficient implementations of atomic fences
1137 // if they can be combined with nearby atomic loads and stores.
1138 if (!Subtarget->hasAcquireRelease() ||
1139 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1140 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1141 InsertFencesForAtomic = true;
1142 }
1143 } else {
1144 // If there's anything we can use as a barrier, go through custom lowering
1145 // for ATOMIC_FENCE.
1146 // If target has DMB in thumb, Fences can be inserted.
1147 if (Subtarget->hasDataBarrier())
1148 InsertFencesForAtomic = true;
1149
1150 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1151 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1152
1153 // Set them all for libcall, which will force libcalls.
1154 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
1155 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
1156 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
1157 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
1158 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
1159 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
1160 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
1161 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
1162 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
1163 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
1164 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
1165 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
1166 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1167 // Unordered/Monotonic case.
1168 if (!InsertFencesForAtomic) {
1169 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1170 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1171 }
1172 }
1173
1174 // Compute supported atomic widths.
1175 if (TT.isOSLinux() || (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1176 // For targets where __sync_* routines are reliably available, we use them
1177 // if necessary.
1178 //
1179 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1180 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1181 //
1182 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1183 // such targets should provide __sync_* routines, which use the ARM mode
1184 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1185 // encoding; see ARMISD::MEMBARRIER_MCR.)
1187 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1188 Subtarget->hasForced32BitAtomics()) {
1189 // Cortex-M (besides Cortex-M0) have 32-bit atomics.
1191 } else {
1192 // We can't assume anything about other targets; just use libatomic
1193 // routines.
1195 }
1196
1198
1199 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1200
1201 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1202 if (!Subtarget->hasV6Ops()) {
1205 }
1207
1208 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1209 !Subtarget->isThumb1Only()) {
1210 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1211 // iff target supports vfp2.
1212 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1214 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1215 setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);
1216 setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);
1217 setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);
1218 setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
1219 setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);
1220 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
1221 }
1222
1223 // We want to custom lower some of our intrinsics.
1228
1238 if (Subtarget->hasFullFP16()) {
1242 }
1243
1245
1246 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1247 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1248 if (Subtarget->hasFullFP16())
1249 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1250 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1251 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1252 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1253
1254 // We don't support sin/cos/fmod/copysign/pow
1255 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1256 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1257 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1258 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1259 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1260 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1263 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1264 !Subtarget->isThumb1Only()) {
1267 }
1268 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1269 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1270
1271 if (!Subtarget->hasVFP4Base()) {
1274 }
1275
1276 // Various VFP goodness
1277 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1278 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1279 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1280 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1281 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1282 }
1283
1284 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1285 if (!Subtarget->hasFP16()) {
1286 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1287 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1288 }
1289
1290 // Strict floating-point comparisons need custom lowering.
1297 }
1298
1299 // Use __sincos_stret if available.
1300 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1301 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1302 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1303 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1304 }
1305
1306 // FP-ARMv8 implements a lot of rounding-like FP operations.
1307 if (Subtarget->hasFPARMv8Base()) {
1308 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1309 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1310 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1311 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1312 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1313 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1314 setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
1315 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1316 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1317 if (Subtarget->hasNEON()) {
1318 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1319 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1320 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1321 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1322 }
1323
1324 if (Subtarget->hasFP64()) {
1325 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1326 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1327 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1328 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1329 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1330 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1331 setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
1332 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1333 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1334 }
1335 }
1336
 1337 // FP16 operations often need to be promoted in order to call library functions.
1338 if (Subtarget->hasFullFP16()) {
1341 setOperationAction(ISD::FSIN, MVT::f16, Promote);
1342 setOperationAction(ISD::FCOS, MVT::f16, Promote);
1343 setOperationAction(ISD::FTAN, MVT::f16, Promote);
1344 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
1345 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
1346 setOperationAction(ISD::FPOW, MVT::f16, Promote);
1347 setOperationAction(ISD::FEXP, MVT::f16, Promote);
1348 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
1349 setOperationAction(ISD::FEXP10, MVT::f16, Promote);
1350 setOperationAction(ISD::FLOG, MVT::f16, Promote);
1351 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
1352 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
1353
1354 setOperationAction(ISD::FROUND, MVT::f16, Legal);
1355 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
1356 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
1357 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
1358 setOperationAction(ISD::FRINT, MVT::f16, Legal);
1359 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
1360 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
1361 }
1362
1363 if (Subtarget->hasNEON()) {
1364 // vmin and vmax aren't available in a scalar form, so we can use
1365 // a NEON instruction with an undef lane instead.
1366 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1367 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1368 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1369 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1370 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1371 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1372 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1373 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1374
1375 if (Subtarget->hasV8Ops()) {
1376 setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
1377 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1378 setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
1379 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1380 setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);
1381 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
1382 setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
1383 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
1384 setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
1385 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1386 setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
1387 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1388 }
1389
1390 if (Subtarget->hasFullFP16()) {
1391 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1392 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1393 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1394 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1395
1396 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1397 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1398 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1399 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1400
1401 setOperationAction(ISD::FFLOOR, MVT::v4f16, Legal);
1402 setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);
1403 setOperationAction(ISD::FROUND, MVT::v4f16, Legal);
1404 setOperationAction(ISD::FROUND, MVT::v8f16, Legal);
1405 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Legal);
1406 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Legal);
1407 setOperationAction(ISD::FCEIL, MVT::v4f16, Legal);
1408 setOperationAction(ISD::FCEIL, MVT::v8f16, Legal);
1409 setOperationAction(ISD::FTRUNC, MVT::v4f16, Legal);
1410 setOperationAction(ISD::FTRUNC, MVT::v8f16, Legal);
1411 setOperationAction(ISD::FRINT, MVT::v4f16, Legal);
1412 setOperationAction(ISD::FRINT, MVT::v8f16, Legal);
1413 }
1414 }
1415
1416 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has
1417 // it, but it's just a wrapper around ldexp.
1418 if (TT.isOSWindows()) {
1419 for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1420 if (isOperationExpand(Op, MVT::f32))
1421 setOperationAction(Op, MVT::f32, Promote);
1422 }
1423
1424 // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16
1425 // isn't legal.
1426 for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
1427 if (isOperationExpand(Op, MVT::f16))
1428 setOperationAction(Op, MVT::f16, Promote);
1429
1430 // We have target-specific dag combine patterns for the following nodes:
1431 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1434
1435 if (Subtarget->hasMVEIntegerOps())
1437
1438 if (Subtarget->hasV6Ops())
1440 if (Subtarget->isThumb1Only())
1442 // Attempt to lower smin/smax to ssat/usat
1443 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1444 Subtarget->isThumb2()) {
1446 }
1447
1449
1450 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1451 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1453 else
1455
1456 //// temporary - rewrite interface to use type
1459 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1461 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1463
1464 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1465 // are at least 4 bytes aligned.
1467
1468 // Prefer likely predicted branches to selects on out-of-order cores.
1469 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1470
1471 setPrefLoopAlignment(Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1473 Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
1474
1475 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1476}
1477
1479 return Subtarget->useSoftFloat();
1480}
1481
1483 return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
1484}
1485
1486// FIXME: It might make sense to define the representative register class as the
1487// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
 1488// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1489// SPR's representative would be DPR_VFP2. This should work well if register
1490// pressure tracking were modified such that a register use would increment the
 1491// pressure of the register class's representative and all of its super
1492// classes' representatives transitively. We have not implemented this because
1493// of the difficulty prior to coalescing of modeling operand register classes
1494// due to the common occurrence of cross class copies and subregister insertions
1495// and extractions.
1496std::pair<const TargetRegisterClass *, uint8_t>
1498 MVT VT) const {
1499 const TargetRegisterClass *RRC = nullptr;
1500 uint8_t Cost = 1;
1501 switch (VT.SimpleTy) {
1502 default:
1504 // Use DPR as representative register class for all floating point
 1505 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1506 // the cost is 1 for both f32 and f64.
1507 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1508 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1509 RRC = &ARM::DPRRegClass;
1510 // When NEON is used for SP, only half of the register file is available
1511 // because operations that define both SP and DP results will be constrained
1512 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1513 // coalescing by double-counting the SP regs. See the FIXME above.
1514 if (Subtarget->useNEONForSinglePrecisionFP())
1515 Cost = 2;
1516 break;
1517 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1518 case MVT::v4f32: case MVT::v2f64:
1519 RRC = &ARM::DPRRegClass;
1520 Cost = 2;
1521 break;
1522 case MVT::v4i64:
1523 RRC = &ARM::DPRRegClass;
1524 Cost = 4;
1525 break;
1526 case MVT::v8i64:
1527 RRC = &ARM::DPRRegClass;
1528 Cost = 8;
1529 break;
1530 }
1531 return std::make_pair(RRC, Cost);
1532}
1533
1534const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1535#define MAKE_CASE(V) \
1536 case V: \
1537 return #V;
1538 switch ((ARMISD::NodeType)Opcode) {
1540 break;
1743#undef MAKE_CASE
1744 }
1745 return nullptr;
1746}
1747
1749 EVT VT) const {
1750 if (!VT.isVector())
1751 return getPointerTy(DL);
1752
1753 // MVE has a predicate register.
1754 if ((Subtarget->hasMVEIntegerOps() &&
1755 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1756 VT == MVT::v16i8)) ||
1757 (Subtarget->hasMVEFloatOps() &&
1758 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1759 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1761}
1762
1763/// getRegClassFor - Return the register class that should be used for the
1764/// specified value type.
1765const TargetRegisterClass *
1766ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1767 (void)isDivergent;
1768 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1769 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1770 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1771 // MVE Q registers.
1772 if (Subtarget->hasNEON()) {
1773 if (VT == MVT::v4i64)
1774 return &ARM::QQPRRegClass;
1775 if (VT == MVT::v8i64)
1776 return &ARM::QQQQPRRegClass;
1777 }
1778 if (Subtarget->hasMVEIntegerOps()) {
1779 if (VT == MVT::v4i64)
1780 return &ARM::MQQPRRegClass;
1781 if (VT == MVT::v8i64)
1782 return &ARM::MQQQQPRRegClass;
1783 }
1785}
1786
 1787// memcpy and other memory intrinsics typically try to use LDM/STM if the
1788// source/dest is aligned and the copy size is large enough. We therefore want
1789// to align such objects passed to memory intrinsics.
1791 Align &PrefAlign) const {
1792 if (!isa<MemIntrinsic>(CI))
1793 return false;
1794 MinSize = 8;
1795 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1796 // cycle faster than 4-byte aligned LDM.
1797 PrefAlign =
1798 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1799 return true;
1800}
1801
1802// Create a fast isel object.
1803FastISel *
1805 const TargetLibraryInfo *libInfo) const {
1806 return ARM::createFastISel(funcInfo, libInfo);
1807}
1808
1810 unsigned NumVals = N->getNumValues();
1811 if (!NumVals)
1812 return Sched::RegPressure;
1813
1814 for (unsigned i = 0; i != NumVals; ++i) {
1815 EVT VT = N->getValueType(i);
1816 if (VT == MVT::Glue || VT == MVT::Other)
1817 continue;
1818 if (VT.isFloatingPoint() || VT.isVector())
1819 return Sched::ILP;
1820 }
1821
1822 if (!N->isMachineOpcode())
1823 return Sched::RegPressure;
1824
 1825 // Loads are scheduled for latency even if the instruction itinerary
1826 // is not available.
1827 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1828 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1829
1830 if (MCID.getNumDefs() == 0)
1831 return Sched::RegPressure;
1832 if (!Itins->isEmpty() &&
1833 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1834 return Sched::ILP;
1835
1836 return Sched::RegPressure;
1837}
1838
1839//===----------------------------------------------------------------------===//
1840// Lowering Code
1841//===----------------------------------------------------------------------===//
1842
1843static bool isSRL16(const SDValue &Op) {
1844 if (Op.getOpcode() != ISD::SRL)
1845 return false;
1846 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1847 return Const->getZExtValue() == 16;
1848 return false;
1849}
1850
1851static bool isSRA16(const SDValue &Op) {
1852 if (Op.getOpcode() != ISD::SRA)
1853 return false;
1854 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1855 return Const->getZExtValue() == 16;
1856 return false;
1857}
1858
1859static bool isSHL16(const SDValue &Op) {
1860 if (Op.getOpcode() != ISD::SHL)
1861 return false;
1862 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1863 return Const->getZExtValue() == 16;
1864 return false;
1865}
1866
 1867// Check for a signed 16-bit value. We special-case SRA because it makes
 1868// things simpler when also looking for SRAs that aren't sign-extending a
1869// smaller value. Without the check, we'd need to take extra care with
1870// checking order for some operations.
1871static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1872 if (isSRA16(Op))
1873 return isSHL16(Op.getOperand(0));
1874 return DAG.ComputeNumSignBits(Op) == 17;
1875}
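// isS16 returns true when Op is a sign-extended 16-bit value: either an
// explicit shl-by-16 / sra-by-16 pair, or a value for which the DAG computes
// exactly 17 sign bits (16 data bits plus the sign bit).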
1876
1877/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1879 switch (CC) {
1880 default: llvm_unreachable("Unknown condition code!");
1881 case ISD::SETNE: return ARMCC::NE;
1882 case ISD::SETEQ: return ARMCC::EQ;
1883 case ISD::SETGT: return ARMCC::GT;
1884 case ISD::SETGE: return ARMCC::GE;
1885 case ISD::SETLT: return ARMCC::LT;
1886 case ISD::SETLE: return ARMCC::LE;
1887 case ISD::SETUGT: return ARMCC::HI;
1888 case ISD::SETUGE: return ARMCC::HS;
1889 case ISD::SETULT: return ARMCC::LO;
1890 case ISD::SETULE: return ARMCC::LS;
1891 }
1892}
1893
1894/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1896 ARMCC::CondCodes &CondCode2) {
1897 CondCode2 = ARMCC::AL;
1898 switch (CC) {
1899 default: llvm_unreachable("Unknown FP condition!");
1900 case ISD::SETEQ:
1901 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1902 case ISD::SETGT:
1903 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1904 case ISD::SETGE:
1905 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1906 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1907 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1908 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1909 case ISD::SETO: CondCode = ARMCC::VC; break;
1910 case ISD::SETUO: CondCode = ARMCC::VS; break;
1911 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1912 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1913 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1914 case ISD::SETLT:
1915 case ISD::SETULT: CondCode = ARMCC::LT; break;
1916 case ISD::SETLE:
1917 case ISD::SETULE: CondCode = ARMCC::LE; break;
1918 case ISD::SETNE:
1919 case ISD::SETUNE: CondCode = ARMCC::NE; break;
1920 }
1921}
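// A few FP conditions (e.g. SETONE, SETUEQ) have no single ARM equivalent and
// set CondCode2 as well; callers then emit a second predicated check for the
// extra condition.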
1922
1923//===----------------------------------------------------------------------===//
1924// Calling Convention Implementation
1925//===----------------------------------------------------------------------===//
1926
1927/// getEffectiveCallingConv - Get the effective calling convention, taking into
1928/// account presence of floating point hardware and calling convention
1929/// limitations, such as support for variadic functions.
1931ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1932 bool isVarArg) const {
1933 switch (CC) {
1934 default:
1935 report_fatal_error("Unsupported calling convention");
1938 case CallingConv::GHC:
1940 return CC;
1946 case CallingConv::Swift:
1949 case CallingConv::C:
1950 case CallingConv::Tail:
1951 if (!getTM().isAAPCS_ABI())
1952 return CallingConv::ARM_APCS;
1953 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
1954 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1955 !isVarArg)
1957 else
1959 case CallingConv::Fast:
1961 if (!getTM().isAAPCS_ABI()) {
1962 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
1963 return CallingConv::Fast;
1964 return CallingConv::ARM_APCS;
1965 } else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
1966 !isVarArg)
1968 else
1970 }
1971}
1972
1974 bool isVarArg) const {
1975 return CCAssignFnForNode(CC, false, isVarArg);
1976}
1977
1979 bool isVarArg) const {
1980 return CCAssignFnForNode(CC, true, isVarArg);
1981}
1982
1983/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1984/// CallingConvention.
1985CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1986 bool Return,
1987 bool isVarArg) const {
1988 switch (getEffectiveCallingConv(CC, isVarArg)) {
1989 default:
1990 report_fatal_error("Unsupported calling convention");
1992 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1994 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1996 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1997 case CallingConv::Fast:
1998 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1999 case CallingConv::GHC:
2000 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2002 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2004 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2006 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2007 }
2008}
2009
2010SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2011 MVT LocVT, MVT ValVT, SDValue Val) const {
2012 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2013 Val);
2014 if (Subtarget->hasFullFP16()) {
2015 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2016 } else {
2017 Val = DAG.getNode(ISD::TRUNCATE, dl,
2018 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2019 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2020 }
2021 return Val;
2022}
2023
2024SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2025 MVT LocVT, MVT ValVT,
2026 SDValue Val) const {
2027 if (Subtarget->hasFullFP16()) {
2028 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2029 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2030 } else {
2031 Val = DAG.getNode(ISD::BITCAST, dl,
2032 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2033 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2034 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2035 }
2036 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2037}
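// MoveToHPR / MoveFromHPR convert between the ABI representation of an
// f16/bf16 value (the low 16 bits of a 32-bit location) and the
// half-precision register class: a single VMOVhr/VMOVrh when full FP16 is
// available, otherwise a bitcast plus truncate/zero-extend through the
// corresponding integer type.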
2038
2039/// LowerCallResult - Lower the result values of a call into the
2040/// appropriate copies out of appropriate physical registers.
2041SDValue ARMTargetLowering::LowerCallResult(
2042 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2043 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2044 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2045 SDValue ThisVal, bool isCmseNSCall) const {
2046 // Assign locations to each value returned by this call.
2048 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2049 *DAG.getContext());
2050 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2051
2052 // Copy all of the result registers out of their specified physreg.
2053 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2054 CCValAssign VA = RVLocs[i];
2055
2056 // Pass 'this' value directly from the argument to return value, to avoid
2057 // reg unit interference
2058 if (i == 0 && isThisReturn) {
2059 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2060 "unexpected return calling convention register assignment");
2061 InVals.push_back(ThisVal);
2062 continue;
2063 }
2064
2065 SDValue Val;
2066 if (VA.needsCustom() &&
2067 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2068 // Handle f64 or half of a v2f64.
2069 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2070 InGlue);
2071 Chain = Lo.getValue(1);
2072 InGlue = Lo.getValue(2);
2073 VA = RVLocs[++i]; // skip ahead to next loc
2074 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2075 InGlue);
2076 Chain = Hi.getValue(1);
2077 InGlue = Hi.getValue(2);
2078 if (!Subtarget->isLittle())
2079 std::swap (Lo, Hi);
2080 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2081
2082 if (VA.getLocVT() == MVT::v2f64) {
2083 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2084 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2085 DAG.getConstant(0, dl, MVT::i32));
2086
2087 VA = RVLocs[++i]; // skip ahead to next loc
2088 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2089 Chain = Lo.getValue(1);
2090 InGlue = Lo.getValue(2);
2091 VA = RVLocs[++i]; // skip ahead to next loc
2092 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2093 Chain = Hi.getValue(1);
2094 InGlue = Hi.getValue(2);
2095 if (!Subtarget->isLittle())
2096 std::swap (Lo, Hi);
2097 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2098 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2099 DAG.getConstant(1, dl, MVT::i32));
2100 }
2101 } else {
2102 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2103 InGlue);
2104 Chain = Val.getValue(1);
2105 InGlue = Val.getValue(2);
2106 }
2107
2108 switch (VA.getLocInfo()) {
2109 default: llvm_unreachable("Unknown loc info!");
2110 case CCValAssign::Full: break;
2111 case CCValAssign::BCvt:
2112 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2113 break;
2114 }
2115
2116 // f16 arguments have their size extended to 4 bytes and passed as if they
2117 // had been copied to the LSBs of a 32-bit register.
2118 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2119 if (VA.needsCustom() &&
2120 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2121 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2122
2123 // On CMSE Non-secure Calls, call results (returned values) whose bitwidth
2124 // is less than 32 bits must be sign- or zero-extended after the call for
2125 // security reasons. Although the ABI mandates an extension done by the
2126 // callee, the latter cannot be trusted to follow the rules of the ABI.
2127 const ISD::InputArg &Arg = Ins[VA.getValNo()];
2128 if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&
2129 VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
2130 Val = handleCMSEValue(Val, Arg, DAG, dl);
2131
2132 InVals.push_back(Val);
2133 }
2134
2135 return Chain;
2136}
2137
2138std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2139 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2140 bool IsTailCall, int SPDiff) const {
2141 SDValue DstAddr;
2142 MachinePointerInfo DstInfo;
2143 int32_t Offset = VA.getLocMemOffset();
2144 MachineFunction &MF = DAG.getMachineFunction();
2145
2146 if (IsTailCall) {
2147 Offset += SPDiff;
2148 auto PtrVT = getPointerTy(DAG.getDataLayout());
2149 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2150 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2151 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2152 DstInfo =
2153 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
2154 } else {
2155 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2156 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2157 StackPtr, PtrOff);
2158 DstInfo =
2159 MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
2160 }
2161
2162 return std::make_pair(DstAddr, DstInfo);
2163}
2164
2165// Returns the type of copying which is required to set up a byval argument to
2166// a tail-called function. This isn't needed for non-tail calls, because they
2167// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
2168// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2169// optimised to zero copies when forwarding an argument from the caller's
2170// caller (NoCopy).
2171ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
2172 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2173 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2174 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2175
2176 // Globals are always safe to copy from.
2177 if (isa<GlobalAddressSDNode>(Src))
2178 return CopyOnce;
2179
2180 // Can only analyse frame index nodes, conservatively assume we need a
2181 // temporary.
2182 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2183 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2184 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2185 return CopyViaTemp;
2186
2187 int SrcFI = SrcFrameIdxNode->getIndex();
2188 int DstFI = DstFrameIdxNode->getIndex();
2189 assert(MFI.isFixedObjectIndex(DstFI) &&
2190 "byval passed in non-fixed stack slot");
2191
2192 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2193 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2194
2195 // If the source is in the local frame, then the copy to the argument memory
2196 // is always valid.
2197 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2198 if (!FixedSrc ||
2199 (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
2200 return CopyOnce;
2201
2202 // In the case of byval arguments split between registers and the stack,
2203 // computeAddrForCallArg returns a FrameIndex which corresponds only to the
2204 // stack portion, but the Src SDValue will refer to the full value, including
2205 // the local stack memory that the register portion gets stored into. We only
2206 // need to compare them for equality, so normalise on the full value version.
2207 uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
2208 DstOffset -= RegSize;
2209
2210 // If the value is already in the correct location, then no copying is
2211 // needed. If not, then we need to copy via a temporary.
2212 if (SrcOffset == DstOffset)
2213 return NoCopy;
2214 else
2215 return CopyViaTemp;
2216}
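// Illustrative example (hypothetical offsets): when a function simply forwards
// an incoming byval argument to a tail call and the argument already sits at
// the offset the callee expects, SrcOffset == DstOffset and the helper returns
// NoCopy. If the forwarded bytes instead come from a different incoming stack
// slot, the outgoing argument stores could clobber them, so CopyViaTemp is
// returned and the value is first staged in a local temporary.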
2217
2218void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2219 SDValue Chain, SDValue &Arg,
2220 RegsToPassVector &RegsToPass,
2221 CCValAssign &VA, CCValAssign &NextVA,
2222 SDValue &StackPtr,
2223 SmallVectorImpl<SDValue> &MemOpChains,
2224 bool IsTailCall,
2225 int SPDiff) const {
2226 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2227 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2228 unsigned id = Subtarget->isLittle() ? 0 : 1;
2229 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2230
2231 if (NextVA.isRegLoc())
2232 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2233 else {
2234 assert(NextVA.isMemLoc());
2235 if (!StackPtr.getNode())
2236 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2237 getPointerTy(DAG.getDataLayout()));
2238
2239 SDValue DstAddr;
2240 MachinePointerInfo DstInfo;
2241 std::tie(DstAddr, DstInfo) =
2242 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2243 MemOpChains.push_back(
2244 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2245 }
2246}
2247
2248static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2249 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2250 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2251}
2252
2253/// LowerCall - Lowering a call into a callseq_start <-
2254/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2255/// nodes.
2256SDValue
2257ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2258 SmallVectorImpl<SDValue> &InVals) const {
2259 SelectionDAG &DAG = CLI.DAG;
2260 SDLoc &dl = CLI.DL;
2261 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2262 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2263 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2264 SDValue Chain = CLI.Chain;
2265 SDValue Callee = CLI.Callee;
2266 bool &isTailCall = CLI.IsTailCall;
2267 CallingConv::ID CallConv = CLI.CallConv;
2268 bool doesNotRet = CLI.DoesNotReturn;
2269 bool isVarArg = CLI.IsVarArg;
2270 const CallBase *CB = CLI.CB;
2271
2272 MachineFunction &MF = DAG.getMachineFunction();
2273 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2274 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2275 MachineFunction::CallSiteInfo CSInfo;
2276 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2277 bool isThisReturn = false;
2278 bool isCmseNSCall = false;
2279 bool isSibCall = false;
2280 bool PreferIndirect = false;
2281 bool GuardWithBTI = false;
2282
2283 // Analyze operands of the call, assigning locations to each operand.
2284 SmallVector<CCValAssign, 16> ArgLocs;
2285 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2286 *DAG.getContext());
2287 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2288
2289 // Lower 'returns_twice' calls to a pseudo-instruction.
2290 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2291 !Subtarget->noBTIAtReturnTwice())
2292 GuardWithBTI = AFI->branchTargetEnforcement();
2293
2294 // Set type id for call site info.
2295 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
2296 CSInfo = MachineFunction::CallSiteInfo(*CB);
2297
2298 // Determine whether this is a non-secure function call.
2299 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2300 isCmseNSCall = true;
2301
2302 // Disable tail calls if they're not supported.
2303 if (!Subtarget->supportsTailCall())
2304 isTailCall = false;
2305
2306 // For both non-secure calls and returns from a CMSE entry function, the
2307 // function needs to do some extra work after the call, or before the
2308 // return, respectively, so it cannot end with a tail call.
2309 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2310 isTailCall = false;
2311
2312 if (isa<GlobalAddressSDNode>(Callee)) {
2313 // If we're optimizing for minimum size and the function is called three or
2314 // more times in this block, we can improve codesize by calling indirectly
2315 // as BLXr has a 16-bit encoding.
2316 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2317 if (CLI.CB) {
2318 auto *BB = CLI.CB->getParent();
2319 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2320 count_if(GV->users(), [&BB](const User *U) {
2321 return isa<Instruction>(U) &&
2322 cast<Instruction>(U)->getParent() == BB;
2323 }) > 2;
2324 }
2325 }
2326 if (isTailCall) {
2327 // Check if it's really possible to do a tail call.
2328 isTailCall =
2329 IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);
2330
2331 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2332 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2333 isSibCall = true;
2334
2335 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2336 // detected sibcalls.
2337 if (isTailCall)
2338 ++NumTailCalls;
2339 }
2340
2341 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2342 report_fatal_error("failed to perform tail call elimination on a call "
2343 "site marked musttail");
2344
2345 // Get a count of how many bytes are to be pushed on the stack.
2346 unsigned NumBytes = CCInfo.getStackSize();
2347
2348 // SPDiff is the byte offset of the call's argument area from the callee's.
2349 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2350 // by this amount for a tail call. In a sibling call it must be 0 because the
2351 // caller will deallocate the entire stack and the callee still expects its
2352 // arguments to begin at SP+0. Completely unused for non-tail calls.
2353 int SPDiff = 0;
2354
2355 if (isTailCall && !isSibCall) {
2356 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2357 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2358
2359 // Since callee will pop argument stack as a tail call, we must keep the
2360 // popped size 16-byte aligned.
2361 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
2362 assert(StackAlign && "data layout string is missing stack alignment");
2363 NumBytes = alignTo(NumBytes, *StackAlign);
2364
2365 // SPDiff will be negative if this tail call requires more space than we
2366 // would automatically have in our incoming argument space. Positive if we
2367 // can actually shrink the stack.
2368 SPDiff = NumReusableBytes - NumBytes;
2369
2370 // If this call requires more stack than we have available from
2371 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2372 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2373 AFI->setArgRegsSaveSize(-SPDiff);
2374 }
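// Worked example with made-up sizes: if the caller's own incoming argument
// area is 16 bytes (NumReusableBytes = 16) and this tail call needs 32 bytes
// after 16-byte alignment, then SPDiff = 16 - 32 = -16 and frame lowering is
// asked to reserve at least 16 extra bytes so the outgoing arguments fit.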
2375
2376 if (isSibCall) {
2377 // For sibling tail calls, memory operands are available in our caller's stack.
2378 NumBytes = 0;
2379 } else {
2380 // Adjust the stack pointer for the new arguments...
2381 // These operations are automatically eliminated by the prolog/epilog pass
2382 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2383 }
2384
2385 SDValue StackPtr =
2386 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2387
2388 RegsToPassVector RegsToPass;
2389 SmallVector<SDValue, 8> MemOpChains;
2390
2391 // If we are doing a tail-call, any byval arguments will be written to stack
2392 // space which was used for incoming arguments. If any of the values being used
2393 // are incoming byval arguments to this function, then they might be
2394 // overwritten by the stores of the outgoing arguments. To avoid this, we
2395 // need to make a temporary copy of them in local stack space, then copy back
2396 // to the argument area.
2397 DenseMap<unsigned, SDValue> ByValTemporaries;
2398 SDValue ByValTempChain;
2399 if (isTailCall) {
2400 SmallVector<SDValue, 8> ByValCopyChains;
2401 for (const CCValAssign &VA : ArgLocs) {
2402 unsigned ArgIdx = VA.getValNo();
2403 SDValue Src = OutVals[ArgIdx];
2404 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2405
2406 if (!Flags.isByVal())
2407 continue;
2408
2409 SDValue Dst;
2410 MachinePointerInfo DstInfo;
2411 std::tie(Dst, DstInfo) =
2412 computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
2413 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2414
2415 if (Copy == NoCopy) {
2416 // If the argument is already at the correct offset on the stack
2417 // (because we are forwarding a byval argument from our caller), we
2418 // don't need any copying.
2419 continue;
2420 } else if (Copy == CopyOnce) {
2421 // If the argument is in our local stack frame, no other argument
2422 // preparation can clobber it, so we can copy it to the final location
2423 // later.
2424 ByValTemporaries[ArgIdx] = Src;
2425 } else {
2426 assert(Copy == CopyViaTemp && "unexpected enum value");
2427 // If we might be copying this argument from the outgoing argument
2428 // stack area, we need to copy via a temporary in the local stack
2429 // frame.
2430 int TempFrameIdx = MFI.CreateStackObject(
2431 Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
2432 SDValue Temp =
2433 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2434
2435 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2436 SDValue AlignNode =
2437 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2438
2439 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2440 SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
2441 ByValCopyChains.push_back(
2442 DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
2443 ByValTemporaries[ArgIdx] = Temp;
2444 }
2445 }
2446 if (!ByValCopyChains.empty())
2447 ByValTempChain =
2448 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2449 }
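// Illustrative flow: when a byval source may overlap the outgoing argument
// area (CopyViaTemp), the loop above first emits an ARMISD::COPY_STRUCT_BYVAL
// into a fresh local stack object; the per-argument loop below then copies
// from that temporary into the final argument slot, so no outgoing store can
// clobber bytes that are still needed.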
2450
2451 // During a tail call, stores to the argument area must happen after all of
2452 // the function's incoming arguments have been loaded because they may alias.
2453 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2454 // there's no point in doing so repeatedly so this tracks whether that's
2455 // happened yet.
2456 bool AfterFormalArgLoads = false;
2457
2458 // Walk the register/memloc assignments, inserting copies/loads. In the case
2459 // of tail call optimization, arguments are handled later.
2460 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2461 i != e;
2462 ++i, ++realArgIdx) {
2463 CCValAssign &VA = ArgLocs[i];
2464 SDValue Arg = OutVals[realArgIdx];
2465 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2466 bool isByVal = Flags.isByVal();
2467
2468 // Promote the value if needed.
2469 switch (VA.getLocInfo()) {
2470 default: llvm_unreachable("Unknown loc info!");
2471 case CCValAssign::Full: break;
2472 case CCValAssign::SExt:
2473 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2474 break;
2475 case CCValAssign::ZExt:
2476 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2477 break;
2478 case CCValAssign::AExt:
2479 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2480 break;
2481 case CCValAssign::BCvt:
2482 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2483 break;
2484 }
2485
2486 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2487 Chain = DAG.getStackArgumentTokenFactor(Chain);
2488 if (ByValTempChain)
2489 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
2490 ByValTempChain);
2491 AfterFormalArgLoads = true;
2492 }
2493
2494 // f16 arguments have their size extended to 4 bytes and passed as if they
2495 // had been copied to the LSBs of a 32-bit register.
2496 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2497 if (VA.needsCustom() &&
2498 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2499 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2500 } else {
2501 // f16 arguments could have been extended prior to argument lowering.
2502 // Mask these arguments if this is a CMSE nonsecure call.
2503 auto ArgVT = Outs[realArgIdx].ArgVT;
2504 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2505 auto LocBits = VA.getLocVT().getSizeInBits();
2506 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2507 SDValue Mask =
2508 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2509 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2510 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2511 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2512 }
2513 }
2514
2515 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2516 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2517 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2518 DAG.getConstant(0, dl, MVT::i32));
2519 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2520 DAG.getConstant(1, dl, MVT::i32));
2521
2522 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2523 StackPtr, MemOpChains, isTailCall, SPDiff);
2524
2525 VA = ArgLocs[++i]; // skip ahead to next loc
2526 if (VA.isRegLoc()) {
2527 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2528 StackPtr, MemOpChains, isTailCall, SPDiff);
2529 } else {
2530 assert(VA.isMemLoc());
2531 SDValue DstAddr;
2532 MachinePointerInfo DstInfo;
2533 std::tie(DstAddr, DstInfo) =
2534 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2535 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2536 }
2537 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2538 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2539 StackPtr, MemOpChains, isTailCall, SPDiff);
2540 } else if (VA.isRegLoc()) {
2541 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2542 Outs[0].VT == MVT::i32) {
2543 assert(VA.getLocVT() == MVT::i32 &&
2544 "unexpected calling convention register assignment");
2545 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2546 "unexpected use of 'returned'");
2547 isThisReturn = true;
2548 }
2549 const TargetOptions &Options = DAG.getTarget().Options;
2550 if (Options.EmitCallSiteInfo)
2551 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2552 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2553 } else if (isByVal) {
2554 assert(VA.isMemLoc());
2555 unsigned offset = 0;
2556
2557 // True if this byval aggregate will be split between registers
2558 // and memory.
2559 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2560 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2561
2562 SDValue ByValSrc;
2563 bool NeedsStackCopy;
2564 if (auto It = ByValTemporaries.find(realArgIdx);
2565 It != ByValTemporaries.end()) {
2566 ByValSrc = It->second;
2567 NeedsStackCopy = true;
2568 } else {
2569 ByValSrc = Arg;
2570 NeedsStackCopy = !isTailCall;
2571 }
2572
2573 // If part of the argument is in registers, load them.
2574 if (CurByValIdx < ByValArgsCount) {
2575 unsigned RegBegin, RegEnd;
2576 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2577
2578 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2579 unsigned int i, j;
2580 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2581 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2582 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
2583 SDValue Load =
2584 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2585 DAG.InferPtrAlign(AddArg));
2586 MemOpChains.push_back(Load.getValue(1));
2587 RegsToPass.push_back(std::make_pair(j, Load));
2588 }
2589
2590 // If the parameter size exceeds the register area, the "offset" value
2591 // helps us calculate the stack slot for the remaining part properly.
2592 offset = RegEnd - RegBegin;
2593
2594 CCInfo.nextInRegsParam();
2595 }
2596
2597 // If the memory part of the argument isn't already in the correct place
2598 // (which can happen with tail calls), copy it into the argument area.
2599 if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
2600 auto PtrVT = getPointerTy(DAG.getDataLayout());
2601 SDValue Dst;
2602 MachinePointerInfo DstInfo;
2603 std::tie(Dst, DstInfo) =
2604 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2605 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2606 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
2607 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2608 MVT::i32);
2609 SDValue AlignNode =
2610 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2611
2612 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2613 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2614 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2615 Ops));
2616 }
2617 } else {
2618 assert(VA.isMemLoc());
2619 SDValue DstAddr;
2620 MachinePointerInfo DstInfo;
2621 std::tie(DstAddr, DstInfo) =
2622 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2623
2624 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2625 MemOpChains.push_back(Store);
2626 }
2627 }
2628
2629 if (!MemOpChains.empty())
2630 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2631
2632 // Build a sequence of copy-to-reg nodes chained together with token chain
2633 // and flag operands which copy the outgoing args into the appropriate regs.
2634 SDValue InGlue;
2635 for (const auto &[Reg, N] : RegsToPass) {
2636 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2637 InGlue = Chain.getValue(1);
2638 }
2639
2640 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2641 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2642 // node so that legalize doesn't hack it.
2643 bool isDirect = false;
2644
2645 const TargetMachine &TM = getTargetMachine();
2646 const GlobalValue *GVal = nullptr;
2647 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2648 GVal = G->getGlobal();
2649 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2650
2651 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2652 bool isLocalARMFunc = false;
2653 auto PtrVt = getPointerTy(DAG.getDataLayout());
2654
2655 if (Subtarget->genLongCalls()) {
2656 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2657 "long-calls codegen is not position independent!");
2658 // Handle a global address or an external symbol. If it's not one of
2659 // those, the target's already in a register, so we don't need to do
2660 // anything extra.
2661 if (isa<GlobalAddressSDNode>(Callee)) {
2662 if (Subtarget->genExecuteOnly()) {
2663 if (Subtarget->useMovt())
2664 ++NumMovwMovt;
2665 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2666 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2667 } else {
2668 // Create a constant pool entry for the callee address
2669 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2670 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
2671 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2672
2673 // Get the address of the callee into a register
2674 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2675 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2676 Callee = DAG.getLoad(
2677 PtrVt, dl, DAG.getEntryNode(), Addr,
2679 }
2680 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2681 const char *Sym = S->getSymbol();
2682
2683 if (Subtarget->genExecuteOnly()) {
2684 if (Subtarget->useMovt())
2685 ++NumMovwMovt;
2686 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2687 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2688 } else {
2689 // Create a constant pool entry for the callee address
2690 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2691 ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
2692 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2693
2694 // Get the address of the callee into a register
2695 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2696 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2697 Callee = DAG.getLoad(
2698 PtrVt, dl, DAG.getEntryNode(), Addr,
2700 }
2701 }
2702 } else if (isa<GlobalAddressSDNode>(Callee)) {
2703 if (!PreferIndirect) {
2704 isDirect = true;
2705 bool isDef = GVal->isStrongDefinitionForLinker();
2706
2707 // ARM call to a local ARM function is predicable.
2708 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2709 // tBX takes a register source operand.
2710 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2711 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2712 Callee = DAG.getNode(
2713 ARMISD::WrapperPIC, dl, PtrVt,
2714 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2715 Callee = DAG.getLoad(
2716 PtrVt, dl, DAG.getEntryNode(), Callee,
2720 } else if (Subtarget->isTargetCOFF()) {
2721 assert(Subtarget->isTargetWindows() &&
2722 "Windows is the only supported COFF target");
2723 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2724 if (GVal->hasDLLImportStorageClass())
2725 TargetFlags = ARMII::MO_DLLIMPORT;
2726 else if (!TM.shouldAssumeDSOLocal(GVal))
2727 TargetFlags = ARMII::MO_COFFSTUB;
2728 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2729 TargetFlags);
2730 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2731 Callee =
2732 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2733 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2735 } else {
2736 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2737 }
2738 }
2739 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2740 isDirect = true;
2741 // tBX takes a register source operand.
2742 const char *Sym = S->getSymbol();
2743 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2744 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2745 ARMConstantPoolValue *CPV =
2746 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2747 ARMPCLabelIndex, 4);
2748 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2749 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2750 Callee = DAG.getLoad(
2751 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2753 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2754 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2755 } else {
2756 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2757 }
2758 }
2759
2760 if (isCmseNSCall) {
2761 assert(!isARMFunc && !isDirect &&
2762 "Cannot handle call to ARM function or direct call");
2763 if (NumBytes > 0) {
2764 DAG.getContext()->diagnose(
2765 DiagnosticInfoUnsupported(DAG.getMachineFunction().getFunction(),
2766 "call to non-secure function would require "
2767 "passing arguments on stack",
2768 dl.getDebugLoc()));
2769 }
2770 if (isStructRet) {
2771 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
2773 "call to non-secure function would return value through pointer",
2774 dl.getDebugLoc()));
2775 }
2776 }
2777
2778 // FIXME: handle tail calls differently.
2779 unsigned CallOpc;
2780 if (Subtarget->isThumb()) {
2781 if (GuardWithBTI)
2782 CallOpc = ARMISD::t2CALL_BTI;
2783 else if (isCmseNSCall)
2784 CallOpc = ARMISD::tSECALL;
2785 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2786 CallOpc = ARMISD::CALL_NOLINK;
2787 else
2788 CallOpc = ARMISD::CALL;
2789 } else {
2790 if (!isDirect && !Subtarget->hasV5TOps())
2791 CallOpc = ARMISD::CALL_NOLINK;
2792 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2793 // Emit regular call when code size is the priority
2794 !Subtarget->hasMinSize())
2795 // "mov lr, pc; b _foo" to avoid confusing the RSP
2796 CallOpc = ARMISD::CALL_NOLINK;
2797 else
2798 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2799 }
2800
2801 // We don't usually want to end the call-sequence here because we would tidy
2802 // the frame up *after* the call. However, in the ABI-changing tail-call case
2803 // we've carefully laid out the parameters so that when sp is reset they'll be
2804 // in the correct location.
2805 if (isTailCall && !isSibCall) {
2806 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2807 InGlue = Chain.getValue(1);
2808 }
2809
2810 std::vector<SDValue> Ops;
2811 Ops.push_back(Chain);
2812 Ops.push_back(Callee);
2813
2814 if (isTailCall) {
2815 Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
2816 }
2817
2818 // Add argument registers to the end of the list so that they are known live
2819 // into the call.
2820 for (const auto &[Reg, N] : RegsToPass)
2821 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2822
2823 // Add a register mask operand representing the call-preserved registers.
2824 const uint32_t *Mask;
2825 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2826 if (isThisReturn) {
2827 // For 'this' returns, use the R0-preserving mask if applicable
2828 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2829 if (!Mask) {
2830 // Set isThisReturn to false if the calling convention is not one that
2831 // allows 'returned' to be modeled in this way, so LowerCallResult does
2832 // not try to pass 'this' straight through
2833 isThisReturn = false;
2834 Mask = ARI->getCallPreservedMask(MF, CallConv);
2835 }
2836 } else
2837 Mask = ARI->getCallPreservedMask(MF, CallConv);
2838
2839 assert(Mask && "Missing call preserved mask for calling convention");
2840 Ops.push_back(DAG.getRegisterMask(Mask));
2841
2842 if (InGlue.getNode())
2843 Ops.push_back(InGlue);
2844
2845 if (isTailCall) {
2846 MF.getFrameInfo().setHasTailCall();
2847 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
2848 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2849 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2850 return Ret;
2851 }
2852
2853 // Returns a chain and a flag for retval copy to use.
2854 Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
2855 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2856 InGlue = Chain.getValue(1);
2857 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2858
2859 // If we're guaranteeing tail-calls will be honoured, the callee must
2860 // pop its own argument stack on return. But this call is *not* a tail call so
2861 // we need to undo that after it returns to restore the status-quo.
2862 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2863 uint64_t CalleePopBytes =
2864 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
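// Example with made-up numbers: under GuaranteedTailCallOpt with a fastcc
// callee that takes 20 bytes of stack arguments, CalleePopBytes becomes
// alignTo(20, 16) = 32 and the CALLSEQ_END below records that the callee pops
// those bytes on return; for ordinary calls the caller remains responsible
// for its own outgoing argument area.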
2865
2866 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2867 if (!Ins.empty())
2868 InGlue = Chain.getValue(1);
2869
2870 // Handle result values, copying them out of physregs into vregs that we
2871 // return.
2872 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2873 InVals, isThisReturn,
2874 isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);
2875}
2876
2877/// HandleByVal - Every parameter *after* a byval parameter is passed
2878/// on the stack. Remember the next parameter register to allocate,
2879 /// and then confiscate the rest of the parameter registers to ensure
2880/// this.
2881void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2882 Align Alignment) const {
2883 // Byval (as with any stack) slots are always at least 4 byte aligned.
2884 Alignment = std::max(Alignment, Align(4));
2885
2886 MCRegister Reg = State->AllocateReg(GPRArgRegs);
2887 if (!Reg)
2888 return;
2889
2890 unsigned AlignInRegs = Alignment.value() / 4;
2891 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2892 for (unsigned i = 0; i < Waste; ++i)
2893 Reg = State->AllocateReg(GPRArgRegs);
2894
2895 if (!Reg)
2896 return;
2897
2898 unsigned Excess = 4 * (ARM::R4 - Reg);
2899
2900 // Special case when NSAA != SP and the parameter size is greater than the
2901 // size of all remaining GPR regs. In that case we can't split the parameter;
2902 // we must send it all to the stack. We also must set the NCRN to R4, so waste
2903 // all remaining registers.
2904 const unsigned NSAAOffset = State->getStackSize();
2905 if (NSAAOffset != 0 && Size > Excess) {
2906 while (State->AllocateReg(GPRArgRegs))
2907 ;
2908 return;
2909 }
2910
2911 // The first register for the byval parameter is the first register that
2912 // wasn't allocated before this method call, so it would be "reg".
2913 // If the parameter is small enough to be saved in the range [reg, r4), then
2914 // the end (first-past-the-last) register would be reg + param-size-in-regs;
2915 // otherwise the parameter is split between registers and the stack, and the
2916 // end register would be r4 in that case.
2917 unsigned ByValRegBegin = Reg;
2918 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2919 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2920 // Note that the first register was already allocated at the beginning of the
2921 // function; allocate the remaining registers we need here.
2922 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2923 State->AllocateReg(GPRArgRegs);
2924 // A byval parameter that is split between registers and memory needs its
2925 // size truncated here.
2926 // In the case where the entire structure fits in registers, we set the
2927 // size in memory to zero.
2928 Size = std::max<int>(Size - Excess, 0);
2929}
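// Worked example with made-up values: a 24-byte byval with 8-byte alignment
// when r1 is the next free GPR. AlignInRegs = 2, so r1 is wasted to reach an
// even register, r2-r3 carry the first Excess = 8 bytes, and Size is reduced
// to 16 bytes that go on the stack (assuming no stack arguments have been
// allocated yet, i.e. NSAAOffset == 0).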
2930
2931/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2932/// for tail call optimization. Targets which want to do tail call
2933/// optimization should implement this function. Note that this function also
2934/// processes musttail calls, so when this function returns false on a valid
2935/// musttail call, a fatal backend error occurs.
2936bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2937 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2938 SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {
2939 CallingConv::ID CalleeCC = CLI.CallConv;
2940 SDValue Callee = CLI.Callee;
2941 bool isVarArg = CLI.IsVarArg;
2942 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2943 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2944 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2945 const SelectionDAG &DAG = CLI.DAG;
2946 MachineFunction &MF = DAG.getMachineFunction();
2947 const Function &CallerF = MF.getFunction();
2948 CallingConv::ID CallerCC = CallerF.getCallingConv();
2949
2950 assert(Subtarget->supportsTailCall());
2951
2952 // Indirect tail-calls require a register to hold the target address. That
2953 // register must be:
2954 // * Allocatable (i.e. r0-r7 if the target is Thumb1).
2955 // * Not callee-saved, so must be one of r0-r3 or r12.
2956 // * Not used to hold an argument to the tail-called function, which might be
2957 // in r0-r3.
2958 // * Not used to hold the return address authentication code, which is in r12
2959 // if enabled.
2960 // Sometimes, no register matches all of these conditions, so we can't do a
2961 // tail-call.
2962 if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
2963 SmallSet<MCPhysReg, 5> AddressRegisters = {ARM::R0, ARM::R1, ARM::R2,
2964 ARM::R3};
2965 if (!(Subtarget->isThumb1Only() ||
2966 MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
2967 AddressRegisters.insert(ARM::R12);
2968 for (const CCValAssign &AL : ArgLocs)
2969 if (AL.isRegLoc())
2970 AddressRegisters.erase(AL.getLocReg());
2971 if (AddressRegisters.empty()) {
2972 LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
2973 return false;
2974 }
2975 }
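// For illustration: an indirect tail call whose arguments occupy all of r0-r3
// while return-address signing reserves r12 leaves no allocatable,
// non-callee-saved register for the target address, so the check above rejects
// the tail call and a normal call is emitted instead.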
2976
2977 // Look for obvious safe cases to perform tail call optimization that do not
2978 // require ABI changes. This is what gcc calls sibcall.
2979
2980 // Exception-handling functions need a special set of instructions to indicate
2981 // a return to the hardware. Tail-calling another function would probably
2982 // break this.
2983 if (CallerF.hasFnAttribute("interrupt")) {
2984 LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
2985 return false;
2986 }
2987
2988 if (canGuaranteeTCO(CalleeCC,
2989 getTargetMachine().Options.GuaranteedTailCallOpt)) {
2990 LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
2991 << " (guaranteed tail-call CC)\n");
2992 return CalleeCC == CallerCC;
2993 }
2994
2995 // Also avoid sibcall optimization if either caller or callee uses struct
2996 // return semantics.
2997 bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
2998 bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
2999 if (isCalleeStructRet != isCallerStructRet) {
3000 LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
3001 return false;
3002 }
3003
3004 // Externally-defined functions with weak linkage should not be
3005 // tail-called on ARM when the OS does not support dynamic
3006 // pre-emption of symbols, as the AAELF spec requires normal calls
3007 // to undefined weak functions to be replaced with a NOP or jump to the
3008 // next instruction. The behaviour of branch instructions in this
3009 // situation (as used for tail calls) is implementation-defined, so we
3010 // cannot rely on the linker replacing the tail call with a return.
3011 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3012 const GlobalValue *GV = G->getGlobal();
3013 const Triple &TT = getTargetMachine().getTargetTriple();
3014 if (GV->hasExternalWeakLinkage() &&
3015 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
3016 TT.isOSBinFormatMachO())) {
3017 LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
3018 return false;
3019 }
3020 }
3021
3022 // Check that the call results are passed in the same way.
3023 LLVMContext &C = *DAG.getContext();
3024 if (!CCState::resultsCompatible(
3025 getEffectiveCallingConv(CalleeCC, isVarArg),
3026 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3027 CCAssignFnForReturn(CalleeCC, isVarArg),
3028 CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
3029 LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
3030 return false;
3031 }
3032 // The callee has to preserve all registers the caller needs to preserve.
3033 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3034 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3035 if (CalleeCC != CallerCC) {
3036 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3037 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
3038 LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
3039 return false;
3040 }
3041 }
3042
3043 // If Caller's vararg argument has been split between registers and stack, do
3044 // not perform tail call, since part of the argument is in caller's local
3045 // frame.
3046 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3047 if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
3048 LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
3049 return false;
3050 }
3051
3052 // If the callee takes no arguments then go on to check the results of the
3053 // call.
3054 const MachineRegisterInfo &MRI = MF.getRegInfo();
3055 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
3056 LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
3057 return false;
3058 }
3059
3060 // If the stack arguments for this call do not fit into our own save area then
3061 // the call cannot be made tail.
3062 if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
3063 return false;
3064
3065 LLVM_DEBUG(dbgs() << "true\n");
3066 return true;
3067}
3068
3069bool
3070ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3071 MachineFunction &MF, bool isVarArg,
3072 const SmallVectorImpl<ISD::OutputArg> &Outs,
3073 LLVMContext &Context, const Type *RetTy) const {
3074 SmallVector<CCValAssign, 16> RVLocs;
3075 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3076 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3077}
3078
3079static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3080 const SDLoc &DL, SelectionDAG &DAG) {
3081 const MachineFunction &MF = DAG.getMachineFunction();
3082 const Function &F = MF.getFunction();
3083
3084 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3085
3086 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3087 // version of the "preferred return address". These offsets affect the return
3088 // instruction if this is a return from PL1 without hypervisor extensions.
3089 // IRQ/FIQ: +4 "subs pc, lr, #4"
3090 // SWI: 0 "subs pc, lr, #0"
3091 // ABORT: +4 "subs pc, lr, #4"
3092 // UNDEF: +4/+2 "subs pc, lr, #0"
3093 // UNDEF varies depending on where the exception came from ARM or Thumb
3094 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3095
3096 int64_t LROffset;
3097 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3098 IntKind == "ABORT")
3099 LROffset = 4;
3100 else if (IntKind == "SWI" || IntKind == "UNDEF")
3101 LROffset = 0;
3102 else
3103 report_fatal_error("Unsupported interrupt attribute. If present, value "
3104 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3105
3106 RetOps.insert(RetOps.begin() + 1,
3107 DAG.getConstant(LROffset, DL, MVT::i32, false));
3108
3109 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3110}
3111
3112SDValue
3113ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3114 bool isVarArg,
3116 const SmallVectorImpl<SDValue> &OutVals,
3117 const SDLoc &dl, SelectionDAG &DAG) const {
3118 // CCValAssign - represent the assignment of the return value to a location.
3119 SmallVector<CCValAssign, 16> RVLocs;
3120
3121 // CCState - Info about the registers and stack slots.
3122 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3123 *DAG.getContext());
3124
3125 // Analyze outgoing return values.
3126 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3127
3128 SDValue Glue;
3129 SmallVector<SDValue, 4> RetOps;
3130 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3131 bool isLittleEndian = Subtarget->isLittle();
3132
3133 MachineFunction &MF = DAG.getMachineFunction();
3134 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3135 AFI->setReturnRegsCount(RVLocs.size());
3136
3137 // Report error if cmse entry function returns structure through first ptr arg.
3138 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3139 // Note: using an empty SDLoc(), as the first line of the function is a
3140 // better place to report than the last line.
3141 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
3143 "secure entry function would return value through pointer",
3144 SDLoc().getDebugLoc()));
3145 }
3146
3147 // Copy the result values into the output registers.
3148 for (unsigned i = 0, realRVLocIdx = 0;
3149 i != RVLocs.size();
3150 ++i, ++realRVLocIdx) {
3151 CCValAssign &VA = RVLocs[i];
3152 assert(VA.isRegLoc() && "Can only return in registers!");
3153
3154 SDValue Arg = OutVals[realRVLocIdx];
3155 bool ReturnF16 = false;
3156
3157 if (Subtarget->hasFullFP16() && getTM().isTargetHardFloat()) {
3158 // Half-precision return values can be returned like this:
3159 //
3160 // t11: f16 = fadd ...
3161 // t12: i16 = bitcast t11
3162 // t13: i32 = zero_extend t12
3163 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3164 //
3165 // to avoid code generation for bitcasts, we simply set Arg to the node
3166 // that produces the f16 value, t11 in this case.
3167 //
3168 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3169 SDValue ZE = Arg.getOperand(0);
3170 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3171 SDValue BC = ZE.getOperand(0);
3172 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3173 Arg = BC.getOperand(0);
3174 ReturnF16 = true;
3175 }
3176 }
3177 }
3178 }
3179
3180 switch (VA.getLocInfo()) {
3181 default: llvm_unreachable("Unknown loc info!");
3182 case CCValAssign::Full: break;
3183 case CCValAssign::BCvt:
3184 if (!ReturnF16)
3185 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3186 break;
3187 }
3188
3189 // Mask f16 arguments if this is a CMSE nonsecure entry.
3190 auto RetVT = Outs[realRVLocIdx].ArgVT;
3191 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3192 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3193 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3194 } else {
3195 auto LocBits = VA.getLocVT().getSizeInBits();
3196 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3197 SDValue Mask =
3198 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3199 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3200 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3201 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3202 }
3203 }
3204
3205 if (VA.needsCustom() &&
3206 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3207 if (VA.getLocVT() == MVT::v2f64) {
3208 // Extract the first half and return it in two registers.
3209 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3210 DAG.getConstant(0, dl, MVT::i32));
3211 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3212 DAG.getVTList(MVT::i32, MVT::i32), Half);
3213
3214 Chain =
3215 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3216 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3217 Glue = Chain.getValue(1);
3218 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3219 VA = RVLocs[++i]; // skip ahead to next loc
3220 Chain =
3221 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3222 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3223 Glue = Chain.getValue(1);
3224 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3225 VA = RVLocs[++i]; // skip ahead to next loc
3226
3227 // Extract the 2nd half and fall through to handle it as an f64 value.
3228 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3229 DAG.getConstant(1, dl, MVT::i32));
3230 }
3231 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3232 // available.
3233 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3234 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3235 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3236 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3237 Glue = Chain.getValue(1);
3238 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3239 VA = RVLocs[++i]; // skip ahead to next loc
3240 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3241 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3242 } else
3243 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3244
3245 // Guarantee that all emitted copies are
3246 // stuck together, avoiding something bad.
3247 Glue = Chain.getValue(1);
3248 RetOps.push_back(DAG.getRegister(
3249 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3250 }
3251 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3252 const MCPhysReg *I =
3253 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3254 if (I) {
3255 for (; *I; ++I) {
3256 if (ARM::GPRRegClass.contains(*I))
3257 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3258 else if (ARM::DPRRegClass.contains(*I))
3259 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3260 else
3261 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3262 }
3263 }
3264
3265 // Update chain and glue.
3266 RetOps[0] = Chain;
3267 if (Glue.getNode())
3268 RetOps.push_back(Glue);
3269
3270 // CPUs which aren't M-class use a special sequence to return from
3271 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3272 // though we use "subs pc, lr, #N").
3273 //
3274 // M-class CPUs actually use a normal return sequence with a special
3275 // (hardware-provided) value in LR, so the normal code path works.
3276 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3277 !Subtarget->isMClass()) {
3278 if (Subtarget->isThumb1Only())
3279 report_fatal_error("interrupt attribute is not supported in Thumb1");
3280 return LowerInterruptReturn(RetOps, dl, DAG);
3281 }
3282
3283 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE
3284 : ARMISD::RET_GLUE;
3285 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3286}
3287
3288bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3289 if (N->getNumValues() != 1)
3290 return false;
3291 if (!N->hasNUsesOfValue(1, 0))
3292 return false;
3293
3294 SDValue TCChain = Chain;
3295 SDNode *Copy = *N->user_begin();
3296 if (Copy->getOpcode() == ISD::CopyToReg) {
3297 // If the copy has a glue operand, we conservatively assume it isn't safe to
3298 // perform a tail call.
3299 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3300 return false;
3301 TCChain = Copy->getOperand(0);
3302 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3303 SDNode *VMov = Copy;
3304 // f64 returned in a pair of GPRs.
3305 SmallPtrSet<SDNode*, 2> Copies;
3306 for (SDNode *U : VMov->users()) {
3307 if (U->getOpcode() != ISD::CopyToReg)
3308 return false;
3309 Copies.insert(U);
3310 }
3311 if (Copies.size() > 2)
3312 return false;
3313
3314 for (SDNode *U : VMov->users()) {
3315 SDValue UseChain = U->getOperand(0);
3316 if (Copies.count(UseChain.getNode()))
3317 // Second CopyToReg
3318 Copy = U;
3319 else {
3320 // We are at the top of this chain.
3321 // If the copy has a glue operand, we conservatively assume it
3322 // isn't safe to perform a tail call.
3323 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3324 return false;
3325 // First CopyToReg
3326 TCChain = UseChain;
3327 }
3328 }
3329 } else if (Copy->getOpcode() == ISD::BITCAST) {
3330 // f32 returned in a single GPR.
3331 if (!Copy->hasOneUse())
3332 return false;
3333 Copy = *Copy->user_begin();
3334 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3335 return false;
3336 // If the copy has a glue operand, we conservatively assume it isn't safe to
3337 // perform a tail call.
3338 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3339 return false;
3340 TCChain = Copy->getOperand(0);
3341 } else {
3342 return false;
3343 }
3344
3345 bool HasRet = false;
3346 for (const SDNode *U : Copy->users()) {
3347 if (U->getOpcode() != ARMISD::RET_GLUE &&
3348 U->getOpcode() != ARMISD::INTRET_GLUE)
3349 return false;
3350 HasRet = true;
3351 }
3352
3353 if (!HasRet)
3354 return false;
3355
3356 Chain = TCChain;
3357 return true;
3358}
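// Illustrative DAG shape this predicate accepts for an f64 return (sketch):
//
//   t1, t2: i32, i32 = ARMISD::VMOVRRD tN   ; f64 split into a GPR pair
//   CopyToReg R0, t1 ; CopyToReg R1, t2
//   ARMISD::RET_GLUE
//
// The value must only reach CopyToReg nodes that feed a return (and carry no
// glue), which is what allows the node producing the value to be treated as
// being in tail-call position.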
3359
3360bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3361 if (!Subtarget->supportsTailCall())
3362 return false;
3363
3364 if (!CI->isTailCall())
3365 return false;
3366
3367 return true;
3368}
3369
3370 // Trying to write a 64-bit value, so we need to split it into two 32-bit
3371 // values first, and pass the low and high parts through.
3372static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3373 SDLoc DL(Op);
3374 SDValue WriteValue = Op->getOperand(2);
3375
3376 // This function is only supposed to be called for i64 type argument.
3377 assert(WriteValue.getValueType() == MVT::i64
3378 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3379
3380 SDValue Lo, Hi;
3381 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3382 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3383 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3384}
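// Example (illustrative, placeholder register name): lowering
//   call void @llvm.write_register.i64(metadata !"<reg>", i64 %val)
// splits %val into two i32 halves with SplitScalar and rebuilds the
// ISD::WRITE_REGISTER node with {chain, regname, lo, hi} operands, since a
// single ARM GPR cannot hold a 64-bit value.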
3385
3386 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3387 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3388 // one of the above-mentioned nodes. It has to be wrapped because otherwise
3389 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3390 // be used to form an addressing mode. These wrapped nodes will be selected
3391 // into MOVi.
3392SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3393 SelectionDAG &DAG) const {
3394 EVT PtrVT = Op.getValueType();
3395 // FIXME there is no actual debug info here
3396 SDLoc dl(Op);
3397 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3398 SDValue Res;
3399
3400 // When generating execute-only code Constant Pools must be promoted to the
3401 // global data section. It's a bit ugly that we can't share them across basic
3402 // blocks, but this way we guarantee that execute-only behaves correctly with
3403 // position-independent addressing modes.
3404 if (Subtarget->genExecuteOnly()) {
3405 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3406 auto *T = CP->getType();
3407 auto C = const_cast<Constant*>(CP->getConstVal());
3408 auto M = DAG.getMachineFunction().getFunction().getParent();
3409 auto GV = new GlobalVariable(
3410 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3411 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3412 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3413 Twine(AFI->createPICLabelUId())
3414 );
3415 SDValue GA = DAG.getGlobalAddress(GV,
3416 dl, PtrVT);
3417 return LowerGlobalAddress(GA, DAG);
3418 }
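// Illustrative naming (assuming the default ELF private prefix ".L"): a
// constant promoted out of function #3 with PIC label id 7 becomes an internal
// global named ".LCP3_7", whose address is then formed via the normal
// LowerGlobalAddress path above.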
3419
3420 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3421 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3422 Align CPAlign = CP->getAlign();
3423 if (Subtarget->isThumb1Only())
3424 CPAlign = std::max(CPAlign, Align(4));
3425 if (CP->isMachineConstantPoolEntry())
3426 Res =
3427 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3428 else
3429 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3430 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3431}
3432
3433unsigned ARMTargetLowering::getJumpTableEncoding() const {
3434 // If we don't have a 32-bit pc-relative branch instruction then the jump
3435 // table consists of block addresses. Usually this is inline, but for
3436 // execute-only it must be placed out-of-line.
3437 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3438 return MachineJumpTableInfo::EK_BlockAddress;
3439 return MachineJumpTableInfo::EK_Inline;
3440}
3441
3442SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3443 SelectionDAG &DAG) const {
3444 MachineFunction &MF = DAG.getMachineFunction();
3445 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3446 unsigned ARMPCLabelIndex = 0;
3447 SDLoc DL(Op);
3448 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3449 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3450 SDValue CPAddr;
3451 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3452 if (!IsPositionIndependent) {
3453 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3454 } else {
3455 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3456 ARMPCLabelIndex = AFI->createPICLabelUId();
3457 ARMConstantPoolValue *CPV =
3458 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3459 ARMCP::CPBlockAddress, PCAdj);
3460 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3461 }
3462 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3463 SDValue Result = DAG.getLoad(
3464 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3466 if (!IsPositionIndependent)
3467 return Result;
3468 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3469 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3470}
3471
3472/// Convert a TLS address reference into the correct sequence of loads
3473/// and calls to compute the variable's address for Darwin, and return an
3474/// SDValue containing the final node.
3475
3476/// Darwin only has one TLS scheme which must be capable of dealing with the
3477/// fully general situation, in the worst case. This means:
3478/// + "extern __thread" declaration.
3479/// + Defined in a possibly unknown dynamic library.
3480///
3481/// The general system is that each __thread variable has a [3 x i32] descriptor
3482/// which contains information used by the runtime to calculate the address. The
3483/// only part of this the compiler needs to know about is the first word, which
3484/// contains a function pointer that must be called with the address of the
3485/// entire descriptor in "r0".
3486///
3487/// Since this descriptor may be in a different unit, in general access must
3488/// proceed along the usual ARM rules. A common sequence to produce is:
3489///
3490/// movw rT1, :lower16:_var$non_lazy_ptr
3491/// movt rT1, :upper16:_var$non_lazy_ptr
3492/// ldr r0, [rT1]
3493/// ldr rT2, [r0]
3494/// blx rT2
3495/// [...address now in r0...]
3496SDValue
3497ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3498 SelectionDAG &DAG) const {
3499 assert(Subtarget->isTargetDarwin() &&
3500 "This function expects a Darwin target");
3501 SDLoc DL(Op);
3502
3503 // The first step is to get the address of the actual global symbol. This is
3504 // where the TLS descriptor lives.
3505 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3506
3507 // The first entry in the descriptor is a function pointer that we must call
3508 // to obtain the address of the variable.
3509 SDValue Chain = DAG.getEntryNode();
3510 SDValue FuncTLVGet = DAG.getLoad(
3511 MVT::i32, DL, Chain, DescAddr,
3515 Chain = FuncTLVGet.getValue(1);
3516
3517 MachineFunction &F = DAG.getMachineFunction();
3518 MachineFrameInfo &MFI = F.getFrameInfo();
3519 MFI.setAdjustsStack(true);
3520
3521 // TLS calls preserve all registers except those that absolutely must be
3522 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3523 // silly).
3524 auto TRI =
3526 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3527 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3528
3529 // Finally, we can make the call. This is just a degenerate version of a
3530 // normal ARM call node: r0 takes the address of the descriptor, and
3531 // returns the address of the variable in this thread.
3532 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3533 Chain =
3534 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3535 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3536 DAG.getRegisterMask(Mask), Chain.getValue(1));
3537 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3538}
3539
3540SDValue
3541ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3542 SelectionDAG &DAG) const {
3543 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3544
3545 SDValue Chain = DAG.getEntryNode();
3546 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3547 SDLoc DL(Op);
3548
3549 // Load the current TEB (thread environment block)
3550 SDValue Ops[] = {Chain,
3551 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3552 DAG.getTargetConstant(15, DL, MVT::i32),
3553 DAG.getTargetConstant(0, DL, MVT::i32),
3554 DAG.getTargetConstant(13, DL, MVT::i32),
3555 DAG.getTargetConstant(0, DL, MVT::i32),
3556 DAG.getTargetConstant(2, DL, MVT::i32)};
3557 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3558 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3559
3560 SDValue TEB = CurrentTEB.getValue(0);
3561 Chain = CurrentTEB.getValue(1);
3562
3563 // Load the ThreadLocalStoragePointer from the TEB
3564 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3565 SDValue TLSArray =
3566 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3567 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3568
3569 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3570 // offset into the TLSArray.
3571
3572 // Load the TLS index from the C runtime
3573 SDValue TLSIndex =
3574 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3575 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3576 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3577
3578 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3579 DAG.getConstant(2, DL, MVT::i32));
3580 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3581 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3582 MachinePointerInfo());
3583
3584 // Get the offset of the start of the .tls section (section base)
3585 const auto *GA = cast<GlobalAddressSDNode>(Op);
3586 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3587 SDValue Offset = DAG.getLoad(
3588 PtrVT, DL, Chain,
3589 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3590 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3592
3593 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3594}
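// For illustration (a sketch only; register choices and the "ldr =" pseudo
// are illustrative), the node sequence built above corresponds roughly to
//
//   mrc  p15, #0, r0, c13, c0, #2    @ current TEB
//   ldr  r0, [r0, #0x2c]             @ ThreadLocalStoragePointer
//   ldr  r1, =_tls_index
//   ldr  r1, [r1]                    @ this module's TLS index
//   ldr  r0, [r0, r1, lsl #2]        @ base of this module's TLS block
//   add  r0, r0, rOff                @ rOff = the variable's SECREL offset,
//                                    @ loaded from the constant pool above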
3595
3596// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3597SDValue
3598ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3599 SelectionDAG &DAG) const {
3600 SDLoc dl(GA);
3601 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3602 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3603 MachineFunction &MF = DAG.getMachineFunction();
3604 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3605 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3606 ARMConstantPoolValue *CPV =
3607 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3608 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3609 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3610 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3611 Argument = DAG.getLoad(
3612 PtrVT, dl, DAG.getEntryNode(), Argument,
3614 SDValue Chain = Argument.getValue(1);
3615
3616 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3617 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3618
3619 // call __tls_get_addr.
3621 Args.emplace_back(Argument, Type::getInt32Ty(*DAG.getContext()));
3622
3623 // FIXME: is there useful debug info available here?
3624 TargetLowering::CallLoweringInfo CLI(DAG);
3625 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3627 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3628
3629 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3630 return CallResult.first;
3631}
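// For illustration (a rough sketch; label names and the exact relocation
// arithmetic are illustrative): the general-dynamic sequence built above is
// essentially
//
//   ldr  r0, .LCPIx          @ TLSGD-flavoured, pc-relative entry for the GV
// .LPICn:
//   add  r0, pc              @ ARMISD::PIC_ADD
//   bl   __tls_get_addr      @ returns the variable's address in r0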
3632
3633// Lower ISD::GlobalTLSAddress using the "initial exec" or
3634// "local exec" model.
3635SDValue
3636ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3637 SelectionDAG &DAG,
3638 TLSModel::Model model) const {
3639 const GlobalValue *GV = GA->getGlobal();
3640 SDLoc dl(GA);
3642 SDValue Chain = DAG.getEntryNode();
3643 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3644 // Get the Thread Pointer
3646
3647 if (model == TLSModel::InitialExec) {
3648 MachineFunction &MF = DAG.getMachineFunction();
3649 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3650 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3651 // Initial exec model.
3652 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3653 ARMConstantPoolValue *CPV =
3654 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3656 true);
3657 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3658 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3659 Offset = DAG.getLoad(
3660 PtrVT, dl, Chain, Offset,
3662 Chain = Offset.getValue(1);
3663
3664 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3665 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3666
3667 Offset = DAG.getLoad(
3668 PtrVT, dl, Chain, Offset,
3670 } else {
3671 // local exec model
3672 assert(model == TLSModel::LocalExec);
3673 ARMConstantPoolValue *CPV =
3675 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3676 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3677 Offset = DAG.getLoad(
3678 PtrVT, dl, Chain, Offset,
3680 }
3681
3682 // The address of the thread local variable is the add of the thread
3683 // pointer with the offset of the variable.
3684 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3685}
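// For illustration (a sketch): both paths above end in
// "thread pointer + offset".
// * Initial-exec: two loads -- the first fetches a pc-relative entry from the
//   constant pool (combined with ARMISD::PIC_ADD to form a GOT slot address),
//   the second reads the TP-relative offset stored in that slot.
// * Local-exec: a single load of the TP-relative offset straight from the
//   constant pool.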
3686
3687SDValue
3688ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3689 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3690 if (DAG.getTarget().useEmulatedTLS())
3691 return LowerToTLSEmulatedModel(GA, DAG);
3692
3693 if (Subtarget->isTargetDarwin())
3694 return LowerGlobalTLSAddressDarwin(Op, DAG);
3695
3696 if (Subtarget->isTargetWindows())
3697 return LowerGlobalTLSAddressWindows(Op, DAG);
3698
3699 // TODO: implement the "local dynamic" model
3700 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3702
3703 switch (model) {
3706 return LowerToTLSGeneralDynamicModel(GA, DAG);
3709 return LowerToTLSExecModels(GA, DAG, model);
3710 }
3711 llvm_unreachable("bogus TLS model");
3712}
3713
3714/// Return true if all users of V are within function F, looking through
3715/// ConstantExprs.
3716static bool allUsersAreInFunction(const Value *V, const Function *F) {
3717 SmallVector<const User*,4> Worklist(V->users());
3718 while (!Worklist.empty()) {
3719 auto *U = Worklist.pop_back_val();
3720 if (isa<ConstantExpr>(U)) {
3721 append_range(Worklist, U->users());
3722 continue;
3723 }
3724
3725 auto *I = dyn_cast<Instruction>(U);
3726 if (!I || I->getParent()->getParent() != F)
3727 return false;
3728 }
3729 return true;
3730}
3731
3733 const GlobalValue *GV, SelectionDAG &DAG,
3734 EVT PtrVT, const SDLoc &dl) {
3735 // If we're creating a pool entry for a constant global with unnamed address,
3736 // and the global is small enough, we can emit it inline into the constant pool
3737 // to save ourselves an indirection.
3738 //
3739 // This is a win if the constant is only used in one function (so it doesn't
3740 // need to be duplicated) or duplicating the constant wouldn't increase code
3741 // size (implying the constant is no larger than 4 bytes).
3742 const Function &F = DAG.getMachineFunction().getFunction();
3743
3744 // We rely on this decision to inline being idempotent and unrelated to the
3745 // use-site. We know that if we inline a variable at one use site, we'll
3746 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3747 // doesn't know about this optimization, so bail out if it's enabled;
3748 // otherwise we could decide to inline here (and thus never emit the GV)
3749 // while fast-isel-generated code still requires the GV.
3752 return SDValue();
3753
3754 auto *GVar = dyn_cast<GlobalVariable>(GV);
3755 if (!GVar || !GVar->hasInitializer() ||
3756 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3757 !GVar->hasLocalLinkage())
3758 return SDValue();
3759
3760 // If we inline a value that contains relocations, we move the relocations
3761 // from .data to .text. This is not allowed in position-independent code.
3762 auto *Init = GVar->getInitializer();
3763 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3764 Init->needsDynamicRelocation())
3765 return SDValue();
3766
3767 // The constant islands pass can only really deal with alignment requests
3768 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3769 // any type that requires alignment greater than 4 bytes. We also can only
3770 // promote constants that are multiples of 4 bytes in size or are paddable
3771 // to a multiple of 4. Currently we only try to pad constants that are
3772 // strings, for simplicity.
3773 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3774 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3775 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3776 unsigned RequiredPadding = 4 - (Size % 4);
3777 bool PaddingPossible =
3778 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3779 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3780 Size == 0)
3781 return SDValue();
3782
3783 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3785 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3786
3787 // We can't bloat the constant pool too much, or else the ConstantIslands
3788 // pass may fail to converge. If we haven't promoted this global yet (it may
3789 // have multiple uses), and promoting it would increase the constant pool
3790 // size (Size > 4), ensure we have space to do so up to MaxTotal.
3791 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3792 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3794 return SDValue();
3795
3796 // This is only valid if all users are in a single function; we can't clone
3797 // the constant in general. The LLVM IR unnamed_addr allows merging
3798 // constants, but not cloning them.
3799 //
3800 // We could potentially allow cloning if we could prove all uses of the
3801 // constant in the current function don't care about the address, like
3802 // printf format strings. But that isn't implemented for now.
3803 if (!allUsersAreInFunction(GVar, &F))
3804 return SDValue();
3805
3806 // We're going to inline this global. Pad it out if needed.
3807 if (RequiredPadding != 4) {
3808 StringRef S = CDAInit->getAsString();
3809
3811 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3812 while (RequiredPadding--)
3813 V.push_back(0);
3815 }
3816
3817 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3818 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3819 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3822 PaddedSize - 4);
3823 }
3824 ++NumConstpoolPromoted;
3825 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3826}
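// For illustration (a sketch; the IR below is illustrative): given
//
//   @.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
//
// used only within one function, promotion places the string bytes directly
// in that function's constant pool, so the address is formed with a single
// pc-relative load/adr of the pool entry instead of first loading the
// address of a separate global from the pool and only then using it.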
3827
3829 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3830 if (!(GV = GA->getAliaseeObject()))
3831 return false;
3832 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3833 return V->isConstant();
3834 return isa<Function>(GV);
3835}
3836
3837SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3838 SelectionDAG &DAG) const {
3839 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3840 default: llvm_unreachable("unknown object format");
3841 case Triple::COFF:
3842 return LowerGlobalAddressWindows(Op, DAG);
3843 case Triple::ELF:
3844 return LowerGlobalAddressELF(Op, DAG);
3845 case Triple::MachO:
3846 return LowerGlobalAddressDarwin(Op, DAG);
3847 }
3848}
3849
3850SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3851 SelectionDAG &DAG) const {
3852 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3853 SDLoc dl(Op);
3854 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3855 bool IsRO = isReadOnly(GV);
3856
3857 // promoteToConstantPool only if not generating XO text section
3858 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3859 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3860 return V;
3861
3862 if (isPositionIndependent()) {
3864 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3865 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3866 if (!GV->isDSOLocal())
3867 Result =
3868 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3870 return Result;
3871 } else if (Subtarget->isROPI() && IsRO) {
3872 // PC-relative.
3873 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3874 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3875 return Result;
3876 } else if (Subtarget->isRWPI() && !IsRO) {
3877 // SB-relative.
3878 SDValue RelAddr;
3879 if (Subtarget->useMovt()) {
3880 ++NumMovwMovt;
3881 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3882 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3883 } else { // use literal pool for address constant
3884 ARMConstantPoolValue *CPV =
3886 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3887 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3888 RelAddr = DAG.getLoad(
3889 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3891 }
3892 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3893 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3894 return Result;
3895 }
3896
3897 // If we have T2 ops, we can materialize the address directly via movt/movw
3898 // pair. This is always cheaper. If we need to generate Execute Only code, and we
3899 // only have Thumb1 available, we can't use a constant pool and are forced to
3900 // use immediate relocations.
3901 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3902 if (Subtarget->useMovt())
3903 ++NumMovwMovt;
3904 // FIXME: Once remat is capable of dealing with instructions with register
3905 // operands, expand this into two nodes.
3906 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3907 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3908 } else {
3909 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3910 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3911 return DAG.getLoad(
3912 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3914 }
3915}
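// For illustration (a sketch; "sym" is a placeholder): the two non-PIC
// materialization strategies above are roughly
//
//   movw r0, :lower16:sym    @ movt/movw pair (also the execute-only path)
//   movt r0, :upper16:sym
//
// versus, when movt is unavailable,
//
//   ldr  r0, .LCPIx          @ literal pool entry: .long sym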
3916
3917SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3918 SelectionDAG &DAG) const {
3919 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3920 "ROPI/RWPI not currently supported for Darwin");
3921 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3922 SDLoc dl(Op);
3923 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3924
3925 if (Subtarget->useMovt())
3926 ++NumMovwMovt;
3927
3928 // FIXME: Once remat is capable of dealing with instructions with register
3929 // operands, expand this into multiple nodes
3930 unsigned Wrapper =
3932
3933 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3934 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3935
3936 if (Subtarget->isGVIndirectSymbol(GV))
3937 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3939 return Result;
3940}
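// For illustration (a sketch; "sym" is a placeholder): a symbol that needs
// indirection is materialized roughly as
//
//   movw r0, :lower16:L_sym$non_lazy_ptr
//   movt r0, :upper16:L_sym$non_lazy_ptr
//   ldr  r0, [r0]            @ extra load added only when
//                            @ isGVIndirectSymbol(GV) is true above
//
// while a locally defined symbol gets the movw/movt pair on "sym" directly.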
3941
3942SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3943 SelectionDAG &DAG) const {
3944 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3945 assert(Subtarget->useMovt() &&
3946 "Windows on ARM expects to use movw/movt");
3947 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3948 "ROPI/RWPI not currently supported for Windows");
3949
3950 const TargetMachine &TM = getTargetMachine();
3951 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3952 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3953 if (GV->hasDLLImportStorageClass())
3954 TargetFlags = ARMII::MO_DLLIMPORT;
3955 else if (!TM.shouldAssumeDSOLocal(GV))
3956 TargetFlags = ARMII::MO_COFFSTUB;
3957 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3959 SDLoc DL(Op);
3960
3961 ++NumMovwMovt;
3962
3963 // FIXME: Once remat is capable of dealing with instructions with register
3964 // operands, expand this into two nodes.
3965 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3966 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3967 TargetFlags));
3968 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3969 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3971 return Result;
3972}
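// For illustration (a sketch; "sym" and the __imp_ spelling are meant as the
// usual COFF convention, not a guarantee): a dllimport'ed global is reached
// through the import address table, roughly
//
//   movw r0, :lower16:__imp_sym
//   movt r0, :upper16:__imp_sym
//   ldr  r0, [r0]            @ extra load added only for the
//                            @ MO_DLLIMPORT / MO_COFFSTUB cases above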
3973
3974SDValue
3975ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3976 SDLoc dl(Op);
3977 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3978 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3979 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3980 Op.getOperand(1), Val);
3981}
3982
3983SDValue
3984ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3985 SDLoc dl(Op);
3986 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3987 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3988}
3989
3990SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3991 SelectionDAG &DAG) const {
3992 SDLoc dl(Op);
3993 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3994 Op.getOperand(0));
3995}
3996
3997SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3998 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3999 unsigned IntNo =
4000 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4001 switch (IntNo) {
4002 default:
4003 return SDValue(); // Don't custom lower most intrinsics.
4004 case Intrinsic::arm_gnu_eabi_mcount: {
4005 MachineFunction &MF = DAG.getMachineFunction();
4006 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4007 SDLoc dl(Op);
4008 SDValue Chain = Op.getOperand(0);
4009 // call "\01__gnu_mcount_nc"
4010 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4011 const uint32_t *Mask =
4013 assert(Mask && "Missing call preserved mask for calling convention");
4014 // Mark LR an implicit live-in.
4015 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4016 SDValue ReturnAddress =
4017 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4018 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4019 SDValue Callee =
4020 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4022 if (Subtarget->isThumb())
4023 return SDValue(
4024 DAG.getMachineNode(
4025 ARM::tBL_PUSHLR, dl, ResultTys,
4026 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4027 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4028 0);
4029 return SDValue(
4030 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4031 {ReturnAddress, Callee, RegisterMask, Chain}),
4032 0);
4033 }
4034 }
4035}
4036
4037SDValue
4038ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4039 const ARMSubtarget *Subtarget) const {
4040 unsigned IntNo = Op.getConstantOperandVal(0);
4041 SDLoc dl(Op);
4042 switch (IntNo) {
4043 default: return SDValue(); // Don't custom lower most intrinsics.
4044 case Intrinsic::thread_pointer: {
4045 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4046 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4047 }
4048 case Intrinsic::arm_cls: {
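// The expansion below relies on the identity (x is i32, >> is arithmetic):
//   cls(x) == clz((((x >> 31) ^ x) << 1) | 1)
// The xor clears the leading copies of the sign bit; the "<< 1 | 1" discards
// the sign-bit position and keeps the clz input nonzero, so for example
// cls(0) == clz(1) == 31.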
4049 const SDValue &Operand = Op.getOperand(1);
4050 const EVT VTy = Op.getValueType();
4051 SDValue SRA =
4052 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4053 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4054 SDValue SHL =
4055 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4056 SDValue OR =
4057 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4058 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4059 return Result;
4060 }
4061 case Intrinsic::arm_cls64: {
4062 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4063 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4064 const SDValue &Operand = Op.getOperand(1);
4065 const EVT VTy = Op.getValueType();
4066 SDValue Lo, Hi;
4067 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4068 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4069 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4070 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4071 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4072 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4073 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4074 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4075 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4076 SDValue CheckLo =
4077 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4078 SDValue HiIsZero =
4079 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4080 SDValue AdjustedLo =
4081 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4082 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4083 SDValue Result =
4084 DAG.getSelect(dl, VTy, CheckLo,
4085 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4086 return Result;
4087 }
4088 case Intrinsic::eh_sjlj_lsda: {
4089 MachineFunction &MF = DAG.getMachineFunction();
4090 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4091 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4092 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4093 SDValue CPAddr;
4094 bool IsPositionIndependent = isPositionIndependent();
4095 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4096 ARMConstantPoolValue *CPV =
4097 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4098 ARMCP::CPLSDA, PCAdj);
4099 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4100 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4101 SDValue Result = DAG.getLoad(
4102 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4104
4105 if (IsPositionIndependent) {
4106 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4107 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4108 }
4109 return Result;
4110 }
4111 case Intrinsic::arm_neon_vabs:
4112 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4113 Op.getOperand(1));
4114 case Intrinsic::arm_neon_vabds:
4115 if (Op.getValueType().isInteger())
4116 return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
4117 Op.getOperand(1), Op.getOperand(2));
4118 return SDValue();
4119 case Intrinsic::arm_neon_vabdu:
4120 return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
4121 Op.getOperand(1), Op.getOperand(2));
4122 case Intrinsic::arm_neon_vmulls:
4123 case Intrinsic::arm_neon_vmullu: {
4124 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4125 ? ARMISD::VMULLs : ARMISD::VMULLu;
4126 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4127 Op.getOperand(1), Op.getOperand(2));
4128 }
4129 case Intrinsic::arm_neon_vminnm:
4130 case Intrinsic::arm_neon_vmaxnm: {
4131 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4132 ? ISD::FMINNUM : ISD::FMAXNUM;
4133 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4134 Op.getOperand(1), Op.getOperand(2));
4135 }
4136 case Intrinsic::arm_neon_vminu:
4137 case Intrinsic::arm_neon_vmaxu: {
4138 if (Op.getValueType().isFloatingPoint())
4139 return SDValue();
4140 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4141 ? ISD::UMIN : ISD::UMAX;
4142 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4143 Op.getOperand(1), Op.getOperand(2));
4144 }
4145 case Intrinsic::arm_neon_vmins:
4146 case Intrinsic::arm_neon_vmaxs: {
4147 // v{min,max}s is overloaded between signed integers and floats.
4148 if (!Op.getValueType().isFloatingPoint()) {
4149 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4150 ? ISD::SMIN : ISD::SMAX;
4151 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4152 Op.getOperand(1), Op.getOperand(2));
4153 }
4154 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4155 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4156 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4157 Op.getOperand(1), Op.getOperand(2));
4158 }
4159 case Intrinsic::arm_neon_vtbl1:
4160 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4161 Op.getOperand(1), Op.getOperand(2));
4162 case Intrinsic::arm_neon_vtbl2:
4163 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4164 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4165 case Intrinsic::arm_mve_pred_i2v:
4166 case Intrinsic::arm_mve_pred_v2i:
4167 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4168 Op.getOperand(1));
4169 case Intrinsic::arm_mve_vreinterpretq:
4170 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4171 Op.getOperand(1));
4172 case Intrinsic::arm_mve_lsll:
4173 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4174 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4175 case Intrinsic::arm_mve_asrl:
4176 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4177 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4178 }
4179}
4180
4182 const ARMSubtarget *Subtarget) {
4183 SDLoc dl(Op);
4184 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4185 if (SSID == SyncScope::SingleThread)
4186 return Op;
4187
4188 if (!Subtarget->hasDataBarrier()) {
4189 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4190 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4191 // here.
4192 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4193 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4194 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4195 DAG.getConstant(0, dl, MVT::i32));
4196 }
4197
4198 AtomicOrdering Ord =
4199 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4201 if (Subtarget->isMClass()) {
4202 // Only a full system barrier exists in the M-class architectures.
4204 } else if (Subtarget->preferISHSTBarriers() &&
4205 Ord == AtomicOrdering::Release) {
4206 // Swift happens to implement ISHST barriers in a way that's compatible with
4207 // Release semantics but weaker than ISH so we'd be fools not to use
4208 // it. Beware: other processors probably don't!
4210 }
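// Whatever was chosen above becomes the barrier-domain operand of the
// arm_dmb intrinsic below: the full-system barrier on M-class (the only one
// available there), the store-only ISHST form on the Swift Release path, and
// the default domain otherwise.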
4211
4212 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4213 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4214 DAG.getConstant(Domain, dl, MVT::i32));
4215}
4216
4218 const ARMSubtarget *Subtarget) {
4219 // Pre-v5TE ARM and Thumb1 do not have preload instructions.
4220 if (!(Subtarget->isThumb2() ||
4221 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4222 // Just preserve the chain.
4223 return Op.getOperand(0);
4224
4225 SDLoc dl(Op);
4226 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4227 if (!isRead &&
4228 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4229 // ARMv7 with MP extension has PLDW.
4230 return Op.getOperand(0);
4231
4232 unsigned isData = Op.getConstantOperandVal(4);
4233 if (Subtarget->isThumb()) {
4234 // Invert the bits.
4235 isRead = ~isRead & 1;
4236 isData = ~isData & 1;
4237 }
4238
4239 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4240 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4241 DAG.getConstant(isData, dl, MVT::i32));
4242}
4243
4246 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4247
4248 // vastart just stores the address of the VarArgsFrameIndex slot into the
4249 // memory location argument.
4250 SDLoc dl(Op);
4252 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4253 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4254 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4255 MachinePointerInfo(SV));
4256}
4257
4258SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4259 CCValAssign &NextVA,
4260 SDValue &Root,
4261 SelectionDAG &DAG,
4262 const SDLoc &dl) const {
4263 MachineFunction &MF = DAG.getMachineFunction();
4264 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4265
4266 const TargetRegisterClass *RC;
4267 if (AFI->isThumb1OnlyFunction())
4268 RC = &ARM::tGPRRegClass;
4269 else
4270 RC = &ARM::GPRRegClass;
4271
4272 // Transform the arguments stored in physical registers into virtual ones.
4273 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4274 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4275
4276 SDValue ArgValue2;
4277 if (NextVA.isMemLoc()) {
4278 MachineFrameInfo &MFI = MF.getFrameInfo();
4279 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4280
4281 // Create load node to retrieve arguments from the stack.
4282 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4283 ArgValue2 = DAG.getLoad(
4284 MVT::i32, dl, Root, FIN,
4286 } else {
4287 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4288 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4289 }
4290 if (!Subtarget->isLittle())
4291 std::swap (ArgValue, ArgValue2);
4292 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4293}
4294
4295// The remaining GPRs hold either the beginning of variable-argument
4296// data, or the beginning of an aggregate passed by value (usually
4297// byval). Either way, we allocate stack slots adjacent to the data
4298// provided by our caller, and store the unallocated registers there.
4299// If this is a variadic function, the va_list pointer will begin with
4300// these values; otherwise, this reassembles a (byval) structure that
4301// was split between registers and memory.
4302 // Return: the frame index that the registers were stored into.
4303int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4304 const SDLoc &dl, SDValue &Chain,
4305 const Value *OrigArg,
4306 unsigned InRegsParamRecordIdx,
4307 int ArgOffset, unsigned ArgSize) const {
4308 // Currently, two use cases are possible:
4309 // Case #1. Non-var-args function, and we meet the first byval parameter.
4310 // Set up the first unallocated register as the first byval register and
4311 // eat all remaining registers
4312 // (these two actions are performed by the HandleByVal method).
4313 // Then, here, we initialize the stack frame with
4314 // "store-reg" instructions.
4315 // Case #2. Var-args function that doesn't contain byval parameters.
4316 // The same: eat all remaining unallocated registers and
4317 // initialize the stack frame.
4318
4319 MachineFunction &MF = DAG.getMachineFunction();
4320 MachineFrameInfo &MFI = MF.getFrameInfo();
4321 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4322 unsigned RBegin, REnd;
4323 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4324 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4325 } else {
4326 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4327 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4328 REnd = ARM::R4;
4329 }
4330
4331 if (REnd != RBegin)
4332 ArgOffset = -4 * (ARM::R4 - RBegin);
4333
4334 auto PtrVT = getPointerTy(DAG.getDataLayout());
4335 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4336 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4337
4339 const TargetRegisterClass *RC =
4340 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4341
4342 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4343 Register VReg = MF.addLiveIn(Reg, RC);
4344 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4345 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4346 MachinePointerInfo(OrigArg, 4 * i));
4347 MemOps.push_back(Store);
4348 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4349 }
4350
4351 if (!MemOps.empty())
4352 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4353 return FrameIndex;
4354}
4355
4356 // Set up the stack frame that the va_list pointer will start from.
4357void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4358 const SDLoc &dl, SDValue &Chain,
4359 unsigned ArgOffset,
4360 unsigned TotalArgRegsSaveSize,
4361 bool ForceMutable) const {
4362 MachineFunction &MF = DAG.getMachineFunction();
4363 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4364
4365 // Try to store any remaining integer argument regs
4366 // to their spots on the stack so that they may be loaded by dereferencing
4367 // the result of va_next.
4368 // If there are no regs to be stored, just point the address after the last
4369 // argument passed via the stack.
4370 int FrameIndex = StoreByValRegs(
4371 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4372 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4373 AFI->setVarArgsFrameIndex(FrameIndex);
4374}
4375
4376bool ARMTargetLowering::splitValueIntoRegisterParts(
4377 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4378 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4379 EVT ValueVT = Val.getValueType();
4380 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4381 unsigned ValueBits = ValueVT.getSizeInBits();
4382 unsigned PartBits = PartVT.getSizeInBits();
4383 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4384 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4385 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4386 Parts[0] = Val;
4387 return true;
4388 }
4389 return false;
4390}
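// For illustration: an f16/bf16 value placed in an f32 part travels as
//   f16 -> bitcast -> i16 -> any_extend -> i32 -> bitcast -> f32
// so the half-precision bits land in the low 16 bits of the register;
// joinRegisterPartsIntoValue below applies the inverse
// bitcast/truncate/bitcast chain.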
4391
4392SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4393 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4394 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4395 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4396 unsigned ValueBits = ValueVT.getSizeInBits();
4397 unsigned PartBits = PartVT.getSizeInBits();
4398 SDValue Val = Parts[0];
4399
4400 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4401 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4402 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4403 return Val;
4404 }
4405 return SDValue();
4406}
4407
4408SDValue ARMTargetLowering::LowerFormalArguments(
4409 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4410 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4411 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4412 MachineFunction &MF = DAG.getMachineFunction();
4413 MachineFrameInfo &MFI = MF.getFrameInfo();
4414
4415 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4416
4417 // Assign locations to all of the incoming arguments.
4419 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4420 *DAG.getContext());
4421 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4422
4424 unsigned CurArgIdx = 0;
4425
4426 // Initially ArgRegsSaveSize is zero.
4427 // Then we increase this value each time we meet a byval parameter.
4428 // We also increase this value in the case of a varargs function.
4429 AFI->setArgRegsSaveSize(0);
4430
4431 // Calculate the amount of stack space that we need to allocate to store
4432 // byval and variadic arguments that are passed in registers.
4433 // We need to know this before we allocate the first byval or variadic
4434 // argument, as they will be allocated a stack slot below the CFA (Canonical
4435 // Frame Address, the stack pointer at entry to the function).
4436 unsigned ArgRegBegin = ARM::R4;
4437 for (const CCValAssign &VA : ArgLocs) {
4438 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4439 break;
4440
4441 unsigned Index = VA.getValNo();
4442 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4443 if (!Flags.isByVal())
4444 continue;
4445
4446 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4447 unsigned RBegin, REnd;
4448 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4449 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4450
4451 CCInfo.nextInRegsParam();
4452 }
4453 CCInfo.rewindByValRegsInfo();
4454
4455 int lastInsIndex = -1;
4456 if (isVarArg && MFI.hasVAStart()) {
4457 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4458 if (RegIdx != std::size(GPRArgRegs))
4459 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4460 }
4461
4462 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4463 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4464 auto PtrVT = getPointerTy(DAG.getDataLayout());
4465
4466 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4467 CCValAssign &VA = ArgLocs[i];
4468 if (Ins[VA.getValNo()].isOrigArg()) {
4469 std::advance(CurOrigArg,
4470 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4471 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4472 }
4473 // Arguments stored in registers.
4474 if (VA.isRegLoc()) {
4475 EVT RegVT = VA.getLocVT();
4476 SDValue ArgValue;
4477
4478 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4479 // f64 and vector types are split up into multiple registers or
4480 // combinations of registers and stack slots.
4481 SDValue ArgValue1 =
4482 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4483 VA = ArgLocs[++i]; // skip ahead to next loc
4484 SDValue ArgValue2;
4485 if (VA.isMemLoc()) {
4486 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4487 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4488 ArgValue2 = DAG.getLoad(
4489 MVT::f64, dl, Chain, FIN,
4491 } else {
4492 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4493 }
4494 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4495 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4496 ArgValue1, DAG.getIntPtrConstant(0, dl));
4497 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4498 ArgValue2, DAG.getIntPtrConstant(1, dl));
4499 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4500 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4501 } else {
4502 const TargetRegisterClass *RC;
4503
4504 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4505 RC = &ARM::HPRRegClass;
4506 else if (RegVT == MVT::f32)
4507 RC = &ARM::SPRRegClass;
4508 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4509 RegVT == MVT::v4bf16)
4510 RC = &ARM::DPRRegClass;
4511 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4512 RegVT == MVT::v8bf16)
4513 RC = &ARM::QPRRegClass;
4514 else if (RegVT == MVT::i32)
4515 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4516 : &ARM::GPRRegClass;
4517 else
4518 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4519
4520 // Transform the arguments in physical registers into virtual ones.
4521 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4522 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4523
4524 // If this value is passed in r0 and has the returned attribute (e.g.
4525 // C++ 'structors), record this fact for later use.
4526 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4527 AFI->setPreservesR0();
4528 }
4529 }
4530
4531 // If this is an 8 or 16-bit value, it is really passed promoted
4532 // to 32 bits. Insert an assert[sz]ext to capture this, then
4533 // truncate to the right size.
4534 switch (VA.getLocInfo()) {
4535 default: llvm_unreachable("Unknown loc info!");
4536 case CCValAssign::Full: break;
4537 case CCValAssign::BCvt:
4538 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4539 break;
4540 }
4541
4542 // f16 arguments have their size extended to 4 bytes and passed as if they
4543 // had been copied to the LSBs of a 32-bit register.
4544 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4545 if (VA.needsCustom() &&
4546 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4547 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4548
4549 // On CMSE Entry Functions, formal integer arguments whose bitwidth is
4550 // less than 32 bits must be sign- or zero-extended in the callee for
4551 // security reasons. Although the ABI mandates an extension done by the
4552 // caller, the latter cannot be trusted to follow the rules of the ABI.
4553 const ISD::InputArg &Arg = Ins[VA.getValNo()];
4554 if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&
4555 RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))
4556 ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);
4557
4558 InVals.push_back(ArgValue);
4559 } else { // VA.isRegLoc()
4560 // Only arguments passed on the stack should make it here.
4561 assert(VA.isMemLoc());
4562 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4563
4564 int index = VA.getValNo();
4565
4566 // Some Ins[] entries become multiple ArgLoc[] entries.
4567 // Process them only once.
4568 if (index != lastInsIndex)
4569 {
4570 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4571 // FIXME: For now, all byval parameter objects are marked mutable.
4572 // This can be changed with more analysis.
4573 // In case of tail call optimization mark all arguments mutable.
4574 // Since they could be overwritten by lowering of arguments in case of
4575 // a tail call.
4576 if (Flags.isByVal()) {
4577 assert(Ins[index].isOrigArg() &&
4578 "Byval arguments cannot be implicit");
4579 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4580
4581 int FrameIndex = StoreByValRegs(
4582 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4583 VA.getLocMemOffset(), Flags.getByValSize());
4584 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4585 CCInfo.nextInRegsParam();
4586 } else if (VA.needsCustom() && (VA.getValVT() == MVT::f16 ||
4587 VA.getValVT() == MVT::bf16)) {
4588 // f16 and bf16 values are passed in the least-significant half of
4589 // a 4-byte stack slot. This is done as if the extension had been done
4590 // in a 32-bit register, so the actual bytes used for the value
4591 // differ between little and big endian.
4592 assert(VA.getLocVT().getSizeInBits() == 32);
4593 unsigned FIOffset = VA.getLocMemOffset();
4594 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits() / 8,
4595 FIOffset, true);
4596
4597 SDValue Addr = DAG.getFrameIndex(FI, PtrVT);
4598 if (DAG.getDataLayout().isBigEndian())
4599 Addr = DAG.getObjectPtrOffset(dl, Addr, TypeSize::getFixed(2));
4600
4601 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, Addr,
4603 DAG.getMachineFunction(), FI)));
4604
4605 } else {
4606 unsigned FIOffset = VA.getLocMemOffset();
4607 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4608 FIOffset, true);
4609
4610 // Create load nodes to retrieve arguments from the stack.
4611 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4612 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4614 DAG.getMachineFunction(), FI)));
4615 }
4616 lastInsIndex = index;
4617 }
4618 }
4619 }
4620
4621 // varargs
4622 if (isVarArg && MFI.hasVAStart()) {
4623 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4624 TotalArgRegsSaveSize);
4625 if (AFI->isCmseNSEntryFunction()) {
4626 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
4628 "secure entry function must not be variadic", dl.getDebugLoc()));
4629 }
4630 }
4631
4632 unsigned StackArgSize = CCInfo.getStackSize();
4633 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4634 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4635 // The only way to guarantee a tail call is if the callee restores its
4636 // argument area, but it must also keep the stack aligned when doing so.
4637 MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
4638 assert(StackAlign && "data layout string is missing stack alignment");
4639 StackArgSize = alignTo(StackArgSize, *StackAlign);
4640
4641 AFI->setArgumentStackToRestore(StackArgSize);
4642 }
4643 AFI->setArgumentStackSize(StackArgSize);
4644
4645 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4646 DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
4648 "secure entry function requires arguments on stack", dl.getDebugLoc()));
4649 }
4650
4651 return Chain;
4652}
4653
4654/// isFloatingPointZero - Return true if this is +0.0.
4657 return CFP->getValueAPF().isPosZero();
4658 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4659 // Maybe this has already been legalized into the constant pool?
4660 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4661 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4663 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4664 return CFP->getValueAPF().isPosZero();
4665 }
4666 } else if (Op->getOpcode() == ISD::BITCAST &&
4667 Op->getValueType(0) == MVT::f64) {
4668 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4669 // created by LowerConstantFP().
4670 SDValue BitcastOp = Op->getOperand(0);
4671 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4672 isNullConstant(BitcastOp->getOperand(0)))
4673 return true;
4674 }
4675 return false;
4676}
4677
4678 /// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
4679/// the given operands.
4680SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4681 SDValue &ARMcc, SelectionDAG &DAG,
4682 const SDLoc &dl) const {
4683 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4684 unsigned C = RHSC->getZExtValue();
4685 if (!isLegalICmpImmediate((int32_t)C)) {
4686 // Constant does not fit, try adjusting it by one.
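// For example (on Thumb1, where cmp can only encode immediates 0..255):
// "x <u 256" cannot use cmp directly, but the equivalent "x <=u 255" can,
// so SETULT with C == 256 becomes SETULE with C - 1 == 255 below.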
4687 switch (CC) {
4688 default: break;
4689 case ISD::SETLT:
4690 case ISD::SETGE:
4691 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4692 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4693 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4694 }
4695 break;
4696 case ISD::SETULT:
4697 case ISD::SETUGE:
4698 if (C != 0 && isLegalICmpImmediate(C-1)) {
4699 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4700 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4701 }
4702 break;
4703 case ISD::SETLE:
4704 case ISD::SETGT:
4705 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4706 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4707 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4708 }
4709 break;
4710 case ISD::SETULE:
4711 case ISD::SETUGT:
4712 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4713 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4714 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4715 }
4716 break;
4717 }
4718 }
4719 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4721 // In ARM and Thumb-2, the compare instructions can shift their second
4722 // operand.
4724 std::swap(LHS, RHS);
4725 }
4726
4727 // Thumb1 has very limited immediate modes, so turning an "and" into a
4728 // shift can save multiple instructions.
4729 //
4730 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4731 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4732 // own. If it's the operand to an unsigned comparison with an immediate,
4733 // we can eliminate one of the shifts: we transform
4734 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4735 //
4736 // We avoid transforming cases which aren't profitable due to encoding
4737 // details:
4738 //
4739 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4740 // would not; in that case, we're essentially trading one immediate load for
4741 // another.
4742 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4743 // 3. C2 is zero; we have other code for this special case.
4744 //
4745 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4746 // instruction, since the AND is always one instruction anyway, but we could
4747 // use narrow instructions in some cases.
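// For illustration (constants chosen arbitrarily): with C1 == 0x00ffffff and
// C2 == 0x1000, "(x & 0x00ffffff) == 0x1000" becomes
// "(x << 8) == 0x100000", so the and and its mask constant are replaced by a
// single shift before the compare.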
4748 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4749 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4750 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4751 !isSignedIntSetCC(CC)) {
4752 unsigned Mask = LHS.getConstantOperandVal(1);
4753 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4754 uint64_t RHSV = RHSC->getZExtValue();
4755 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4756 unsigned ShiftBits = llvm::countl_zero(Mask);
4757 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4758 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4759 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4760 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4761 }
4762 }
4763 }
4764
4765 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4766 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4767 // way a cmp would.
4768 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4769 // some tweaks to the heuristics for the previous and->shift transform.
4770 // FIXME: Optimize cases where the LHS isn't a shift.
4771 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4772 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4773 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4774 LHS.getConstantOperandVal(1) < 31) {
4775 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4776 SDValue Shift =
4777 DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
4778 LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
4779 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4780 return Shift.getValue(1);
4781 }
4782
4784
4785 // If the RHS is a constant zero then the V (overflow) flag will never be
4786 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4787 // simpler for other passes (like the peephole optimiser) to deal with.
4788 if (isNullConstant(RHS)) {
4789 switch (CondCode) {
4790 default: break;
4791 case ARMCC::GE:
4793 break;
4794 case ARMCC::LT:
4796 break;
4797 }
4798 }
4799
4800 ARMISD::NodeType CompareType;
4801 switch (CondCode) {
4802 default:
4803 CompareType = ARMISD::CMP;
4804 break;
4805 case ARMCC::EQ:
4806 case ARMCC::NE:
4807 // Uses only Z Flag
4808 CompareType = ARMISD::CMPZ;
4809 break;
4810 }
4811 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4812 return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
4813}
4814
4815 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4816SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4817 SelectionDAG &DAG, const SDLoc &dl,
4818 bool Signaling) const {
4819 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4820 SDValue Flags;
4822 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
4823 LHS, RHS);
4824 else
4825 Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
4826 FlagsVT, LHS);
4827 return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
4828}
4829
4830// This function returns three things: the arithmetic computation itself
4831// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4832// comparison and the condition code define the case in which the arithmetic
4833// computation *does not* overflow.
4834std::pair<SDValue, SDValue>
4835ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4836 SDValue &ARMcc) const {
4837 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4838
4839 SDValue Value, OverflowCmp;
4840 SDValue LHS = Op.getOperand(0);
4841 SDValue RHS = Op.getOperand(1);
4842 SDLoc dl(Op);
4843
4844 // FIXME: We are currently always generating CMPs because we don't support
4845 // generating CMN through the backend. This is not as good as the natural
4846 // CMP case because it causes a register dependency and cannot be folded
4847 // later.
4848
4849 switch (Op.getOpcode()) {
4850 default:
4851 llvm_unreachable("Unknown overflow instruction!");
4852 case ISD::SADDO:
4853 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4854 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4855 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
4856 break;
4857 case ISD::UADDO:
4858 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4859 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4860 // We do not use it in the USUBO case as Value may not be used.
4861 Value = DAG.getNode(ARMISD::ADDC, dl,
4862 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4863 .getValue(0);
4864 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
4865 break;
4866 case ISD::SSUBO:
4867 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4868 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4869 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
4870 break;
4871 case ISD::USUBO:
4872 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4873 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4874 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
4875 break;
4876 case ISD::UMULO:
4877 // We generate a UMUL_LOHI and then check if the high word is 0.
4878 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4879 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4880 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4881 LHS, RHS);
4882 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
4883 DAG.getConstant(0, dl, MVT::i32));
4884 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4885 break;
4886 case ISD::SMULO:
4887 // We generate a SMUL_LOHI and then check if all the bits of the high word
4888 // are the same as the sign bit of the low word.
4889 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4890 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4891 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4892 LHS, RHS);
4893 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
4894 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4895 Value.getValue(0),
4896 DAG.getConstant(31, dl, MVT::i32)));
4897 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4898 break;
4899 } // switch (...)
4900
4901 return std::make_pair(Value, OverflowCmp);
4902}
4903
4904SDValue
4905ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4906 // Let legalize expand this if it isn't a legal type yet.
4907 if (!isTypeLegal(Op.getValueType()))
4908 return SDValue();
4909
4910 SDValue Value, OverflowCmp;
4911 SDValue ARMcc;
4912 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4913 SDLoc dl(Op);
4914 // We use 0 and 1 as false and true values.
4915 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4916 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4917 EVT VT = Op.getValueType();
4918
4919 SDValue Overflow =
4920 DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
4921
4922 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4923 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4924}
4925
4927 SelectionDAG &DAG) {
4928 SDLoc DL(BoolCarry);
4929 EVT CarryVT = BoolCarry.getValueType();
4930
4931 // This converts the boolean value carry into the carry flag by doing
4932 // ARMISD::SUBC Carry, 1
4933 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4934 DAG.getVTList(CarryVT, MVT::i32),
4935 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4936 return Carry.getValue(1);
4937}
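// Illustrative note: a boolean carry of 1 computes 1 - 1, which does not
// borrow, so the ARM carry flag ends up set; a boolean carry of 0 computes
// 0 - 1, which borrows and leaves the flag clear. This matches the ARM
// convention that C == 1 means "no borrow" for subtractions.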
4938
4939static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4940                                              SelectionDAG &DAG) {
4941 SDLoc DL(Flags);
4942
4943 // Now convert the carry flag into a boolean carry. We do this
4944 // using ARMISD:ADDE 0, 0, Carry
4945 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4946 DAG.getConstant(0, DL, MVT::i32),
4947 DAG.getConstant(0, DL, MVT::i32), Flags);
4948}
4949
4950SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4951 SelectionDAG &DAG) const {
4952 // Let legalize expand this if it isn't a legal type yet.
4953 if (!isTypeLegal(Op.getValueType()))
4954 return SDValue();
4955
4956 SDValue LHS = Op.getOperand(0);
4957 SDValue RHS = Op.getOperand(1);
4958 SDLoc dl(Op);
4959
4960 EVT VT = Op.getValueType();
4961 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4962 SDValue Value;
4963 SDValue Overflow;
4964 switch (Op.getOpcode()) {
4965 default:
4966 llvm_unreachable("Unknown overflow instruction!");
4967 case ISD::UADDO:
4968 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4969 // Convert the carry flag into a boolean value.
4970 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4971 break;
4972 case ISD::USUBO: {
4973 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4974 // Convert the carry flag into a boolean value.
4975 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4976 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4977 // value. So compute 1 - C.
4978 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4979 DAG.getConstant(1, dl, MVT::i32), Overflow);
4980 break;
4981 }
4982 }
4983
4984 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4985}
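// Worked example (illustrative): for ISD::USUBO with LHS = 1 and RHS = 2 the
// ARMISD::SUBC borrows, so the carry flag is 0; the boolean carry is then 0
// and the final 1 - 0 = 1 correctly reports an unsigned overflow. With
// LHS = 2 and RHS = 1 there is no borrow, the flag is 1, and 1 - 1 = 0.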
4986
4987static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4988                              const ARMSubtarget *Subtarget) {
4989 EVT VT = Op.getValueType();
4990 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
4991 return SDValue();
4992 if (!VT.isSimple())
4993 return SDValue();
4994
4995 unsigned NewOpcode;
4996 switch (VT.getSimpleVT().SimpleTy) {
4997 default:
4998 return SDValue();
4999 case MVT::i8:
5000 switch (Op->getOpcode()) {
5001 case ISD::UADDSAT:
5002 NewOpcode = ARMISD::UQADD8b;
5003 break;
5004 case ISD::SADDSAT:
5005 NewOpcode = ARMISD::QADD8b;
5006 break;
5007 case ISD::USUBSAT:
5008 NewOpcode = ARMISD::UQSUB8b;
5009 break;
5010 case ISD::SSUBSAT:
5011 NewOpcode = ARMISD::QSUB8b;
5012 break;
5013 }
5014 break;
5015 case MVT::i16:
5016 switch (Op->getOpcode()) {
5017 case ISD::UADDSAT:
5018 NewOpcode = ARMISD::UQADD16b;
5019 break;
5020 case ISD::SADDSAT:
5021 NewOpcode = ARMISD::QADD16b;
5022 break;
5023 case ISD::USUBSAT:
5024 NewOpcode = ARMISD::UQSUB16b;
5025 break;
5026 case ISD::SSUBSAT:
5027 NewOpcode = ARMISD::QSUB16b;
5028 break;
5029 }
5030 break;
5031 }
5032
5033 SDLoc dl(Op);
5034 SDValue Add =
5035 DAG.getNode(NewOpcode, dl, MVT::i32,
5036 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5037 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5038 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5039}
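// Worked example (illustrative): an i8 ISD::SADDSAT of 100 and 100 is
// sign-extended to i32 and lowered to ARMISD::QADD8b, i.e. a signed
// saturating byte add (QADD8) on the bottom lane, which clamps the result
// to 127 before it is truncated back to i8.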
5040
5041SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5042 SDValue Cond = Op.getOperand(0);
5043 SDValue SelectTrue = Op.getOperand(1);
5044 SDValue SelectFalse = Op.getOperand(2);
5045 SDLoc dl(Op);
5046 unsigned Opc = Cond.getOpcode();
5047
5048 if (Cond.getResNo() == 1 &&
5049 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5050 Opc == ISD::USUBO)) {
5051 if (!isTypeLegal(Cond->getValueType(0)))
5052 return SDValue();
5053
5054 SDValue Value, OverflowCmp;
5055 SDValue ARMcc;
5056 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5057 EVT VT = Op.getValueType();
5058
5059 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
5060 }
5061
5062 // Convert:
5063 //
5064 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5065 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5066 //
5067 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5068 const ConstantSDNode *CMOVTrue =
5069 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5070 const ConstantSDNode *CMOVFalse =
5071 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5072
5073 if (CMOVTrue && CMOVFalse) {
5074 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5075 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5076
5077 SDValue True;
5078 SDValue False;
5079 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5080 True = SelectTrue;
5081 False = SelectFalse;
5082 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5083 True = SelectFalse;
5084 False = SelectTrue;
5085 }
5086
5087 if (True.getNode() && False.getNode())
5088 return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
5089 Cond.getOperand(3), DAG);
5090 }
5091 }
5092
5093 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5094 // undefined bits before doing a full-word comparison with zero.
5095 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5096 DAG.getConstant(1, dl, Cond.getValueType()));
5097
5098 return DAG.getSelectCC(dl, Cond,
5099 DAG.getConstant(0, dl, Cond.getValueType()),
5100 SelectTrue, SelectFalse, ISD::SETNE);
5101}
5102
5103static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5104                                 bool &swpCmpOps, bool &swpVselOps) {
5105 // Start by selecting the GE condition code for opcodes that return true for
5106 // 'equality'
5107 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5108 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5109 CondCode = ARMCC::GE;
5110
5111 // and GT for opcodes that return false for 'equality'.
5112 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5113 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5114 CondCode = ARMCC::GT;
5115
5116 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5117 // to swap the compare operands.
5118 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5119 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5120 swpCmpOps = true;
5121
5122 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5123 // If we have an unordered opcode, we need to swap the operands to the VSEL
5124 // instruction (effectively negating the condition).
5125 //
5126 // This also has the effect of swapping which one of 'less' or 'greater'
5127 // returns true, so we also swap the compare operands. It also switches
5128 // whether we return true for 'equality', so we compensate by picking the
5129 // opposite condition code to our original choice.
5130 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5131 CC == ISD::SETUGT) {
5132 swpCmpOps = !swpCmpOps;
5133 swpVselOps = !swpVselOps;
5134 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5135 }
5136
5137 // 'ordered' is 'anything but unordered', so use the VS condition code and
5138 // swap the VSEL operands.
5139 if (CC == ISD::SETO) {
5140 CondCode = ARMCC::VS;
5141 swpVselOps = true;
5142 }
5143
5144 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5145 // code and swap the VSEL operands. Also do this if we don't care about the
5146 // unordered case.
5147 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5148 CondCode = ARMCC::EQ;
5149 swpVselOps = true;
5150 }
5151}
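// Worked example (illustrative): for ISD::SETULT the code above first picks
// GT (ULT returns false for equality) and marks the compare operands for
// swapping ('less' family). Because ULT also accepts the unordered case, the
// final adjustment toggles the compare swap back off, requests a VSEL operand
// swap instead, and relaxes GT to GE.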
5152
5153SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5154 SDValue TrueVal, SDValue ARMcc,
5155 SDValue Flags, SelectionDAG &DAG) const {
5156 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5157    FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5158                           DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5159    TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5160                          DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5161
5162 SDValue TrueLow = TrueVal.getValue(0);
5163 SDValue TrueHigh = TrueVal.getValue(1);
5164 SDValue FalseLow = FalseVal.getValue(0);
5165 SDValue FalseHigh = FalseVal.getValue(1);
5166
5167 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5168 ARMcc, Flags);
5169 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5170 ARMcc, Flags);
5171
5172 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5173 }
5174 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
5175}
5176
5177static bool isGTorGE(ISD::CondCode CC) {
5178 return CC == ISD::SETGT || CC == ISD::SETGE;
5179}
5180
5181static bool isLTorLE(ISD::CondCode CC) {
5182 return CC == ISD::SETLT || CC == ISD::SETLE;
5183}
5184
5185// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5186// All of these conditions (and their <= and >= counterparts) will do:
5187// x < k ? k : x
5188// x > k ? x : k
5189// k < x ? x : k
5190// k > x ? k : x
5191static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5192 const SDValue TrueVal, const SDValue FalseVal,
5193 const ISD::CondCode CC, const SDValue K) {
5194 return (isGTorGE(CC) &&
5195 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5196 (isLTorLE(CC) &&
5197 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5198}
5199
5200// Check if two chained conditionals could be converted into SSAT or USAT.
5201//
5202// SSAT can replace a set of two conditional selectors that bound a number to an
5203// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5204//
5205// x < -k ? -k : (x > k ? k : x)
5206// x < -k ? -k : (x < k ? x : k)
5207// x > -k ? (x > k ? k : x) : -k
5208// x < k ? (x < -k ? -k : x) : k
5209// etc.
5210//
5211// LLVM canonicalizes these to either a min(max()) or a max(min())
5212// pattern. This function tries to match one of these and will return a SSAT
5213// node if successful.
5214//
5215 // USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1
5216// is a power of 2.
5217static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5218  EVT VT = Op.getValueType();
5219 SDValue V1 = Op.getOperand(0);
5220 SDValue K1 = Op.getOperand(1);
5221 SDValue TrueVal1 = Op.getOperand(2);
5222 SDValue FalseVal1 = Op.getOperand(3);
5223 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5224
5225 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5226 if (Op2.getOpcode() != ISD::SELECT_CC)
5227 return SDValue();
5228
5229 SDValue V2 = Op2.getOperand(0);
5230 SDValue K2 = Op2.getOperand(1);
5231 SDValue TrueVal2 = Op2.getOperand(2);
5232 SDValue FalseVal2 = Op2.getOperand(3);
5233 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5234
5235 SDValue V1Tmp = V1;
5236 SDValue V2Tmp = V2;
5237
5238 // Check that the registers and the constants match a max(min()) or min(max())
5239 // pattern
5240 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5241 K2 != FalseVal2 ||
5242 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5243 return SDValue();
5244
5245 // Check that the constant in the lower-bound check is
5246 // the opposite of the constant in the upper-bound check
5247 // in 1's complement.
5248  if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5249    return SDValue();
5250
5251 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5252 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5253 int64_t PosVal = std::max(Val1, Val2);
5254 int64_t NegVal = std::min(Val1, Val2);
5255
5256 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5257 !isPowerOf2_64(PosVal + 1))
5258 return SDValue();
5259
5260 // Handle the difference between USAT (unsigned) and SSAT (signed)
5261 // saturation
5262 // At this point, PosVal is guaranteed to be positive
5263 uint64_t K = PosVal;
5264 SDLoc dl(Op);
5265 if (Val1 == ~Val2)
5266 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5267 DAG.getConstant(llvm::countr_one(K), dl, VT));
5268 if (NegVal == 0)
5269 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5270 DAG.getConstant(llvm::countr_one(K), dl, VT));
5271
5272 return SDValue();
5273}
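// Worked example (illustrative): clamping a value to [-128, 127], e.g.
//   %lo  = select (icmp sgt i32 %x,  -128), i32 %x,  i32 -128   ; max(x, -128)
//   %res = select (icmp slt i32 %lo,  127), i32 %lo, i32  127   ; min(lo, 127)
// reaches this code as two nested SELECT_CC nodes. Since 127 == ~(-128) and
// 127 + 1 is a power of two, the whole sequence is emitted as a single SSAT
// saturating %x to 8 signed bits.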
5274
5275// Check if a condition of the type x < k ? k : x can be converted into a
5276// bit operation instead of conditional moves.
5277// Currently this is allowed given:
5278// - The conditions and values match up
5279// - k is 0 or -1 (all ones)
5280 // This function will not check the last condition; that's up to the caller.
5281// It returns true if the transformation can be made, and in such case
5282// returns x in V, and k in SatK.
5283static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5284                                         SDValue &SatK)
5285{
5286 SDValue LHS = Op.getOperand(0);
5287 SDValue RHS = Op.getOperand(1);
5288 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5289 SDValue TrueVal = Op.getOperand(2);
5290 SDValue FalseVal = Op.getOperand(3);
5291
5292  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5293                                              ? &RHS
5294 : nullptr;
5295
5296 // No constant operation in comparison, early out
5297 if (!K)
5298 return false;
5299
5300 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5301 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5302 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5303
5304 // If the constant on left and right side, or variable on left and right,
5305 // does not match, early out
5306 if (*K != KTmp || V != VTmp)
5307 return false;
5308
5309 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5310 SatK = *K;
5311 return true;
5312 }
5313
5314 return false;
5315}
5316
5317bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5318 if (VT == MVT::f32)
5319 return !Subtarget->hasVFP2Base();
5320 if (VT == MVT::f64)
5321 return !Subtarget->hasFP64();
5322 if (VT == MVT::f16)
5323 return !Subtarget->hasFullFP16();
5324 return false;
5325}
5326
5327SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5328 EVT VT = Op.getValueType();
5329 SDLoc dl(Op);
5330
5331 // Try to convert two saturating conditional selects into a single SSAT
5332 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5333 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5334 return SatValue;
5335
5336 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5337 // into more efficient bit operations, which is possible when k is 0 or -1
5338 // On ARM and Thumb-2 which have flexible operand 2 this will result in
5339 // single instructions. On Thumb the shift and the bit operation will be two
5340 // instructions.
5341 // Only allow this transformation on full-width (32-bit) operations
5342 SDValue LowerSatConstant;
5343 SDValue SatValue;
5344 if (VT == MVT::i32 &&
5345 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5346 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5347 DAG.getConstant(31, dl, VT));
5348 if (isNullConstant(LowerSatConstant)) {
5349 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5350 DAG.getAllOnesConstant(dl, VT));
5351 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5352 } else if (isAllOnesConstant(LowerSatConstant))
5353 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5354 }
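// Worked example (illustrative): for i32 "x < 0 ? 0 : x", SatValue is x and
// LowerSatConstant is 0, so the code above emits x & ~(x >> 31) using an
// arithmetic shift, which yields x for non-negative x and 0 otherwise. For
// "x < -1 ? -1 : x" the all-ones case emits x | (x >> 31) instead.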
5355
5356 SDValue LHS = Op.getOperand(0);
5357 SDValue RHS = Op.getOperand(1);
5358 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5359 SDValue TrueVal = Op.getOperand(2);
5360 SDValue FalseVal = Op.getOperand(3);
5361 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5362 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5363 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
5364 if (Op.getValueType().isInteger()) {
5365
5366 // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
5367 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
5368 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
5369 // Both require less instructions than compare and conditional select.
5370 if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC &&
5371 RHSC->isZero() && CFVal && CFVal->isZero() &&
5372 LHS.getValueType() == RHS.getValueType()) {
5373 EVT VT = LHS.getValueType();
5374 SDValue Shift =
5375 DAG.getNode(ISD::SRA, dl, VT, LHS,
5376 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
5377
5378 if (CC == ISD::SETGT)
5379 Shift = DAG.getNOT(dl, Shift, VT);
5380
5381 return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
5382 }
5383 }
5384
5385 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5386 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5387 unsigned TVal = CTVal->getZExtValue();
5388 unsigned FVal = CFVal->getZExtValue();
5389 unsigned Opcode = 0;
5390
5391 if (TVal == ~FVal) {
5392 Opcode = ARMISD::CSINV;
5393 } else if (TVal == ~FVal + 1) {
5394 Opcode = ARMISD::CSNEG;
5395 } else if (TVal + 1 == FVal) {
5396 Opcode = ARMISD::CSINC;
5397 } else if (TVal == FVal + 1) {
5398 Opcode = ARMISD::CSINC;
5399 std::swap(TrueVal, FalseVal);
5400 std::swap(TVal, FVal);
5401 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5402 }
5403
5404 if (Opcode) {
5405 // If one of the constants is cheaper than another, materialise the
5406 // cheaper one and let the csel generate the other.
5407 if (Opcode != ARMISD::CSINC &&
5408 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5409 std::swap(TrueVal, FalseVal);
5410 std::swap(TVal, FVal);
5411 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5412 }
5413
5414      // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5415      // condition to get there. CSINC is not invertible like the other two
5416      // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5417 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5418 std::swap(TrueVal, FalseVal);
5419 std::swap(TVal, FVal);
5420 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5421 }
5422
5423 // Drops F's value because we can get it by inverting/negating TVal.
5424 FalseVal = TrueVal;
5425
5426 SDValue ARMcc;
5427 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5428 EVT VT = TrueVal.getValueType();
5429 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5430 }
5431 }
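// Worked example (illustrative): a select_cc with constants TVal = 5 and
// FVal = 6 satisfies TVal + 1 == FVal, so Opcode becomes ARMISD::CSINC and
// only the value 5 needs to be materialised; the other result is recreated
// by the conditional increment. Likewise TVal == ~FVal selects CSINV and
// TVal == -FVal selects CSNEG.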
5432
5433 if (isUnsupportedFloatingType(LHS.getValueType())) {
5434 softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5435
5436 // If softenSetCCOperands only returned one value, we should compare it to
5437 // zero.
5438 if (!RHS.getNode()) {
5439 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5440 CC = ISD::SETNE;
5441 }
5442 }
5443
5444 if (LHS.getValueType() == MVT::i32) {
5445 // Try to generate VSEL on ARMv8.
5446 // The VSEL instruction can't use all the usual ARM condition
5447 // codes: it only has two bits to select the condition code, so it's
5448 // constrained to use only GE, GT, VS and EQ.
5449 //
5450 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5451 // swap the operands of the previous compare instruction (effectively
5452 // inverting the compare condition, swapping 'less' and 'greater') and
5453 // sometimes need to swap the operands to the VSEL (which inverts the
5454 // condition in the sense of firing whenever the previous condition didn't)
5455 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5456 TrueVal.getValueType() == MVT::f32 ||
5457 TrueVal.getValueType() == MVT::f64)) {
5458      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5459      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5460 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5461 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5462 std::swap(TrueVal, FalseVal);
5463 }
5464 }
5465
5466 SDValue ARMcc;
5467 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5468    // Choose GE over PL, which vsel does not support
5469 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5470 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5471 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5472 }
5473
5474 ARMCC::CondCodes CondCode, CondCode2;
5475 FPCCToARMCC(CC, CondCode, CondCode2);
5476
5477 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5478 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5479 // must use VSEL (limited condition codes), due to not having conditional f16
5480 // moves.
5481 if (Subtarget->hasFPARMv8Base() &&
5482 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5483 (TrueVal.getValueType() == MVT::f16 ||
5484 TrueVal.getValueType() == MVT::f32 ||
5485 TrueVal.getValueType() == MVT::f64)) {
5486 bool swpCmpOps = false;
5487 bool swpVselOps = false;
5488 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5489
5490 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5491 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5492 if (swpCmpOps)
5493 std::swap(LHS, RHS);
5494 if (swpVselOps)
5495 std::swap(TrueVal, FalseVal);
5496 }
5497 }
5498
5499 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5500 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5501 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
5502 if (CondCode2 != ARMCC::AL) {
5503 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5504 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
5505 }
5506 return Result;
5507}
5508
5509/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5510/// to morph to an integer compare sequence.
5511static bool canChangeToInt(SDValue Op, bool &SeenZero,
5512 const ARMSubtarget *Subtarget) {
5513 SDNode *N = Op.getNode();
5514 if (!N->hasOneUse())
5515 // Otherwise it requires moving the value from fp to integer registers.
5516 return false;
5517 if (!N->getNumValues())
5518 return false;
5519 EVT VT = Op.getValueType();
5520 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5521 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5522 // vmrs are very slow, e.g. cortex-a8.
5523 return false;
5524
5525 if (isFloatingPointZero(Op)) {
5526 SeenZero = true;
5527 return true;
5528 }
5529 return ISD::isNormalLoad(N);
5530}
5531
5532static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5533  if (isFloatingPointZero(Op))
5534    return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5535
5536  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5537    return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5538 Ld->getPointerInfo(), Ld->getAlign(),
5539 Ld->getMemOperand()->getFlags());
5540
5541 llvm_unreachable("Unknown VFP cmp argument!");
5542}
5543
5544static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5545                           SDValue &RetVal1, SDValue &RetVal2) {
5546 SDLoc dl(Op);
5547
5548 if (isFloatingPointZero(Op)) {
5549 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5550 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5551 return;
5552 }
5553
5554 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5555 SDValue Ptr = Ld->getBasePtr();
5556 RetVal1 =
5557 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5558 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5559
5560 EVT PtrType = Ptr.getValueType();
5561 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5562 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5563 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5564 Ld->getPointerInfo().getWithOffset(4),
5565 commonAlignment(Ld->getAlign(), 4),
5566 Ld->getMemOperand()->getFlags());
5567 return;
5568 }
5569
5570 llvm_unreachable("Unknown VFP cmp argument!");
5571}
5572
5573/// OptimizeVFPBrcond - With nnan, it's legal to optimize some
5574/// f32 and even f64 comparisons to integer ones.
5575SDValue
5576ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5577 SDValue Chain = Op.getOperand(0);
5578 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5579 SDValue LHS = Op.getOperand(2);
5580 SDValue RHS = Op.getOperand(3);
5581 SDValue Dest = Op.getOperand(4);
5582 SDLoc dl(Op);
5583
5584 bool LHSSeenZero = false;
5585 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5586 bool RHSSeenZero = false;
5587 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5588 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5589 // If unsafe fp math optimization is enabled and there are no other uses of
5590 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5591 // to an integer comparison.
5592 if (CC == ISD::SETOEQ)
5593 CC = ISD::SETEQ;
5594 else if (CC == ISD::SETUNE)
5595 CC = ISD::SETNE;
5596
5597 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5598 SDValue ARMcc;
5599 if (LHS.getValueType() == MVT::f32) {
5600 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5601 bitcastf32Toi32(LHS, DAG), Mask);
5602 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5603 bitcastf32Toi32(RHS, DAG), Mask);
5604 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5605 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5606 Cmp);
5607 }
5608
5609 SDValue LHS1, LHS2;
5610 SDValue RHS1, RHS2;
5611 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5612 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5613 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5614 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5615    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5616    ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5617 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5618 return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
5619 }
5620
5621 return SDValue();
5622}
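// Worked example (illustrative): with nnan, an f32 compare "f == 0.0f" can be
// done entirely in integer registers; the AND with 0x7fffffff above clears
// the sign bit so that both +0.0 (0x00000000) and -0.0 (0x80000000) compare
// equal to the zero operand.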
5623
5624SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5625 SDValue Chain = Op.getOperand(0);
5626 SDValue Cond = Op.getOperand(1);
5627 SDValue Dest = Op.getOperand(2);
5628 SDLoc dl(Op);
5629
5630 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5631 // instruction.
5632 unsigned Opc = Cond.getOpcode();
5633 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5634 !Subtarget->isThumb1Only();
5635 if (Cond.getResNo() == 1 &&
5636 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5637 Opc == ISD::USUBO || OptimizeMul)) {
5638 // Only lower legal XALUO ops.
5639 if (!isTypeLegal(Cond->getValueType(0)))
5640 return SDValue();
5641
5642 // The actual operation with overflow check.
5643 SDValue Value, OverflowCmp;
5644 SDValue ARMcc;
5645 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5646
5647 // Reverse the condition code.
5648    ARMCC::CondCodes CondCode =
5649        (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5650    CondCode = ARMCC::getOppositeCondition(CondCode);
5651 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5652
5653 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5654 OverflowCmp);
5655 }
5656
5657 return SDValue();
5658}
5659
5660SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5661 SDValue Chain = Op.getOperand(0);
5662 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5663 SDValue LHS = Op.getOperand(2);
5664 SDValue RHS = Op.getOperand(3);
5665 SDValue Dest = Op.getOperand(4);
5666 SDLoc dl(Op);
5667
5668 if (isUnsupportedFloatingType(LHS.getValueType())) {
5669 softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5670
5671 // If softenSetCCOperands only returned one value, we should compare it to
5672 // zero.
5673 if (!RHS.getNode()) {
5674 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5675 CC = ISD::SETNE;
5676 }
5677 }
5678
5679 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5680 // instruction.
5681 unsigned Opc = LHS.getOpcode();
5682 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5683 !Subtarget->isThumb1Only();
5684 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5685 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5686 Opc == ISD::USUBO || OptimizeMul) &&
5687 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5688 // Only lower legal XALUO ops.
5689 if (!isTypeLegal(LHS->getValueType(0)))
5690 return SDValue();
5691
5692 // The actual operation with overflow check.
5693 SDValue Value, OverflowCmp;
5694 SDValue ARMcc;
5695 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5696
5697 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5698 // Reverse the condition code.
5699      ARMCC::CondCodes CondCode =
5700          (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5701      CondCode = ARMCC::getOppositeCondition(CondCode);
5702 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5703 }
5704
5705 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
5706 OverflowCmp);
5707 }
5708
5709 if (LHS.getValueType() == MVT::i32) {
5710 SDValue ARMcc;
5711 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5712 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
5713 }
5714
5715 SDNodeFlags Flags = Op->getFlags();
5716 if ((getTargetMachine().Options.UnsafeFPMath || Flags.hasNoNaNs()) &&
5717 (DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&
5718 DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE()) &&
5719 (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE ||
5720 CC == ISD::SETUNE)) {
5721 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5722 return Result;
5723 }
5724
5725 ARMCC::CondCodes CondCode, CondCode2;
5726 FPCCToARMCC(CC, CondCode, CondCode2);
5727
5728 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5729 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5730 SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
5731 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5732 if (CondCode2 != ARMCC::AL) {
5733 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5734 SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
5735 Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
5736 }
5737 return Res;
5738}
5739
5740SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5741 SDValue Chain = Op.getOperand(0);
5742 SDValue Table = Op.getOperand(1);
5743 SDValue Index = Op.getOperand(2);
5744 SDLoc dl(Op);
5745
5746 EVT PTy = getPointerTy(DAG.getDataLayout());
5747 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5748 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5749 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5750 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5751 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5752 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5753    // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
5754 // which does another jump to the destination. This also makes it easier
5755 // to translate it to TBB / TBH later (Thumb2 only).
5756 // FIXME: This might not work if the function is extremely large.
5757 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5758 Addr, Op.getOperand(2), JTI);
5759 }
5760 if (isPositionIndependent() || Subtarget->isROPI()) {
5761 Addr =
5762        DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5763                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5764 Chain = Addr.getValue(1);
5765 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5766 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5767 } else {
5768 Addr =
5769        DAG.getLoad(PTy, dl, Chain, Addr,
5770                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5771 Chain = Addr.getValue(1);
5772 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5773 }
5774}
5775
5776static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5777  EVT VT = Op.getValueType();
5778 SDLoc dl(Op);
5779
5780 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5781 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5782 return Op;
5783 return DAG.UnrollVectorOp(Op.getNode());
5784 }
5785
5786 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5787
5788 EVT NewTy;
5789 const EVT OpTy = Op.getOperand(0).getValueType();
5790 if (OpTy == MVT::v4f32)
5791 NewTy = MVT::v4i32;
5792 else if (OpTy == MVT::v4f16 && HasFullFP16)
5793 NewTy = MVT::v4i16;
5794 else if (OpTy == MVT::v8f16 && HasFullFP16)
5795 NewTy = MVT::v8i16;
5796 else
5797 llvm_unreachable("Invalid type for custom lowering!");
5798
5799 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5800 return DAG.UnrollVectorOp(Op.getNode());
5801
5802 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5803 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5804}
5805
5806SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5807 EVT VT = Op.getValueType();
5808 if (VT.isVector())
5809 return LowerVectorFP_TO_INT(Op, DAG);
5810
5811 bool IsStrict = Op->isStrictFPOpcode();
5812 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5813
5814 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5815 RTLIB::Libcall LC;
5816 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5817 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5818 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5819 Op.getValueType());
5820 else
5821 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5822 Op.getValueType());
5823 SDLoc Loc(Op);
5824 MakeLibCallOptions CallOptions;
5825 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5826    SDValue Result;
5827    std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5828 CallOptions, Loc, Chain);
5829 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5830 }
5831
5832 // FIXME: Remove this when we have strict fp instruction selection patterns
5833 if (IsStrict) {
5834 SDLoc Loc(Op);
5835 SDValue Result =
5836        DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5837                                                             : ISD::FP_TO_UINT,
5838                    Loc, Op.getValueType(), SrcVal);
5839 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5840 }
5841
5842 return Op;
5843}
5844
5845static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5846                                  const ARMSubtarget *Subtarget) {
5847 EVT VT = Op.getValueType();
5848 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5849 EVT FromVT = Op.getOperand(0).getValueType();
5850
5851 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5852 return Op;
5853 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5854 Subtarget->hasFP64())
5855 return Op;
5856 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5857 Subtarget->hasFullFP16())
5858 return Op;
5859 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5860 Subtarget->hasMVEFloatOps())
5861 return Op;
5862 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5863 Subtarget->hasMVEFloatOps())
5864 return Op;
5865
5866 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5867 return SDValue();
5868
5869 SDLoc DL(Op);
5870 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5871 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5872 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5873 DAG.getValueType(VT.getScalarType()));
5874 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5875 DAG.getConstant((1 << BW) - 1, DL, VT));
5876 if (IsSigned)
5877 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5878 DAG.getSignedConstant(-(1 << BW), DL, VT));
5879 return Max;
5880}
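// Worked example (illustrative): for a signed saturating conversion to i16
// lanes, BW is 15, so the vector result is clamped with SMIN against 0x7fff
// and SMAX against -0x8000, i.e. to the i16 range, before being used.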
5881
5882static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5883  EVT VT = Op.getValueType();
5884 SDLoc dl(Op);
5885
5886 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5887 if (VT.getVectorElementType() == MVT::f32)
5888 return Op;
5889 return DAG.UnrollVectorOp(Op.getNode());
5890 }
5891
5892 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5893 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5894 "Invalid type for custom lowering!");
5895
5896 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5897
5898 EVT DestVecType;
5899 if (VT == MVT::v4f32)
5900 DestVecType = MVT::v4i32;
5901 else if (VT == MVT::v4f16 && HasFullFP16)
5902 DestVecType = MVT::v4i16;
5903 else if (VT == MVT::v8f16 && HasFullFP16)
5904 DestVecType = MVT::v8i16;
5905 else
5906 return DAG.UnrollVectorOp(Op.getNode());
5907
5908 unsigned CastOpc;
5909 unsigned Opc;
5910 switch (Op.getOpcode()) {
5911 default: llvm_unreachable("Invalid opcode!");
5912 case ISD::SINT_TO_FP:
5913 CastOpc = ISD::SIGN_EXTEND;
5914    Opc = ISD::SINT_TO_FP;
5915    break;
5916 case ISD::UINT_TO_FP:
5917 CastOpc = ISD::ZERO_EXTEND;
5918    Opc = ISD::UINT_TO_FP;
5919    break;
5920 }
5921
5922 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5923 return DAG.getNode(Opc, dl, VT, Op);
5924}
5925
5926SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5927 EVT VT = Op.getValueType();
5928 if (VT.isVector())
5929 return LowerVectorINT_TO_FP(Op, DAG);
5930 if (isUnsupportedFloatingType(VT)) {
5931 RTLIB::Libcall LC;
5932 if (Op.getOpcode() == ISD::SINT_TO_FP)
5933 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5934 Op.getValueType());
5935 else
5936 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5937 Op.getValueType());
5938 MakeLibCallOptions CallOptions;
5939 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5940 CallOptions, SDLoc(Op)).first;
5941 }
5942
5943 return Op;
5944}
5945
5946SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5947 // Implement fcopysign with a fabs and a conditional fneg.
5948 SDValue Tmp0 = Op.getOperand(0);
5949 SDValue Tmp1 = Op.getOperand(1);
5950 SDLoc dl(Op);
5951 EVT VT = Op.getValueType();
5952 EVT SrcVT = Tmp1.getValueType();
5953 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5954 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5955 bool UseNEON = !InGPR && Subtarget->hasNEON();
5956
5957 if (UseNEON) {
5958 // Use VBSL to copy the sign bit.
5959 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5960 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5961 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5962 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5963 if (VT == MVT::f64)
5964 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5965 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5966 DAG.getConstant(32, dl, MVT::i32));
5967 else /*if (VT == MVT::f32)*/
5968 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5969 if (SrcVT == MVT::f32) {
5970 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5971 if (VT == MVT::f64)
5972 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5973 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5974 DAG.getConstant(32, dl, MVT::i32));
5975 } else if (VT == MVT::f32)
5976 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5977 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5978 DAG.getConstant(32, dl, MVT::i32));
5979 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5980 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5981
5982    SDValue AllOnes = DAG.getConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
5983                                      dl, MVT::i32);
5984 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5985 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5986 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5987
5988 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5989 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5990 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5991 if (VT == MVT::f32) {
5992 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5993 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5994 DAG.getConstant(0, dl, MVT::i32));
5995 } else {
5996 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5997 }
5998
5999 return Res;
6000 }
6001
6002 // Bitcast operand 1 to i32.
6003 if (SrcVT == MVT::f64)
6004 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6005 Tmp1).getValue(1);
6006 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6007
6008 // Or in the signbit with integer operations.
6009 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6010 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6011 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6012 if (VT == MVT::f32) {
6013 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6014 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6015 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6016 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6017 }
6018
6019 // f64: Or the high part with signbit and then combine two parts.
6020 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6021 Tmp0);
6022 SDValue Lo = Tmp0.getValue(0);
6023 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6024 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6025 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6026}
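// Worked example (illustrative): for f32 copysign(1.0f, -2.0f) the integer
// path above computes (bits(-2.0f) & 0x80000000) | (bits(1.0f) & 0x7fffffff)
// = 0x80000000 | 0x3f800000 = 0xbf800000, which is -1.0f.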
6027
6028SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6029 MachineFunction &MF = DAG.getMachineFunction();
6030 MachineFrameInfo &MFI = MF.getFrameInfo();
6031 MFI.setReturnAddressIsTaken(true);
6032
6033 EVT VT = Op.getValueType();
6034 SDLoc dl(Op);
6035 unsigned Depth = Op.getConstantOperandVal(0);
6036 if (Depth) {
6037 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6038 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6039 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6040 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6041 MachinePointerInfo());
6042 }
6043
6044 // Return LR, which contains the return address. Mark it an implicit live-in.
6045 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6046 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6047}
6048
6049SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6050 const ARMBaseRegisterInfo &ARI =
6051 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6052 MachineFunction &MF = DAG.getMachineFunction();
6053 MachineFrameInfo &MFI = MF.getFrameInfo();
6054 MFI.setFrameAddressIsTaken(true);
6055
6056 EVT VT = Op.getValueType();
6057 SDLoc dl(Op); // FIXME probably not meaningful
6058 unsigned Depth = Op.getConstantOperandVal(0);
6059 Register FrameReg = ARI.getFrameRegister(MF);
6060 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6061 while (Depth--)
6062 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6063 MachinePointerInfo());
6064 return FrameAddr;
6065}
6066
6067// FIXME? Maybe this could be a TableGen attribute on some registers and
6068// this table could be generated automatically from RegInfo.
6069Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6070 const MachineFunction &MF) const {
6071 return StringSwitch<Register>(RegName)
6072 .Case("sp", ARM::SP)
6073 .Default(Register());
6074}
6075
6076// Result is 64 bit value so split into two 32 bit values and return as a
6077// pair of values.
6078static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6079                                SelectionDAG &DAG) {
6080 SDLoc DL(N);
6081
6082 // This function is only supposed to be called for i64 type destination.
6083 assert(N->getValueType(0) == MVT::i64
6084 && "ExpandREAD_REGISTER called for non-i64 type result.");
6085
6086  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6087                             DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6088 N->getOperand(0),
6089 N->getOperand(1));
6090
6091 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6092 Read.getValue(1)));
6093 Results.push_back(Read.getValue(2)); // Chain
6094}
6095
6096/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6097/// When \p DstVT, the destination type of \p BC, is on the vector
6098/// register bank and the source of bitcast, \p Op, operates on the same bank,
6099/// it might be possible to combine them, such that everything stays on the
6100/// vector register bank.
6101/// \return The node that would replace \p BC, if the combine
6102/// is possible.
6103static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6104                                                SelectionDAG &DAG) {
6105 SDValue Op = BC->getOperand(0);
6106 EVT DstVT = BC->getValueType(0);
6107
6108 // The only vector instruction that can produce a scalar (remember,
6109 // since the bitcast was about to be turned into VMOVDRR, the source
6110 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6111 // Moreover, we can do this combine only if there is one use.
6112 // Finally, if the destination type is not a vector, there is not
6113  // much point in forcing everything on the vector bank.
6114 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6115 !Op.hasOneUse())
6116 return SDValue();
6117
6118 // If the index is not constant, we will introduce an additional
6119 // multiply that will stick.
6120 // Give up in that case.
6121 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6122 if (!Index)
6123 return SDValue();
6124 unsigned DstNumElt = DstVT.getVectorNumElements();
6125
6126 // Compute the new index.
6127 const APInt &APIntIndex = Index->getAPIntValue();
6128 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6129 NewIndex *= APIntIndex;
6130 // Check if the new constant index fits into i32.
6131 if (NewIndex.getBitWidth() > 32)
6132 return SDValue();
6133
6134 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6135 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6136 SDLoc dl(Op);
6137 SDValue ExtractSrc = Op.getOperand(0);
6138 EVT VecVT = EVT::getVectorVT(
6139 *DAG.getContext(), DstVT.getScalarType(),
6140 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6141 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6142 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6143 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6144}
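// Worked example (illustrative): for v2f32 (bitcast (i64 extractelt v2i64
// %src, 1)), DstNumElt is 2 and the new index is 2, so the combine rewrites
// the node as (extract_subvector (bitcast %src to v4f32), 2), keeping the
// value on the vector register bank instead of bouncing through a GPR pair.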
6145
6146/// ExpandBITCAST - If the target supports VFP, this function is called to
6147/// expand a bit convert where either the source or destination type is i64 to
6148/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6149/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6150/// vectors), since the legalizer won't know what to do with that.
6151SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6152 const ARMSubtarget *Subtarget) const {
6153 SDLoc dl(N);
6154 SDValue Op = N->getOperand(0);
6155
6156 // This function is only supposed to be called for i16 and i64 types, either
6157 // as the source or destination of the bit convert.
6158 EVT SrcVT = Op.getValueType();
6159 EVT DstVT = N->getValueType(0);
6160
6161 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6162 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6163 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6164 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6165
6166 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6167 (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
6168 if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
6169 Op = DAG.getBitcast(MVT::f16, Op);
6170 return DAG.getNode(
6171 ISD::TRUNCATE, SDLoc(N), DstVT,
6172 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6173 }
6174
6175 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6176 return SDValue();
6177
6178 // Turn i64->f64 into VMOVDRR.
6179 if (SrcVT == MVT::i64 && isTypeLegal(DstVT)) {
6180 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6181 // if we can combine the bitcast with its source.
6182    if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6183      return Val;
6184 SDValue Lo, Hi;
6185 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6186 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6187 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6188 }
6189
6190 // Turn f64->i64 into VMOVRRD.
6191 if (DstVT == MVT::i64 && isTypeLegal(SrcVT)) {
6192 SDValue Cvt;
6193 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6194 SrcVT.getVectorNumElements() > 1)
6195 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6196 DAG.getVTList(MVT::i32, MVT::i32),
6197 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6198 else
6199 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6200 DAG.getVTList(MVT::i32, MVT::i32), Op);
6201 // Merge the pieces into a single i64 value.
6202 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6203 }
6204
6205 return SDValue();
6206}
6207
6208/// getZeroVector - Returns a vector of specified type with all zero elements.
6209/// Zero vectors are used to represent vector negation and in those cases
6210/// will be implemented with the NEON VNEG instruction. However, VNEG does
6211/// not support i64 elements, so sometimes the zero vectors will need to be
6212/// explicitly constructed. Regardless, use a canonical VMOV to create the
6213/// zero vector.
6214static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6215 assert(VT.isVector() && "Expected a vector type");
6216 // The canonical modified immediate encoding of a zero vector is....0!
6217 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6218 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6219 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6220 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6221}
6222
6223/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6224/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6225SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6226 SelectionDAG &DAG) const {
6227 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6228 EVT VT = Op.getValueType();
6229 unsigned VTBits = VT.getSizeInBits();
6230 SDLoc dl(Op);
6231 SDValue ShOpLo = Op.getOperand(0);
6232 SDValue ShOpHi = Op.getOperand(1);
6233 SDValue ShAmt = Op.getOperand(2);
6234 SDValue ARMcc;
6235 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6236
6237 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6238
6239 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6240 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6241 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6242 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6243 DAG.getConstant(VTBits, dl, MVT::i32));
6244 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6245 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6246 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6247 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6248 ISD::SETGE, ARMcc, DAG, dl);
6249 SDValue Lo =
6250 DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
6251
6252 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6253 SDValue HiBigShift = Opc == ISD::SRA
6254 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6255 DAG.getConstant(VTBits - 1, dl, VT))
6256 : DAG.getConstant(0, dl, VT);
6257 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6258 ISD::SETGE, ARMcc, DAG, dl);
6259 SDValue Hi =
6260 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6261
6262 SDValue Ops[2] = { Lo, Hi };
6263 return DAG.getMergeValues(Ops, dl);
6264}
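// Worked example (illustrative): for an SRL_PARTS by 40, ExtraShAmt is 8
// (>= 0), so both CMOVs pick the "big shift" operands: Lo becomes Hi >> 8 and
// Hi becomes 0, which is the expected result of a 64-bit logical shift right
// by 40.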
6265
6266/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6267/// i32 values and take a 2 x i32 value to shift plus a shift amount.
6268SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6269 SelectionDAG &DAG) const {
6270 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6271 EVT VT = Op.getValueType();
6272 unsigned VTBits = VT.getSizeInBits();
6273 SDLoc dl(Op);
6274 SDValue ShOpLo = Op.getOperand(0);
6275 SDValue ShOpHi = Op.getOperand(1);
6276 SDValue ShAmt = Op.getOperand(2);
6277 SDValue ARMcc;
6278
6279 assert(Op.getOpcode() == ISD::SHL_PARTS);
6280 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6281 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6282 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6283 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6284 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6285
6286 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6287 DAG.getConstant(VTBits, dl, MVT::i32));
6288 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6289 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6290 ISD::SETGE, ARMcc, DAG, dl);
6291 SDValue Hi =
6292 DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
6293
6294 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6295 ISD::SETGE, ARMcc, DAG, dl);
6296 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6297 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6298 DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
6299
6300 SDValue Ops[2] = { Lo, Hi };
6301 return DAG.getMergeValues(Ops, dl);
6302}
6303
6304SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6305 SelectionDAG &DAG) const {
6306 // The rounding mode is in bits 23:22 of the FPSCR.
6307 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6308  // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
6309 // so that the shift + and get folded into a bitfield extract.
6310 SDLoc dl(Op);
6311 SDValue Chain = Op.getOperand(0);
6312 SDValue Ops[] = {Chain,
6313 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6314
6315 SDValue FPSCR =
6316 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6317 Chain = FPSCR.getValue(1);
6318 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6319 DAG.getConstant(1U << 22, dl, MVT::i32));
6320 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6321 DAG.getConstant(22, dl, MVT::i32));
6322 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6323 DAG.getConstant(3, dl, MVT::i32));
6324 return DAG.getMergeValues({And, Chain}, dl);
6325}
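// Worked example (illustrative): if FPSCR[23:22] is 1 (round towards plus
// infinity on ARM), the sequence above computes ((1 + 1) & 3) = 2, which is
// the FLT_ROUNDS encoding for rounding towards positive infinity.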
6326
6327SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6328 SelectionDAG &DAG) const {
6329 SDLoc DL(Op);
6330 SDValue Chain = Op->getOperand(0);
6331 SDValue RMValue = Op->getOperand(1);
6332
6333 // The rounding mode is in bits 23:22 of the FPSCR.
6334 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6335 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6336  // (((arg - 1) & 3) << 22).
6337 //
6338 // It is expected that the argument of llvm.set.rounding is within the
6339  // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6340  // responsibility of the code that generates llvm.set.rounding to ensure
6341  // this condition.
6342
6343 // Calculate new value of FPSCR[23:22].
6344 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6345 DAG.getConstant(1, DL, MVT::i32));
6346 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6347 DAG.getConstant(0x3, DL, MVT::i32));
6348 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6349 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6350
6351 // Get current value of FPSCR.
6352 SDValue Ops[] = {Chain,
6353 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6354 SDValue FPSCR =
6355 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6356 Chain = FPSCR.getValue(1);
6357 FPSCR = FPSCR.getValue(0);
6358
6359 // Put new rounding mode into FPSCR[23:22].
6360 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6361 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6362 DAG.getConstant(RMMask, DL, MVT::i32));
6363 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6364 SDValue Ops2[] = {
6365 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6366 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6367}
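// Worked example (illustrative): llvm.set.rounding(2) (round towards positive
// infinity) computes ((2 - 1) & 3) = 1, the ARM encoding of that mode, shifts
// it into bits 23:22 and merges it into FPSCR under the mask above.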
6368
6369SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6370 SelectionDAG &DAG) const {
6371 SDLoc DL(Op);
6372 SDValue Chain = Op->getOperand(0);
6373 SDValue Mode = Op->getOperand(1);
6374
6375 // Generate nodes to build:
6376 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6377 SDValue Ops[] = {Chain,
6378 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6379 SDValue FPSCR =
6380 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6381 Chain = FPSCR.getValue(1);
6382 FPSCR = FPSCR.getValue(0);
6383
6384 SDValue FPSCRMasked =
6385 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6386 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6387 SDValue InputMasked =
6388 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6389 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6390 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6391
6392 SDValue Ops2[] = {
6393 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6394 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6395}
6396
6397SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6398 SelectionDAG &DAG) const {
6399 SDLoc DL(Op);
6400 SDValue Chain = Op->getOperand(0);
6401
6402 // To get the default FP mode all control bits are cleared:
6403 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6404 SDValue Ops[] = {Chain,
6405 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6406 SDValue FPSCR =
6407 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6408 Chain = FPSCR.getValue(1);
6409 FPSCR = FPSCR.getValue(0);
6410
6411 SDValue FPSCRMasked = DAG.getNode(
6412 ISD::AND, DL, MVT::i32, FPSCR,
6413      DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6414  SDValue Ops2[] = {Chain,
6415 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6416 FPSCRMasked};
6417 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6418}
6419
6420static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6421                         const ARMSubtarget *ST) {
6422 SDLoc dl(N);
6423 EVT VT = N->getValueType(0);
6424 if (VT.isVector() && ST->hasNEON()) {
6425
6426 // Compute the least significant set bit: LSB = X & -X
6427 SDValue X = N->getOperand(0);
6428 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6429 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6430
6431 EVT ElemTy = VT.getVectorElementType();
6432
6433 if (ElemTy == MVT::i8) {
6434 // Compute with: cttz(x) = ctpop(lsb - 1)
6435 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6436 DAG.getTargetConstant(1, dl, ElemTy));
6437 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6438 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6439 }
6440
6441 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6442 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6443 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6444 unsigned NumBits = ElemTy.getSizeInBits();
6445 SDValue WidthMinus1 =
6446 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6447 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6448 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6449 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6450 }
6451
6452 // Compute with: cttz(x) = ctpop(lsb - 1)
6453
6454 // Compute LSB - 1.
6455 SDValue Bits;
6456 if (ElemTy == MVT::i64) {
6457 // Load constant 0xffff'ffff'ffff'ffff to register.
6458 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6459 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6460 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6461 } else {
6462 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6463 DAG.getTargetConstant(1, dl, ElemTy));
6464 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6465 }
6466 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6467 }
6468
6469 if (!ST->hasV6T2Ops())
6470 return SDValue();
6471
6472 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6473 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6474}
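// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The two scalar identities used by the vector lowering above, written out on
// plain 32-bit integers. Both assume the input is nonzero (the
// CTTZ_ZERO_UNDEF case); all names below are illustrative only.
#include <cstdint>

static unsigned SketchPopCount(uint32_t V) {     // portable popcount
  unsigned N = 0;
  for (; V; V &= V - 1)
    ++N;
  return N;
}

static unsigned SketchCountLeadingZeros(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit && !(V & Bit); Bit >>= 1)
    ++N;
  return N;
}

// cttz(x) == ctpop((x & -x) - 1): x & -x isolates the lowest set bit, and
// subtracting one turns it into a mask covering exactly the trailing zeros.
static unsigned SketchCttzViaPopCount(uint32_t X) {
  uint32_t LSB = X & (0u - X);
  return SketchPopCount(LSB - 1);
}

// cttz(x) == (width - 1) - ctlz(x & -x): the isolated lowest set bit has as
// many leading zeros as there are bit positions above it.
static unsigned SketchCttzViaCtlz(uint32_t X) {
  uint32_t LSB = X & (0u - X);
  return 31 - SketchCountLeadingZeros(LSB);
}
// ----------------------------------------------------------------------------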
6475
6476static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6477 const ARMSubtarget *ST) {
6478 EVT VT = N->getValueType(0);
6479 SDLoc DL(N);
6480
6481 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6482 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6483 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6484 "Unexpected type for custom ctpop lowering");
6485
6486 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6487 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6488 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6489 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6490
6491 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6492 unsigned EltSize = 8;
6493 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6494 while (EltSize != VT.getScalarSizeInBits()) {
6495 SmallVector<SDValue, 8> Ops;
6496 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6497 TLI.getPointerTy(DAG.getDataLayout())));
6498 Ops.push_back(Res);
6499
6500 EltSize *= 2;
6501 NumElts /= 2;
6502 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6503 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6504 }
6505
6506 return Res;
6507}
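// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// Scalar model of the widening loop above: starting from per-byte popcounts,
// each vpaddl.u8/.u16 step sums adjacent lanes into lanes of twice the width
// until the lane size matches the requested element size. One such step on a
// four-byte group (illustrative only):
#include <cstdint>

static void SketchPairwiseAddU8ToU16(const uint8_t In[4], uint16_t Out[2]) {
  // Each output lane is the widened sum of two adjacent input lanes, so the
  // per-byte counts accumulate without overflowing -- the role VPADDL plays.
  Out[0] = uint16_t(In[0]) + uint16_t(In[1]);
  Out[1] = uint16_t(In[2]) + uint16_t(In[3]);
}
// ----------------------------------------------------------------------------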
6508
6509/// getVShiftImm - Check if this is a valid build_vector for the immediate
6510/// operand of a vector shift operation, where all the elements of the
6511/// build_vector must have the same constant integer value.
6512static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6513 // Ignore bit_converts.
6514 while (Op.getOpcode() == ISD::BITCAST)
6515 Op = Op.getOperand(0);
6516 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6517 APInt SplatBits, SplatUndef;
6518 unsigned SplatBitSize;
6519 bool HasAnyUndefs;
6520 if (!BVN ||
6521 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6522 ElementBits) ||
6523 SplatBitSize > ElementBits)
6524 return false;
6525 Cnt = SplatBits.getSExtValue();
6526 return true;
6527}
6528
6529/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6530/// operand of a vector shift left operation. That value must be in the range:
6531/// 0 <= Value < ElementBits for a left shift; or
6532/// 0 <= Value <= ElementBits for a long left shift.
6533static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6534 assert(VT.isVector() && "vector shift count is not a vector type");
6535 int64_t ElementBits = VT.getScalarSizeInBits();
6536 if (!getVShiftImm(Op, ElementBits, Cnt))
6537 return false;
6538 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6539}
6540
6541/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6542/// operand of a vector shift right operation. For a shift opcode, the value
6543/// is positive, but for an intrinsic the value count must be negative. The
6544/// absolute value must be in the range:
6545/// 1 <= |Value| <= ElementBits for a right shift; or
6546/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6547static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6548 int64_t &Cnt) {
6549 assert(VT.isVector() && "vector shift count is not a vector type");
6550 int64_t ElementBits = VT.getScalarSizeInBits();
6551 if (!getVShiftImm(Op, ElementBits, Cnt))
6552 return false;
6553 if (!isIntrinsic)
6554 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6555 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6556 Cnt = -Cnt;
6557 return true;
6558 }
6559 return false;
6560}
6561
6562static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6563 const ARMSubtarget *ST) {
6564 EVT VT = N->getValueType(0);
6565 SDLoc dl(N);
6566 int64_t Cnt;
6567
6568 if (!VT.isVector())
6569 return SDValue();
6570
6571 // We essentially have two forms here. Shift by an immediate and shift by a
6572 // vector register (there are also shifts by a gpr, but those are just handled
6573 // with a tablegen pattern). We cannot easily match shift by an immediate in
6574 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6575 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6576 // signed or unsigned, and a negative shift indicates a shift right).
6577 if (N->getOpcode() == ISD::SHL) {
6578 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6579 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6580 DAG.getConstant(Cnt, dl, MVT::i32));
6581 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6582 N->getOperand(1));
6583 }
6584
6585 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6586 "unexpected vector shift opcode");
6587
6588 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6589 unsigned VShiftOpc =
6590 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6591 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6592 DAG.getConstant(Cnt, dl, MVT::i32));
6593 }
6594
6595 // Other right shifts we don't have operations for (we use a shift left by a
6596 // negative number).
6597 EVT ShiftVT = N->getOperand(1).getValueType();
6598 SDValue NegatedCount = DAG.getNode(
6599 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6600 unsigned VShiftOpc =
6601 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6602 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6603}
6604
6605static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6606 const ARMSubtarget *ST) {
6607 EVT VT = N->getValueType(0);
6608 SDLoc dl(N);
6609
6610 // We can get here for a node like i32 = ISD::SHL i32, i64
6611 if (VT != MVT::i64)
6612 return SDValue();
6613
6614 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6615 N->getOpcode() == ISD::SHL) &&
6616 "Unknown shift to lower!");
6617
6618 unsigned ShOpc = N->getOpcode();
6619 if (ST->hasMVEIntegerOps()) {
6620 SDValue ShAmt = N->getOperand(1);
6621 unsigned ShPartsOpc = ARMISD::LSLL;
6622 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6623
6624 // If the shift amount is greater than 32 or has a greater bitwidth than 64
6625 // then do the default optimisation
6626 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6627 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6628 return SDValue();
6629
6630 // Extract the lower 32 bits of the shift amount if it's not an i32
6631 if (ShAmt->getValueType(0) != MVT::i32)
6632 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6633
6634 if (ShOpc == ISD::SRL) {
6635 if (!Con)
6636 // There is no t2LSRLr instruction so negate and perform an lsll if the
6637 // shift amount is in a register, emulating a right shift.
6638 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6639 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6640 else
6641 // Else generate an lsrl on the immediate shift amount
6642 ShPartsOpc = ARMISD::LSRL;
6643 } else if (ShOpc == ISD::SRA)
6644 ShPartsOpc = ARMISD::ASRL;
6645
6646 // Split Lower/Upper 32 bits of the destination/source
6647 SDValue Lo, Hi;
6648 std::tie(Lo, Hi) =
6649 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6650 // Generate the shift operation as computed above
6651 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6652 ShAmt);
6653 // The upper 32 bits come from the second return value of lsll
6654 Hi = SDValue(Lo.getNode(), 1);
6655 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6656 }
6657
6658 // We only lower SRA, SRL of 1 here; all others use generic lowering.
6659 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6660 return SDValue();
6661
6662 // If we are in thumb mode, we don't have RRX.
6663 if (ST->isThumb1Only())
6664 return SDValue();
6665
6666 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6667 SDValue Lo, Hi;
6668 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6669
6670 // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
6671 // captures the shifted out bit into a carry flag.
6672 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
6673 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
6674
6675 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6676 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6677
6678 // Merge the pieces into a single i64 value.
6679 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6680}
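// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The LSRS #1 + RRX sequence above, modelled on plain integers: shift the high
// half right by one, remember the bit that falls out (the carry), and rotate
// that bit into the top of the low half. Names are illustrative only; the ASRS
// variant differs only in using an arithmetic shift of the high half.
#include <cstdint>

static uint64_t SketchLogicalShiftRight64By1(uint32_t Lo, uint32_t Hi) {
  uint32_t Carry = Hi & 1u;                    // bit shifted out of the high half
  uint32_t NewHi = Hi >> 1;                    // LSRS #1
  uint32_t NewLo = (Lo >> 1) | (Carry << 31);  // RRX: carry becomes bit 31
  return (uint64_t(NewHi) << 32) | NewLo;
}
// ----------------------------------------------------------------------------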
6681
6682static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6683 const ARMSubtarget *ST) {
6684 bool Invert = false;
6685 bool Swap = false;
6686 unsigned Opc = ARMCC::AL;
6687
6688 SDValue Op0 = Op.getOperand(0);
6689 SDValue Op1 = Op.getOperand(1);
6690 SDValue CC = Op.getOperand(2);
6691 EVT VT = Op.getValueType();
6692 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6693 SDLoc dl(Op);
6694
6695 EVT CmpVT;
6696 if (ST->hasNEON())
6697 CmpVT = VT.changeVectorElementTypeToInteger();
6698 else {
6699 assert(ST->hasMVEIntegerOps() &&
6700 "No hardware support for integer vector comparison!");
6701
6702 if (Op.getValueType().getVectorElementType() != MVT::i1)
6703 return SDValue();
6704
6705 // Make sure we expand floating point setcc to scalar if we do not have
6706 // mve.fp, so that we can handle them from there.
6707 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6708 return SDValue();
6709
6710 CmpVT = VT;
6711 }
6712
6713 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6714 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6715 // Special-case integer 64-bit equality comparisons. They aren't legal,
6716 // but they can be lowered with a few vector instructions.
6717 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6718 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6719 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6720 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6721 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6722 DAG.getCondCode(ISD::SETEQ));
6723 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6724 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6725 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6726 if (SetCCOpcode == ISD::SETNE)
6727 Merged = DAG.getNOT(dl, Merged, CmpVT);
6728 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6729 return Merged;
6730 }
6731
6732 if (CmpVT.getVectorElementType() == MVT::i64)
6733 // 64-bit comparisons are not legal in general.
6734 return SDValue();
6735
6736 if (Op1.getValueType().isFloatingPoint()) {
6737 switch (SetCCOpcode) {
6738 default: llvm_unreachable("Illegal FP comparison");
6739 case ISD::SETUNE:
6740 case ISD::SETNE:
6741 if (ST->hasMVEFloatOps()) {
6742 Opc = ARMCC::NE; break;
6743 } else {
6744 Invert = true; [[fallthrough]];
6745 }
6746 case ISD::SETOEQ:
6747 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6748 case ISD::SETOLT:
6749 case ISD::SETLT: Swap = true; [[fallthrough]];
6750 case ISD::SETOGT:
6751 case ISD::SETGT: Opc = ARMCC::GT; break;
6752 case ISD::SETOLE:
6753 case ISD::SETLE: Swap = true; [[fallthrough]];
6754 case ISD::SETOGE:
6755 case ISD::SETGE: Opc = ARMCC::GE; break;
6756 case ISD::SETUGE: Swap = true; [[fallthrough]];
6757 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6758 case ISD::SETUGT: Swap = true; [[fallthrough]];
6759 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6760 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6761 case ISD::SETONE: {
6762 // Expand this to (OLT | OGT).
6763 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6764 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6765 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6766 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6767 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6768 if (Invert)
6769 Result = DAG.getNOT(dl, Result, VT);
6770 return Result;
6771 }
6772 case ISD::SETUO: Invert = true; [[fallthrough]];
6773 case ISD::SETO: {
6774 // Expand this to (OLT | OGE).
6775 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6776 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6777 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6778 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6779 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6780 if (Invert)
6781 Result = DAG.getNOT(dl, Result, VT);
6782 return Result;
6783 }
6784 }
6785 } else {
6786 // Integer comparisons.
6787 switch (SetCCOpcode) {
6788 default: llvm_unreachable("Illegal integer comparison");
6789 case ISD::SETNE:
6790 if (ST->hasMVEIntegerOps()) {
6791 Opc = ARMCC::NE; break;
6792 } else {
6793 Invert = true; [[fallthrough]];
6794 }
6795 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6796 case ISD::SETLT: Swap = true; [[fallthrough]];
6797 case ISD::SETGT: Opc = ARMCC::GT; break;
6798 case ISD::SETLE: Swap = true; [[fallthrough]];
6799 case ISD::SETGE: Opc = ARMCC::GE; break;
6800 case ISD::SETULT: Swap = true; [[fallthrough]];
6801 case ISD::SETUGT: Opc = ARMCC::HI; break;
6802 case ISD::SETULE: Swap = true; [[fallthrough]];
6803 case ISD::SETUGE: Opc = ARMCC::HS; break;
6804 }
6805
6806 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6807 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6808 SDValue AndOp;
6809 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6810 AndOp = Op0;
6811 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6812 AndOp = Op1;
6813
6814 // Ignore bitconvert.
6815 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6816 AndOp = AndOp.getOperand(0);
6817
6818 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6819 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6820 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6821 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6822 if (!Invert)
6823 Result = DAG.getNOT(dl, Result, VT);
6824 return Result;
6825 }
6826 }
6827 }
6828
6829 if (Swap)
6830 std::swap(Op0, Op1);
6831
6832 // If one of the operands is a constant vector zero, attempt to fold the
6833 // comparison to a specialized compare-against-zero form.
6834 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6835 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6836 Opc == ARMCC::NE)) {
6837 if (Opc == ARMCC::GE)
6838 Opc = ARMCC::LE;
6839 else if (Opc == ARMCC::GT)
6840 Opc = ARMCC::LT;
6841 std::swap(Op0, Op1);
6842 }
6843
6844 SDValue Result;
6845 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6846 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6847 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6848 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6849 DAG.getConstant(Opc, dl, MVT::i32));
6850 else
6851 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6852 DAG.getConstant(Opc, dl, MVT::i32));
6853
6854 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6855
6856 if (Invert)
6857 Result = DAG.getNOT(dl, Result, VT);
6858
6859 return Result;
6860}
6861
6862static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6863 SDValue LHS = Op.getOperand(0);
6864 SDValue RHS = Op.getOperand(1);
6865 SDValue Carry = Op.getOperand(2);
6866 SDValue Cond = Op.getOperand(3);
6867 SDLoc DL(Op);
6868
6869 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6870
6871 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6872 // have to invert the carry first.
6873 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6874 DAG.getConstant(1, DL, MVT::i32), Carry);
6875 // This converts the boolean value carry into the carry flag.
6876 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6877
6878 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6879 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6880
6881 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6882 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6883 SDValue ARMcc = DAG.getConstant(
6884 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6885 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6886 Cmp.getValue(1));
6887}
6888
6889/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6890/// valid vector constant for a NEON or MVE instruction with a "modified
6891/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6892static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6893 unsigned SplatBitSize, SelectionDAG &DAG,
6894 const SDLoc &dl, EVT &VT, EVT VectorVT,
6895 VMOVModImmType type) {
6896 unsigned OpCmode, Imm;
6897 bool is128Bits = VectorVT.is128BitVector();
6898
6899 // SplatBitSize is set to the smallest size that splats the vector, so a
6900 // zero vector will always have SplatBitSize == 8. However, NEON modified
6901 // immediate instructions other than VMOV do not support the 8-bit encoding
6902 // of a zero vector, and the default encoding of zero is supposed to be the
6903 // 32-bit version.
6904 if (SplatBits == 0)
6905 SplatBitSize = 32;
6906
6907 switch (SplatBitSize) {
6908 case 8:
6909 if (type != VMOVModImm)
6910 return SDValue();
6911 // Any 1-byte value is OK. Op=0, Cmode=1110.
6912 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6913 OpCmode = 0xe;
6914 Imm = SplatBits;
6915 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6916 break;
6917
6918 case 16:
6919 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6920 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6921 if ((SplatBits & ~0xff) == 0) {
6922 // Value = 0x00nn: Op=x, Cmode=100x.
6923 OpCmode = 0x8;
6924 Imm = SplatBits;
6925 break;
6926 }
6927 if ((SplatBits & ~0xff00) == 0) {
6928 // Value = 0xnn00: Op=x, Cmode=101x.
6929 OpCmode = 0xa;
6930 Imm = SplatBits >> 8;
6931 break;
6932 }
6933 return SDValue();
6934
6935 case 32:
6936 // NEON's 32-bit VMOV supports splat values where:
6937 // * only one byte is nonzero, or
6938 // * the least significant byte is 0xff and the second byte is nonzero, or
6939 // * the least significant 2 bytes are 0xff and the third is nonzero.
6940 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6941 if ((SplatBits & ~0xff) == 0) {
6942 // Value = 0x000000nn: Op=x, Cmode=000x.
6943 OpCmode = 0;
6944 Imm = SplatBits;
6945 break;
6946 }
6947 if ((SplatBits & ~0xff00) == 0) {
6948 // Value = 0x0000nn00: Op=x, Cmode=001x.
6949 OpCmode = 0x2;
6950 Imm = SplatBits >> 8;
6951 break;
6952 }
6953 if ((SplatBits & ~0xff0000) == 0) {
6954 // Value = 0x00nn0000: Op=x, Cmode=010x.
6955 OpCmode = 0x4;
6956 Imm = SplatBits >> 16;
6957 break;
6958 }
6959 if ((SplatBits & ~0xff000000) == 0) {
6960 // Value = 0xnn000000: Op=x, Cmode=011x.
6961 OpCmode = 0x6;
6962 Imm = SplatBits >> 24;
6963 break;
6964 }
6965
6966 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6967 if (type == OtherModImm) return SDValue();
6968
6969 if ((SplatBits & ~0xffff) == 0 &&
6970 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6971 // Value = 0x0000nnff: Op=x, Cmode=1100.
6972 OpCmode = 0xc;
6973 Imm = SplatBits >> 8;
6974 break;
6975 }
6976
6977 // cmode == 0b1101 is not supported for MVE VMVN
6978 if (type == MVEVMVNModImm)
6979 return SDValue();
6980
6981 if ((SplatBits & ~0xffffff) == 0 &&
6982 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6983 // Value = 0x00nnffff: Op=x, Cmode=1101.
6984 OpCmode = 0xd;
6985 Imm = SplatBits >> 16;
6986 break;
6987 }
6988
6989 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
6990 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
6991 // VMOV.I32. A (very) minor optimization would be to replicate the value
6992 // and fall through here to test for a valid 64-bit splat. But, then the
6993 // caller would also need to check and handle the change in size.
6994 return SDValue();
6995
6996 case 64: {
6997 if (type != VMOVModImm)
6998 return SDValue();
6999 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7000 uint64_t BitMask = 0xff;
7001 unsigned ImmMask = 1;
7002 Imm = 0;
7003 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7004 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7005 Imm |= ImmMask;
7006 } else if ((SplatBits & BitMask) != 0) {
7007 return SDValue();
7008 }
7009 BitMask <<= 8;
7010 ImmMask <<= 1;
7011 }
7012
7013 // Op=1, Cmode=1110.
7014 OpCmode = 0x1e;
7015 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7016 break;
7017 }
7018
7019 default:
7020 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7021 }
7022
7023 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7024 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7025}
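// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The 32-bit "basic" cases above, restated as a predicate: a splat value fits
// a plain VMOV.I32 modified immediate when exactly one byte position carries
// the payload and the other bytes are zero (cmode 000x/001x/010x/011x). The
// 0x..ff forms (cmode 110x) and the 64-bit byte mask are handled separately
// above. Illustrative only.
#include <cstdint>

static bool SketchIsSingleByteSplat32(uint32_t SplatBits) {
  return (SplatBits & ~0xffu) == 0 ||       // 0x000000nn
         (SplatBits & ~0xff00u) == 0 ||     // 0x0000nn00
         (SplatBits & ~0xff0000u) == 0 ||   // 0x00nn0000
         (SplatBits & ~0xff000000u) == 0;   // 0xnn000000
}
// ----------------------------------------------------------------------------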
7026
7027SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7028 const ARMSubtarget *ST) const {
7029 EVT VT = Op.getValueType();
7030 bool IsDouble = (VT == MVT::f64);
7031 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7032 const APFloat &FPVal = CFP->getValueAPF();
7033
7034 // Prevent floating-point constants from using literal loads
7035 // when execute-only is enabled.
7036 if (ST->genExecuteOnly()) {
7037 // We shouldn't trigger this for v6m execute-only
7038 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7039 "Unexpected architecture");
7040
7041 // If we can represent the constant as an immediate, don't lower it
7042 if (isFPImmLegal(FPVal, VT))
7043 return Op;
7044 // Otherwise, construct as integer, and move to float register
7045 APInt INTVal = FPVal.bitcastToAPInt();
7046 SDLoc DL(CFP);
7047 switch (VT.getSimpleVT().SimpleTy) {
7048 default:
7049 llvm_unreachable("Unknown floating point type!");
7050 break;
7051 case MVT::f64: {
7052 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7053 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7054 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7055 }
7056 case MVT::f32:
7057 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7058 DAG.getConstant(INTVal, DL, MVT::i32));
7059 }
7060 }
7061
7062 if (!ST->hasVFP3Base())
7063 return SDValue();
7064
7065 // Use the default (constant pool) lowering for double constants when we have
7066 // an SP-only FPU
7067 if (IsDouble && !Subtarget->hasFP64())
7068 return SDValue();
7069
7070 // Try splatting with a VMOV.f32...
7071 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7072
7073 if (ImmVal != -1) {
7074 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7075 // We have code in place to select a valid ConstantFP already, no need to
7076 // do any mangling.
7077 return Op;
7078 }
7079
7080 // It's a float and we are trying to use NEON operations where
7081 // possible. Lower it to a splat followed by an extract.
7082 SDLoc DL(Op);
7083 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7084 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7085 NewVal);
7086 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7087 DAG.getConstant(0, DL, MVT::i32));
7088 }
7089
7090 // The rest of our options are NEON only, make sure that's allowed before
7091 // proceeding..
7092 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7093 return SDValue();
7094
7095 EVT VMovVT;
7096 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7097
7098 // It wouldn't really be worth bothering for doubles except for one very
7099 // important value, which does happen to match: 0.0. So make sure we don't do
7100 // anything stupid.
7101 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7102 return SDValue();
7103
7104 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7105 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7106 VMovVT, VT, VMOVModImm);
7107 if (NewVal != SDValue()) {
7108 SDLoc DL(Op);
7109 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7110 NewVal);
7111 if (IsDouble)
7112 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7113
7114 // It's a float: cast and extract a vector element.
7115 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7116 VecConstant);
7117 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7118 DAG.getConstant(0, DL, MVT::i32));
7119 }
7120
7121 // Finally, try a VMVN.i32
7122 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7123 VT, VMVNModImm);
7124 if (NewVal != SDValue()) {
7125 SDLoc DL(Op);
7126 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7127
7128 if (IsDouble)
7129 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7130
7131 // It's a float: cast and extract a vector element.
7132 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7133 VecConstant);
7134 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7135 DAG.getConstant(0, DL, MVT::i32));
7136 }
7137
7138 return SDValue();
7139}
7140
7141// check if an VEXT instruction can handle the shuffle mask when the
7142// vector sources of the shuffle are the same.
7143static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7144 unsigned NumElts = VT.getVectorNumElements();
7145
7146 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7147 if (M[0] < 0)
7148 return false;
7149
7150 Imm = M[0];
7151
7152 // If this is a VEXT shuffle, the immediate value is the index of the first
7153 // element. The other shuffle indices must be the successive elements after
7154 // the first one.
7155 unsigned ExpectedElt = Imm;
7156 for (unsigned i = 1; i < NumElts; ++i) {
7157 // Increment the expected index. If it wraps around, just follow it
7158 // back to index zero and keep going.
7159 ++ExpectedElt;
7160 if (ExpectedElt == NumElts)
7161 ExpectedElt = 0;
7162
7163 if (M[i] < 0) continue; // ignore UNDEF indices
7164 if (ExpectedElt != static_cast<unsigned>(M[i]))
7165 return false;
7166 }
7167
7168 return true;
7169}
7170
7171static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7172 bool &ReverseVEXT, unsigned &Imm) {
7173 unsigned NumElts = VT.getVectorNumElements();
7174 ReverseVEXT = false;
7175
7176 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7177 if (M[0] < 0)
7178 return false;
7179
7180 Imm = M[0];
7181
7182 // If this is a VEXT shuffle, the immediate value is the index of the first
7183 // element. The other shuffle indices must be the successive elements after
7184 // the first one.
7185 unsigned ExpectedElt = Imm;
7186 for (unsigned i = 1; i < NumElts; ++i) {
7187 // Increment the expected index. If it wraps around, it may still be
7188 // a VEXT but the source vectors must be swapped.
7189 ExpectedElt += 1;
7190 if (ExpectedElt == NumElts * 2) {
7191 ExpectedElt = 0;
7192 ReverseVEXT = true;
7193 }
7194
7195 if (M[i] < 0) continue; // ignore UNDEF indices
7196 if (ExpectedElt != static_cast<unsigned>(M[i]))
7197 return false;
7198 }
7199
7200 // Adjust the index value if the source operands will be swapped.
7201 if (ReverseVEXT)
7202 Imm -= NumElts;
7203
7204 return true;
7205}
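// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The VEXT property above as a standalone predicate: every defined mask
// element must equal (M[0] + i) modulo the width of the concatenated pair.
// With NumElts == 4, <2,3,4,5> passes (Imm = 2, no swap) and <6,7,0,1> passes
// only after swapping the sources (Imm rebased to 2); <2,3,5,6> fails.
// Illustrative only.
static bool SketchIsContiguousWindow(const int *M, unsigned NumElts) {
  if (M[0] < 0)
    return false;
  for (unsigned i = 1; i < NumElts; ++i)
    if (M[i] >= 0 &&
        (unsigned)M[i] != ((unsigned)M[0] + i) % (2 * NumElts))
      return false;
  return true;
}
// ----------------------------------------------------------------------------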
7206
7207static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7208 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7209 // range, then 0 is placed into the resulting vector. So pretty much any mask
7210 // of 8 elements can work here.
7211 return VT == MVT::v8i8 && M.size() == 8;
7212}
7213
7214static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7215 unsigned Index) {
7216 if (Mask.size() == Elements * 2)
7217 return Index / Elements;
7218 return Mask[Index] == 0 ? 0 : 1;
7219}
7220
7221// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7222// checking that pairs of elements in the shuffle mask represent the same index
7223// in each vector, incrementing the expected index by 2 at each step.
7224// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7225// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7226// v2={e,f,g,h}
7227// WhichResult gives the offset for each element in the mask based on which
7228// of the two results it belongs to.
7229//
7230// The transpose can be represented either as:
7231// result1 = shufflevector v1, v2, result1_shuffle_mask
7232// result2 = shufflevector v1, v2, result2_shuffle_mask
7233// where v1/v2 and the shuffle masks have the same number of elements
7234// (here WhichResult (see below) indicates which result is being checked)
7235//
7236// or as:
7237// results = shufflevector v1, v2, shuffle_mask
7238// where both results are returned in one vector and the shuffle mask has twice
7239 // as many elements as v1/v2 (here WhichResult will always be 0 if true); here
7240 // we want to check the low half and high half of the shuffle mask as if it
7241 // were the other case.
7242static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7243 unsigned EltSz = VT.getScalarSizeInBits();
7244 if (EltSz == 64)
7245 return false;
7246
7247 unsigned NumElts = VT.getVectorNumElements();
7248 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7249 return false;
7250
7251 // If the mask is twice as long as the input vector then we need to check the
7252 // upper and lower parts of the mask with a matching value for WhichResult
7253 // FIXME: A mask with only even values will be rejected in case the first
7254 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7255 // M[0] is used to determine WhichResult
7256 for (unsigned i = 0; i < M.size(); i += NumElts) {
7257 WhichResult = SelectPairHalf(NumElts, M, i);
7258 for (unsigned j = 0; j < NumElts; j += 2) {
7259 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7260 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7261 return false;
7262 }
7263 }
7264
7265 if (M.size() == NumElts*2)
7266 WhichResult = 0;
7267
7268 return true;
7269}
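// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The single-result form of the VTRN test above, written out for clarity:
// with v1 = {a,b,c,d}, v2 = {e,f,g,h} (v4i32), WhichResult == 0 accepts
// <0,4,2,6> ({a,e,c,g}) and WhichResult == 1 accepts <1,5,3,7> ({b,f,d,h}).
// Undefined (negative) mask entries act as wildcards. Illustrative only.
static bool SketchIsSingleVTRNMask(const int *M, unsigned NumElts, unsigned W) {
  for (unsigned j = 0; j < NumElts; j += 2) {
    if (M[j] >= 0 && (unsigned)M[j] != j + W)
      return false;
    if (M[j + 1] >= 0 && (unsigned)M[j + 1] != j + NumElts + W)
      return false;
  }
  return true;
}
// ----------------------------------------------------------------------------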
7270
7271/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7272/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7273/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7274static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7275 unsigned EltSz = VT.getScalarSizeInBits();
7276 if (EltSz == 64)
7277 return false;
7278
7279 unsigned NumElts = VT.getVectorNumElements();
7280 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7281 return false;
7282
7283 for (unsigned i = 0; i < M.size(); i += NumElts) {
7284 WhichResult = SelectPairHalf(NumElts, M, i);
7285 for (unsigned j = 0; j < NumElts; j += 2) {
7286 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7287 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7288 return false;
7289 }
7290 }
7291
7292 if (M.size() == NumElts*2)
7293 WhichResult = 0;
7294
7295 return true;
7296}
7297
7298// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7299// that the mask elements are either all even and in steps of size 2 or all odd
7300// and in steps of size 2.
7301// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7302// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7303// v2={e,f,g,h}
7304 // Requires similar checks to those of isVTRNMask with
7305 // respect to how results are returned.
7306static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7307 unsigned EltSz = VT.getScalarSizeInBits();
7308 if (EltSz == 64)
7309 return false;
7310
7311 unsigned NumElts = VT.getVectorNumElements();
7312 if (M.size() != NumElts && M.size() != NumElts*2)
7313 return false;
7314
7315 for (unsigned i = 0; i < M.size(); i += NumElts) {
7316 WhichResult = SelectPairHalf(NumElts, M, i);
7317 for (unsigned j = 0; j < NumElts; ++j) {
7318 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7319 return false;
7320 }
7321 }
7322
7323 if (M.size() == NumElts*2)
7324 WhichResult = 0;
7325
7326 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7327 if (VT.is64BitVector() && EltSz == 32)
7328 return false;
7329
7330 return true;
7331}
7332
7333/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7334/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7335/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7336static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7337 unsigned EltSz = VT.getScalarSizeInBits();
7338 if (EltSz == 64)
7339 return false;
7340
7341 unsigned NumElts = VT.getVectorNumElements();
7342 if (M.size() != NumElts && M.size() != NumElts*2)
7343 return false;
7344
7345 unsigned Half = NumElts / 2;
7346 for (unsigned i = 0; i < M.size(); i += NumElts) {
7347 WhichResult = SelectPairHalf(NumElts, M, i);
7348 for (unsigned j = 0; j < NumElts; j += Half) {
7349 unsigned Idx = WhichResult;
7350 for (unsigned k = 0; k < Half; ++k) {
7351 int MIdx = M[i + j + k];
7352 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7353 return false;
7354 Idx += 2;
7355 }
7356 }
7357 }
7358
7359 if (M.size() == NumElts*2)
7360 WhichResult = 0;
7361
7362 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7363 if (VT.is64BitVector() && EltSz == 32)
7364 return false;
7365
7366 return true;
7367}
7368
7369// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7370// that pairs of elements of the shufflemask represent the same index in each
7371// vector incrementing sequentially through the vectors.
7372// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7373// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7374// v2={e,f,g,h}
7375 // Requires similar checks to those of isVTRNMask with respect to how results
7376 // are returned.
7377static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7378 unsigned EltSz = VT.getScalarSizeInBits();
7379 if (EltSz == 64)
7380 return false;
7381
7382 unsigned NumElts = VT.getVectorNumElements();
7383 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7384 return false;
7385
7386 for (unsigned i = 0; i < M.size(); i += NumElts) {
7387 WhichResult = SelectPairHalf(NumElts, M, i);
7388 unsigned Idx = WhichResult * NumElts / 2;
7389 for (unsigned j = 0; j < NumElts; j += 2) {
7390 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7391 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7392 return false;
7393 Idx += 1;
7394 }
7395 }
7396
7397 if (M.size() == NumElts*2)
7398 WhichResult = 0;
7399
7400 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7401 if (VT.is64BitVector() && EltSz == 32)
7402 return false;
7403
7404 return true;
7405}
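// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The single-result form of the VZIP test above: with v1 = {a,b,c,d} and
// v2 = {e,f,g,h} (v4i32), WhichResult == 0 accepts <0,4,1,5> ({a,e,b,f}) and
// WhichResult == 1 accepts <2,6,3,7> ({c,g,d,h}); each pair must be
// (Idx, Idx + NumElts) with Idx starting at WhichResult * NumElts / 2.
// Illustrative only.
static bool SketchIsSingleVZIPMask(const int *M, unsigned NumElts, unsigned W) {
  unsigned Idx = W * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2, ++Idx) {
    if (M[j] >= 0 && (unsigned)M[j] != Idx)
      return false;
    if (M[j + 1] >= 0 && (unsigned)M[j + 1] != Idx + NumElts)
      return false;
  }
  return true;
}
// ----------------------------------------------------------------------------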
7406
7407/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7408/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7409/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7410static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7411 unsigned EltSz = VT.getScalarSizeInBits();
7412 if (EltSz == 64)
7413 return false;
7414
7415 unsigned NumElts = VT.getVectorNumElements();
7416 if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
7417 return false;
7418
7419 for (unsigned i = 0; i < M.size(); i += NumElts) {
7420 WhichResult = SelectPairHalf(NumElts, M, i);
7421 unsigned Idx = WhichResult * NumElts / 2;
7422 for (unsigned j = 0; j < NumElts; j += 2) {
7423 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7424 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7425 return false;
7426 Idx += 1;
7427 }
7428 }
7429
7430 if (M.size() == NumElts*2)
7431 WhichResult = 0;
7432
7433 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7434 if (VT.is64BitVector() && EltSz == 32)
7435 return false;
7436
7437 return true;
7438}
7439
7440/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7441/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7442static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7443 unsigned &WhichResult,
7444 bool &isV_UNDEF) {
7445 isV_UNDEF = false;
7446 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7447 return ARMISD::VTRN;
7448 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7449 return ARMISD::VUZP;
7450 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7451 return ARMISD::VZIP;
7452
7453 isV_UNDEF = true;
7454 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7455 return ARMISD::VTRN;
7456 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7457 return ARMISD::VUZP;
7458 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7459 return ARMISD::VZIP;
7460
7461 return 0;
7462}
7463
7464/// \return true if this is a reverse operation on a vector.
7465static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7466 unsigned NumElts = VT.getVectorNumElements();
7467 // Make sure the mask has the right size.
7468 if (NumElts != M.size())
7469 return false;
7470
7471 // Look for <15, ..., 3, -1, 1, 0>.
7472 for (unsigned i = 0; i != NumElts; ++i)
7473 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7474 return false;
7475
7476 return true;
7477}
7478
7479static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7480 unsigned NumElts = VT.getVectorNumElements();
7481 // Make sure the mask has the right size.
7482 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7483 return false;
7484
7485 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7486 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7487 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7488 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7489 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7490 int Ofs = Top ? 1 : 0;
7491 int Upper = SingleSource ? 0 : NumElts;
7492 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7493 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7494 return false;
7495 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7496 return false;
7497 }
7498 return true;
7499}
7500
7501static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7502 unsigned NumElts = VT.getVectorNumElements();
7503 // Make sure the mask has the right size.
7504 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7505 return false;
7506
7507 // If Top
7508 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7509 // This inserts Input2 into Input1
7510 // else if not Top
7511 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7512 // This inserts Input1 into Input2
7513 unsigned Offset = Top ? 0 : 1;
7514 unsigned N = SingleSource ? 0 : NumElts;
7515 for (unsigned i = 0; i < NumElts; i += 2) {
7516 if (M[i] >= 0 && M[i] != (int)i)
7517 return false;
7518 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7519 return false;
7520 }
7521
7522 return true;
7523}
7524
7525static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7526 unsigned NumElts = ToVT.getVectorNumElements();
7527 if (NumElts != M.size())
7528 return false;
7529
7530 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7531 // looking for patterns of:
7532 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7533 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7534
7535 unsigned Off0 = rev ? NumElts / 2 : 0;
7536 unsigned Off1 = rev ? 0 : NumElts / 2;
7537 for (unsigned i = 0; i < NumElts; i += 2) {
7538 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7539 return false;
7540 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7541 return false;
7542 }
7543
7544 return true;
7545}
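// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// Generator for the mask shape isVMOVNTruncMask accepts, e.g. for
// NumElts == 8: !rev yields <0,4,1,5,2,6,3,7> and rev yields
// <4,0,5,1,6,2,7,3>. Illustrative only.
static void SketchBuildVMOVNTruncMask(int *M, unsigned NumElts, bool Rev) {
  unsigned Off0 = Rev ? NumElts / 2 : 0;
  unsigned Off1 = Rev ? 0 : NumElts / 2;
  for (unsigned i = 0; i < NumElts; i += 2) {
    M[i] = int(Off0 + i / 2);
    M[i + 1] = int(Off1 + i / 2);
  }
}
// ----------------------------------------------------------------------------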
7546
7547// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7548// from a pair of inputs. For example:
7549// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7550// FP_ROUND(EXTRACT_ELT(Y, 0),
7551// FP_ROUND(EXTRACT_ELT(X, 1),
7552// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7553static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7554 const ARMSubtarget *ST) {
7555 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7556 if (!ST->hasMVEFloatOps())
7557 return SDValue();
7558
7559 SDLoc dl(BV);
7560 EVT VT = BV.getValueType();
7561 if (VT != MVT::v8f16)
7562 return SDValue();
7563
7564 // We are looking for a buildvector of fptrunc elements, where all the
7565 // elements are extracted, interleaved, from two sources. Check that the first
7566 // two items are valid enough and extract some info from them (they are checked
7567 // properly in the loop below).
7568 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7569 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7570 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7571 return SDValue();
7572 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7573 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7574 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7575 return SDValue();
7576 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7577 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7578 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7579 return SDValue();
7580
7581 // Check all the values in the BuildVector line up with our expectations.
7582 for (unsigned i = 1; i < 4; i++) {
7583 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7584 return Trunc.getOpcode() == ISD::FP_ROUND &&
7585 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7586 Trunc.getOperand(0).getOperand(0) == Op &&
7587 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7588 };
7589 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7590 return SDValue();
7591 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7592 return SDValue();
7593 }
7594
7595 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7596 DAG.getConstant(0, dl, MVT::i32));
7597 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7598 DAG.getConstant(1, dl, MVT::i32));
7599}
7600
7601// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7602// from a single input on alternating lanes. For example:
7603 // BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0),
7604 // FP_EXTEND(EXTRACT_ELT(X, 2),
7605 // FP_EXTEND(EXTRACT_ELT(X, 4), ...)
7606static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7607 const ARMSubtarget *ST) {
7608 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7609 if (!ST->hasMVEFloatOps())
7610 return SDValue();
7611
7612 SDLoc dl(BV);
7613 EVT VT = BV.getValueType();
7614 if (VT != MVT::v4f32)
7615 return SDValue();
7616
7617 // We are looking for a buildvector of fpext elements, where all the
7618 // elements are alternating lanes from a single source. For example <0,2,4,6>
7619 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7620 // info from them (they are checked properly in the loop below).
7621 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7622 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7623 return SDValue();
7624 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7625 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7626 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7627 return SDValue();
7628
7629 // Check all the values in the BuildVector line up with our expectations.
7630 for (unsigned i = 1; i < 4; i++) {
7631 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7632 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7633 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7634 Trunc.getOperand(0).getOperand(0) == Op &&
7635 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7636 };
7637 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7638 return SDValue();
7639 }
7640
7641 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7642 DAG.getConstant(Offset, dl, MVT::i32));
7643}
7644
7645// If N is an integer constant that can be moved into a register in one
7646// instruction, return an SDValue of such a constant (will become a MOV
7647// instruction). Otherwise return null.
7648static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7649 const ARMSubtarget *ST, const SDLoc &dl) {
7650 uint64_t Val;
7651 if (!isa<ConstantSDNode>(N))
7652 return SDValue();
7653 Val = N->getAsZExtVal();
7654
7655 if (ST->isThumb1Only()) {
7656 if (Val <= 255 || ~Val <= 255)
7657 return DAG.getConstant(Val, dl, MVT::i32);
7658 } else {
7659 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7660 return DAG.getConstant(Val, dl, MVT::i32);
7661 }
7662 return SDValue();
7663}
7664
7665static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7666 const ARMSubtarget *ST) {
7667 SDLoc dl(Op);
7668 EVT VT = Op.getValueType();
7669
7670 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7671
7672 unsigned NumElts = VT.getVectorNumElements();
7673 unsigned BoolMask;
7674 unsigned BitsPerBool;
7675 if (NumElts == 2) {
7676 BitsPerBool = 8;
7677 BoolMask = 0xff;
7678 } else if (NumElts == 4) {
7679 BitsPerBool = 4;
7680 BoolMask = 0xf;
7681 } else if (NumElts == 8) {
7682 BitsPerBool = 2;
7683 BoolMask = 0x3;
7684 } else if (NumElts == 16) {
7685 BitsPerBool = 1;
7686 BoolMask = 0x1;
7687 } else
7688 return SDValue();
7689
7690 // If this is a single value copied into all lanes (a splat), we can just sign
7691 // extend that single value
7692 SDValue FirstOp = Op.getOperand(0);
7693 if (!isa<ConstantSDNode>(FirstOp) &&
7694 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7695 return U.get().isUndef() || U.get() == FirstOp;
7696 })) {
7697 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7698 DAG.getValueType(MVT::i1));
7699 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7700 }
7701
7702 // First create base with bits set where known
7703 unsigned Bits32 = 0;
7704 for (unsigned i = 0; i < NumElts; ++i) {
7705 SDValue V = Op.getOperand(i);
7706 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7707 continue;
7708 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7709 if (BitSet)
7710 Bits32 |= BoolMask << (i * BitsPerBool);
7711 }
7712
7713 // Add in unknown nodes
7714 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7715 DAG.getConstant(Bits32, dl, MVT::i32));
7716 for (unsigned i = 0; i < NumElts; ++i) {
7717 SDValue V = Op.getOperand(i);
7718 if (isa<ConstantSDNode>(V) || V.isUndef())
7719 continue;
7720 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7721 DAG.getConstant(i, dl, MVT::i32));
7722 }
7723
7724 return Base;
7725}
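// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// The constant-lane packing above on plain integers: an MVE predicate is 16
// bits wide, so each boolean lane owns 16 / NumElts bits, and a set lane
// writes a small all-ones group at its position (e.g. 4 bits per lane for
// v4i1). Illustrative only.
#include <cstdint>

static uint32_t SketchPackPredicateConstant(const bool *Lanes,
                                            unsigned NumElts) {
  unsigned BitsPerBool = 16 / NumElts;          // 8, 4, 2 or 1
  uint32_t BoolMask = (1u << BitsPerBool) - 1;  // 0xff, 0xf, 0x3 or 0x1
  uint32_t Bits = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (Lanes[i])
      Bits |= BoolMask << (i * BitsPerBool);
  return Bits;
}
// ----------------------------------------------------------------------------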
7726
7727static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7728 const ARMSubtarget *ST) {
7729 if (!ST->hasMVEIntegerOps())
7730 return SDValue();
7731
7732 // We are looking for a buildvector where each element is Op[0] + i*N
7733 EVT VT = Op.getValueType();
7734 SDValue Op0 = Op.getOperand(0);
7735 unsigned NumElts = VT.getVectorNumElements();
7736
7737 // Get the increment value from operand 1
7738 SDValue Op1 = Op.getOperand(1);
7739 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7740 !isa<ConstantSDNode>(Op1.getOperand(1)))
7741 return SDValue();
7742 unsigned N = Op1.getConstantOperandVal(1);
7743 if (N != 1 && N != 2 && N != 4 && N != 8)
7744 return SDValue();
7745
7746 // Check that each other operand matches
7747 for (unsigned I = 2; I < NumElts; I++) {
7748 SDValue OpI = Op.getOperand(I);
7749 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7750 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7751 OpI.getConstantOperandVal(1) != I * N)
7752 return SDValue();
7753 }
7754
7755 SDLoc DL(Op);
7756 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7757 DAG.getConstant(N, DL, MVT::i32));
7758}
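// --- Editorial sketch (not part of ARMISelLowering.cpp) ---------------------
// Predicate form of the pattern matched above: a build_vector can become a
// VIDUP when lane I equals Base + I * N for N in {1, 2, 4, 8}; for example
// <x, x+2, x+4, x+6> matches with N == 2. Illustrative only.
static bool SketchIsLinearRamp(const unsigned *Lanes, unsigned NumElts,
                               unsigned N) {
  if (N != 1 && N != 2 && N != 4 && N != 8)
    return false;
  for (unsigned I = 1; I < NumElts; ++I)
    if (Lanes[I] != Lanes[0] + I * N)
      return false;
  return true;
}
// ----------------------------------------------------------------------------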
7759
7760// Returns true if the operation N can be treated as qr instruction variant at
7761// operand Op.
7762static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7763 switch (N->getOpcode()) {
7764 case ISD::ADD:
7765 case ISD::MUL:
7766 case ISD::SADDSAT:
7767 case ISD::UADDSAT:
7768 case ISD::AVGFLOORS:
7769 case ISD::AVGFLOORU:
7770 return true;
7771 case ISD::SUB:
7772 case ISD::SSUBSAT:
7773 case ISD::USUBSAT:
7774 return N->getOperand(1).getNode() == Op;
7775 case ISD::INTRINSIC_WO_CHAIN:
7776 switch (N->getConstantOperandVal(0)) {
7777 case Intrinsic::arm_mve_add_predicated:
7778 case Intrinsic::arm_mve_mul_predicated:
7779 case Intrinsic::arm_mve_qadd_predicated:
7780 case Intrinsic::arm_mve_vhadd:
7781 case Intrinsic::arm_mve_hadd_predicated:
7782 case Intrinsic::arm_mve_vqdmulh:
7783 case Intrinsic::arm_mve_qdmulh_predicated:
7784 case Intrinsic::arm_mve_vqrdmulh:
7785 case Intrinsic::arm_mve_qrdmulh_predicated:
7786 case Intrinsic::arm_mve_vqdmull:
7787 case Intrinsic::arm_mve_vqdmull_predicated:
7788 return true;
7789 case Intrinsic::arm_mve_sub_predicated:
7790 case Intrinsic::arm_mve_qsub_predicated:
7791 case Intrinsic::arm_mve_vhsub:
7792 case Intrinsic::arm_mve_hsub_predicated:
7793 return N->getOperand(2).getNode() == Op;
7794 default:
7795 return false;
7796 }
7797 default:
7798 return false;
7799 }
7800}
7801
7802// If this is a case we can't handle, return null and let the default
7803// expansion code take care of it.
7804SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7805 const ARMSubtarget *ST) const {
7806 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7807 SDLoc dl(Op);
7808 EVT VT = Op.getValueType();
7809
7810 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7811 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7812
7813 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7814 return R;
7815
7816 APInt SplatBits, SplatUndef;
7817 unsigned SplatBitSize;
7818 bool HasAnyUndefs;
7819 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7820 if (SplatUndef.isAllOnes())
7821 return DAG.getUNDEF(VT);
7822
7823 // If all the users of this constant splat are qr instruction variants,
7824 // generate a vdup of the constant.
7825 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7826 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7827 all_of(BVN->users(),
7828 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7829 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7830 : SplatBitSize == 16 ? MVT::v8i16
7831 : MVT::v16i8;
7832 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7833 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7834 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7835 }
7836
7837 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7838 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7839 // Check if an immediate VMOV works.
7840 EVT VmovVT;
7841 SDValue Val =
7842 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7843 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7844
7845 if (Val.getNode()) {
7846 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7847 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7848 }
7849
7850 // Try an immediate VMVN.
7851 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7852 Val = isVMOVModifiedImm(
7853 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7854 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7855 if (Val.getNode()) {
7856 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7857 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
7858 }
7859
7860 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7861 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7862 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7863 if (ImmVal != -1) {
7864 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7865 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7866 }
7867 }
7868
7869 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7870 // type.
7871 if (ST->hasMVEIntegerOps() &&
7872 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7873 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7874 : SplatBitSize == 16 ? MVT::v8i16
7875 : MVT::v16i8;
7876 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7877 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7878 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7879 }
7880 }
7881 }
7882
7883 // Scan through the operands to see if only one value is used.
7884 //
7885 // As an optimisation, even if more than one value is used it may be more
7886 // profitable to splat with one value and then change some lanes.
7887 //
7888 // Heuristically we decide to do this if the vector has a "dominant" value,
7889 // defined as splatted to more than half of the lanes.
7890 unsigned NumElts = VT.getVectorNumElements();
7891 bool isOnlyLowElement = true;
7892 bool usesOnlyOneValue = true;
7893 bool hasDominantValue = false;
7894 bool isConstant = true;
7895
7896 // Map of the number of times a particular SDValue appears in the
7897 // element list.
7898 DenseMap<SDValue, unsigned> ValueCounts;
7899 SDValue Value;
7900 for (unsigned i = 0; i < NumElts; ++i) {
7901 SDValue V = Op.getOperand(i);
7902 if (V.isUndef())
7903 continue;
7904 if (i > 0)
7905 isOnlyLowElement = false;
7906 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7907 isConstant = false;
7908
7909 unsigned &Count = ValueCounts[V];
7910
7911 // Is this value dominant? (takes up more than half of the lanes)
7912 if (++Count > (NumElts / 2)) {
7913 hasDominantValue = true;
7914 Value = V;
7915 }
7916 }
7917 if (ValueCounts.size() != 1)
7918 usesOnlyOneValue = false;
7919 if (!Value.getNode() && !ValueCounts.empty())
7920 Value = ValueCounts.begin()->first;
7921
7922 if (ValueCounts.empty())
7923 return DAG.getUNDEF(VT);
7924
7925 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
7926 // Keep going if we are hitting this case.
7927 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
7928 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7929
7930 unsigned EltSize = VT.getScalarSizeInBits();
7931
7932 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
7933 // i32 and try again.
7934 if (hasDominantValue && EltSize <= 32) {
7935 if (!isConstant) {
7936 SDValue N;
7937
7938 // If we are VDUPing a value that comes directly from a vector, that will
7939 // cause an unnecessary move to and from a GPR, where instead we could
7940 // just use VDUPLANE. We can only do this if the lane being extracted
7941 // is at a constant index, as the VDUP from lane instructions only have
7942 // constant-index forms.
7943 ConstantSDNode *constIndex;
7944 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7945 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
7946 // We need to create a new undef vector to use for the VDUPLANE if the
7947 // size of the vector from which we get the value is different than the
7948 // size of the vector that we need to create. We will insert the element
7949 // such that the register coalescer will remove unnecessary copies.
7950 if (VT != Value->getOperand(0).getValueType()) {
7951 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
7952 VT.getVectorNumElements();
7953 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7954 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
7955 Value, DAG.getConstant(index, dl, MVT::i32)),
7956 DAG.getConstant(index, dl, MVT::i32));
7957 } else
7958 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7959 Value->getOperand(0), Value->getOperand(1));
7960 } else
7961 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
7962
7963 if (!usesOnlyOneValue) {
7964 // The dominant value was splatted as 'N', but we now have to insert
7965 // all differing elements.
7966 for (unsigned I = 0; I < NumElts; ++I) {
7967 if (Op.getOperand(I) == Value)
7968 continue;
7969 SmallVector<SDValue, 3> Ops;
7970 Ops.push_back(N);
7971 Ops.push_back(Op.getOperand(I));
7972 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
7973 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
7974 }
7975 }
7976 return N;
7977 }
7978 if (VT.getVectorElementType().isFloatingPoint()) {
7979 SmallVector<SDValue, 8> Ops;
7980 MVT FVT = VT.getVectorElementType().getSimpleVT();
7981 assert(FVT == MVT::f32 || FVT == MVT::f16);
7982 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
7983 for (unsigned i = 0; i < NumElts; ++i)
7984 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
7985 Op.getOperand(i)));
7986 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
7987 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
7988 Val = LowerBUILD_VECTOR(Val, DAG, ST);
7989 if (Val.getNode())
7990 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7991 }
7992 if (usesOnlyOneValue) {
7993 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
7994 if (isConstant && Val.getNode())
7995 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
7996 }
7997 }
7998
7999 // If all elements are constants and the case above didn't get hit, fall back
8000 // to the default expansion, which will generate a load from the constant
8001 // pool.
8002 if (isConstant)
8003 return SDValue();
8004
8005 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8006 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8007 // length <= 2.
8008 if (NumElts >= 4)
8009 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8010 return shuffle;
8011
8012 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8013 // VCVT's
8014 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8015 return VCVT;
8016 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8017 return VCVT;
8018
8019 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8020 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8021 // into two 64-bit vectors; we might discover a better way to lower it.
8022 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8023 EVT ExtVT = VT.getVectorElementType();
8024 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8025 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8026 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8027 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8028 SDValue Upper =
8029 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8030 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8031 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8032 if (Lower && Upper)
8033 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8034 }
8035
8036 // Vectors with 32- or 64-bit elements can be built by directly assigning
8037 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8038 // will be legalized.
8039 if (EltSize >= 32) {
8040 // Do the expansion with floating-point types, since that is what the VFP
8041 // registers are defined to use, and since i64 is not legal.
8042 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8043 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8044 SmallVector<SDValue, 8> Ops;
8045 for (unsigned i = 0; i < NumElts; ++i)
8046 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8047 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8048 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8049 }
8050
8051 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8052 // know the default expansion would otherwise fall back on something even
8053 // worse. For a vector with one or two non-undef values, that's
8054 // scalar_to_vector for the elements followed by a shuffle (provided the
8055 // shuffle is valid for the target) and materialization element by element
8056 // on the stack followed by a load for everything else.
8057 if (!isConstant && !usesOnlyOneValue) {
8058 SDValue Vec = DAG.getUNDEF(VT);
8059 for (unsigned i = 0 ; i < NumElts; ++i) {
8060 SDValue V = Op.getOperand(i);
8061 if (V.isUndef())
8062 continue;
8063 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8064 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8065 }
8066 return Vec;
8067 }
8068
8069 return SDValue();
8070}
8071
8072// Gather data to see if the operation can be modelled as a
8073// shuffle in combination with VEXTs.
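// For example (illustrative), a v4i32 BUILD_VECTOR of
//   extractelt(A,2), extractelt(A,3), extractelt(B,0), extractelt(B,1)
// is really the two-source shuffle <2,3,4,5> of A and B, which a single
// VEXT.32 with #2 can produce; the code below recovers that structure.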
8074SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8075 SelectionDAG &DAG) const {
8076 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8077 SDLoc dl(Op);
8078 EVT VT = Op.getValueType();
8079 unsigned NumElts = VT.getVectorNumElements();
8080
8081 struct ShuffleSourceInfo {
8082 SDValue Vec;
8083 unsigned MinElt = std::numeric_limits<unsigned>::max();
8084 unsigned MaxElt = 0;
8085
8086 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8087 // be compatible with the shuffle we intend to construct. As a result
8088 // ShuffleVec will be some sliding window into the original Vec.
8089 SDValue ShuffleVec;
8090
8091 // Code should guarantee that element i in Vec starts at element "WindowBase
8092 // + i * WindowScale in ShuffleVec".
8093 int WindowBase = 0;
8094 int WindowScale = 1;
8095
8096 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8097
8098 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8099 };
8100
8101 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8102 // node.
8103 SmallVector<ShuffleSourceInfo, 2> Sources;
8104 for (unsigned i = 0; i < NumElts; ++i) {
8105 SDValue V = Op.getOperand(i);
8106 if (V.isUndef())
8107 continue;
8108 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8109 // A shuffle can only come from building a vector from various
8110 // elements of other vectors.
8111 return SDValue();
8112 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8113 // Furthermore, shuffles require a constant mask, whereas extractelts
8114 // accept variable indices.
8115 return SDValue();
8116 }
8117
8118 // Add this element source to the list if it's not already there.
8119 SDValue SourceVec = V.getOperand(0);
8120 auto Source = llvm::find(Sources, SourceVec);
8121 if (Source == Sources.end())
8122 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8123
8124 // Update the minimum and maximum lane number seen.
8125 unsigned EltNo = V.getConstantOperandVal(1);
8126 Source->MinElt = std::min(Source->MinElt, EltNo);
8127 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8128 }
8129
8130 // Currently only do something sane when at most two source vectors
8131 // are involved.
8132 if (Sources.size() > 2)
8133 return SDValue();
8134
8135 // Find out the smallest element size among result and two sources, and use
8136 // it as element size to build the shuffle_vector.
8137 EVT SmallestEltTy = VT.getVectorElementType();
8138 for (auto &Source : Sources) {
8139 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8140 if (SrcEltTy.bitsLT(SmallestEltTy))
8141 SmallestEltTy = SrcEltTy;
8142 }
8143 unsigned ResMultiplier =
8144 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8145 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8146 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8147
8148 // If the source vector is too wide or too narrow, we may nevertheless be able
8149 // to construct a compatible shuffle either by concatenating it with UNDEF or
8150 // extracting a suitable range of elements.
8151 for (auto &Src : Sources) {
8152 EVT SrcVT = Src.ShuffleVec.getValueType();
8153
8154 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8155 uint64_t VTSize = VT.getFixedSizeInBits();
8156 if (SrcVTSize == VTSize)
8157 continue;
8158
8159 // This stage of the search produces a source with the same element type as
8160 // the original, but with a total width matching the BUILD_VECTOR output.
8161 EVT EltVT = SrcVT.getVectorElementType();
8162 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8163 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8164
8165 if (SrcVTSize < VTSize) {
8166 if (2 * SrcVTSize != VTSize)
8167 return SDValue();
8168 // We can pad out the smaller vector for free, so if it's part of a
8169 // shuffle...
8170 Src.ShuffleVec =
8171 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8172 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8173 continue;
8174 }
8175
8176 if (SrcVTSize != 2 * VTSize)
8177 return SDValue();
8178
8179 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8180 // Span too large for a VEXT to cope
8181 return SDValue();
8182 }
8183
8184 if (Src.MinElt >= NumSrcElts) {
8185 // The extraction can just take the second half
8186 Src.ShuffleVec =
8187 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8188 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8189 Src.WindowBase = -NumSrcElts;
8190 } else if (Src.MaxElt < NumSrcElts) {
8191 // The extraction can just take the first half
8192 Src.ShuffleVec =
8193 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8194 DAG.getConstant(0, dl, MVT::i32));
8195 } else {
8196 // An actual VEXT is needed
8197 SDValue VEXTSrc1 =
8198 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8199 DAG.getConstant(0, dl, MVT::i32));
8200 SDValue VEXTSrc2 =
8201 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8202 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8203
8204 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8205 VEXTSrc2,
8206 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8207 Src.WindowBase = -Src.MinElt;
8208 }
8209 }
8210
8211 // Another possible incompatibility occurs from the vector element types. We
8212 // can fix this by bitcasting the source vectors to the same type we intend
8213 // for the shuffle.
8214 for (auto &Src : Sources) {
8215 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8216 if (SrcEltTy == SmallestEltTy)
8217 continue;
8218 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8219 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8220 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8221 Src.WindowBase *= Src.WindowScale;
8222 }
8223
8224 // Final check before we try to actually produce a shuffle.
8225 LLVM_DEBUG({
8226 for (auto Src : Sources)
8227 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
8228 });
8229
8230 // The stars all align, our next step is to produce the mask for the shuffle.
8231 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8232 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8233 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8234 SDValue Entry = Op.getOperand(i);
8235 if (Entry.isUndef())
8236 continue;
8237
8238 auto Src = llvm::find(Sources, Entry.getOperand(0));
8239 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8240
8241 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8242 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8243 // segment.
8244 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8245 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8246 VT.getScalarSizeInBits());
8247 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8248
8249 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8250 // starting at the appropriate offset.
8251 int *LaneMask = &Mask[i * ResMultiplier];
8252
8253 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8254 ExtractBase += NumElts * (Src - Sources.begin());
8255 for (int j = 0; j < LanesDefined; ++j)
8256 LaneMask[j] = ExtractBase + j;
8257 }
8258
8259
8260 // We can't handle more than two sources. This should have already
8261 // been checked before this point.
8262 assert(Sources.size() <= 2 && "Too many sources!");
8263
8264 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8265 for (unsigned i = 0; i < Sources.size(); ++i)
8266 ShuffleOps[i] = Sources[i].ShuffleVec;
8267
8268 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8269 ShuffleOps[1], Mask, DAG);
8270 if (!Shuffle)
8271 return SDValue();
8272 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8273}
8274
8275 enum ShuffleOpCodes {
8276 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8277 OP_VREV,
8278 OP_VDUP0,
8279 OP_VDUP1,
8280 OP_VDUP2,
8281 OP_VDUP3,
8282 OP_VEXT1,
8283 OP_VEXT2,
8284 OP_VEXT3,
8285 OP_VUZPL, // VUZP, left result
8286 OP_VUZPR, // VUZP, right result
8287 OP_VZIPL, // VZIP, left result
8288 OP_VZIPR, // VZIP, right result
8289 OP_VTRNL, // VTRN, left result
8290 OP_VTRNR // VTRN, right result
8291};
8292
8293static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8294 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8295 switch (OpNum) {
8296 case OP_COPY:
8297 case OP_VREV:
8298 case OP_VDUP0:
8299 case OP_VDUP1:
8300 case OP_VDUP2:
8301 case OP_VDUP3:
8302 return true;
8303 }
8304 return false;
8305}
8306
8307/// isShuffleMaskLegal - Targets can use this to indicate that they only
8308/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8309/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8310/// are assumed to be legal.
8311 bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8312 if (VT.getVectorNumElements() == 4 &&
8313 (VT.is128BitVector() || VT.is64BitVector())) {
8314 unsigned PFIndexes[4];
8315 for (unsigned i = 0; i != 4; ++i) {
8316 if (M[i] < 0)
8317 PFIndexes[i] = 8;
8318 else
8319 PFIndexes[i] = M[i];
8320 }
8321
8322 // Compute the index in the perfect shuffle table.
8323 unsigned PFTableIndex =
8324 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8325 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8326 unsigned Cost = (PFEntry >> 30);
8327
8328 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8329 return true;
8330 }
8331
8332 bool ReverseVEXT, isV_UNDEF;
8333 unsigned Imm, WhichResult;
8334
8335 unsigned EltSize = VT.getScalarSizeInBits();
8336 if (EltSize >= 32 ||
8337 ShuffleVectorSDNode::isSplatMask(M) ||
8338 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8339 isVREVMask(M, VT, 64) ||
8340 isVREVMask(M, VT, 32) ||
8341 isVREVMask(M, VT, 16))
8342 return true;
8343 else if (Subtarget->hasNEON() &&
8344 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8345 isVTBLMask(M, VT) ||
8346 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8347 return true;
8348 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8349 isReverseMask(M, VT))
8350 return true;
8351 else if (Subtarget->hasMVEIntegerOps() &&
8352 (isVMOVNMask(M, VT, true, false) ||
8353 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8354 return true;
8355 else if (Subtarget->hasMVEIntegerOps() &&
8356 (isTruncMask(M, VT, false, false) ||
8357 isTruncMask(M, VT, false, true) ||
8358 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8359 return true;
8360 else
8361 return false;
8362}
8363
8364/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8365/// the specified operations to build the shuffle.
8366 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8367 SDValue RHS, SelectionDAG &DAG,
8368 const SDLoc &dl) {
8369 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8370 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8371 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8372
8373 if (OpNum == OP_COPY) {
8374 if (LHSID == (1*9+2)*9+3) return LHS;
8375 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8376 return RHS;
8377 }
8378
8379 SDValue OpLHS, OpRHS;
8380 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8381 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8382 EVT VT = OpLHS.getValueType();
8383
8384 switch (OpNum) {
8385 default: llvm_unreachable("Unknown shuffle opcode!");
8386 case OP_VREV:
8387 // VREV divides the vector in half and swaps within the half.
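// e.g. for 32-bit elements VREV64 maps lanes <0,1,2,3> to <1,0,3,2>.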
8388 if (VT.getScalarSizeInBits() == 32)
8389 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8390 // vrev <4 x i16> -> VREV32
8391 if (VT.getScalarSizeInBits() == 16)
8392 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8393 // vrev <4 x i8> -> VREV16
8394 assert(VT.getScalarSizeInBits() == 8);
8395 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8396 case OP_VDUP0:
8397 case OP_VDUP1:
8398 case OP_VDUP2:
8399 case OP_VDUP3:
8400 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8401 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8402 case OP_VEXT1:
8403 case OP_VEXT2:
8404 case OP_VEXT3:
8405 return DAG.getNode(ARMISD::VEXT, dl, VT,
8406 OpLHS, OpRHS,
8407 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8408 case OP_VUZPL:
8409 case OP_VUZPR:
8410 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8411 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8412 case OP_VZIPL:
8413 case OP_VZIPR:
8414 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8415 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8416 case OP_VTRNL:
8417 case OP_VTRNR:
8418 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8419 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8420 }
8421}
8422
8423 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8424 ArrayRef<int> ShuffleMask,
8425 SelectionDAG &DAG) {
8426 // Check to see if we can use the VTBL instruction.
8427 SDValue V1 = Op.getOperand(0);
8428 SDValue V2 = Op.getOperand(1);
8429 SDLoc DL(Op);
8430
8431 SmallVector<SDValue, 8> VTBLMask;
8432 for (int I : ShuffleMask)
8433 VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));
8434
8435 if (V2.getNode()->isUndef())
8436 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8437 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8438
8439 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8440 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8441}
8442
8443 static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8444 SDLoc DL(Op);
8445 EVT VT = Op.getValueType();
8446
8447 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8448 "Expect a v8i16/v8f16/v16i8 type");
8449 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8450 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8451 // extract the first 8 bytes into the top double word and the last 8 bytes
8452 // into the bottom double word, through a new vector shuffle that will be
8453 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8454 std::vector<int> NewMask;
8455 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8456 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8457 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8458 NewMask.push_back(i);
8459 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8460}
8461
8462 static EVT getVectorTyFromPredicateVector(EVT VT) {
8463 switch (VT.getSimpleVT().SimpleTy) {
8464 case MVT::v2i1:
8465 return MVT::v2f64;
8466 case MVT::v4i1:
8467 return MVT::v4i32;
8468 case MVT::v8i1:
8469 return MVT::v8i16;
8470 case MVT::v16i1:
8471 return MVT::v16i8;
8472 default:
8473 llvm_unreachable("Unexpected vector predicate type");
8474 }
8475}
8476
8477 static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8478 SelectionDAG &DAG) {
8479 // Converting from boolean predicates to integers involves creating a vector
8480 // of all ones or all zeroes and selecting the lanes based upon the real
8481 // predicate.
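// Roughly: a v4i1 predicate <1,0,1,0> becomes the v4i32 value
// <0xffffffff, 0, 0xffffffff, 0>, materialized as a VSELECT between
// all-ones and all-zeroes v16i8 immediates on the predicate bits.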
8482 SDValue AllOnes =
8483 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8484 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8485
8486 SDValue AllZeroes =
8487 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8488 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8489
8490 // Get full vector type from predicate type
8491 EVT NewVT = getVectorTyFromPredicateVector(VT);
8492
8493 SDValue RecastV1;
8494 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8495 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8496 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8497 // since we know in hardware the sizes are really the same.
8498 if (VT != MVT::v16i1)
8499 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8500 else
8501 RecastV1 = Pred;
8502
8503 // Select either all ones or zeroes depending upon the real predicate bits.
8504 SDValue PredAsVector =
8505 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8506
8507 // Recast our new predicate-as-integer v16i8 vector into something
8508 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8509 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8510}
8511
8512 static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8513 const ARMSubtarget *ST) {
8514 EVT VT = Op.getValueType();
8515 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8516 ArrayRef<int> ShuffleMask = SVN->getMask();
8517
8518 assert(ST->hasMVEIntegerOps() &&
8519 "No support for vector shuffle of boolean predicates");
8520
8521 SDValue V1 = Op.getOperand(0);
8522 SDValue V2 = Op.getOperand(1);
8523 SDLoc dl(Op);
8524 if (isReverseMask(ShuffleMask, VT)) {
8525 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8526 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8527 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8528 DAG.getConstant(16, dl, MVT::i32));
8529 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8530 }
8531
8532 // Until we can come up with optimised cases for every single vector
8533 // shuffle in existence we have chosen the least painful strategy. This is
8534 // to essentially promote the boolean predicate to an 8-bit integer, where
8535 // each predicate represents a byte. Then we fall back on a normal integer
8536 // vector shuffle and convert the result back into a predicate vector. In
8537 // many cases the generated code might be even better than scalar code
8538 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8539 // fields in a register into 8 other arbitrary 2-bit fields!
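// For instance, shuffling a v8i1 predicate promotes it to a v8i16 of
// 0x0000/0xffff lanes, shuffles that with the normal integer lowering, and
// the VCMPZ(NE) against zero below turns the result back into a v8i1.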
8540 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8541 EVT NewVT = PredAsVector1.getValueType();
8542 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8543 : PromoteMVEPredVector(dl, V2, VT, DAG);
8544 assert(PredAsVector2.getValueType() == NewVT &&
8545 "Expected identical vector type in expanded i1 shuffle!");
8546
8547 // Do the shuffle!
8548 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8549 PredAsVector2, ShuffleMask);
8550
8551 // Now return the result of comparing the shuffled vector with zero,
8552 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8553 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8554 if (VT == MVT::v2i1) {
8555 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8556 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8557 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8558 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8559 }
8560 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8561 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8562}
8563
8564 static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8565 ArrayRef<int> ShuffleMask,
8566 SelectionDAG &DAG) {
8567 // Attempt to lower the vector shuffle using as many whole register movs as
8568 // possible. This is useful for types smaller than 32 bits, which would
8569 // often otherwise become a series of GPR movs.
8570 SDLoc dl(Op);
8571 EVT VT = Op.getValueType();
8572 if (VT.getScalarSizeInBits() >= 32)
8573 return SDValue();
8574
8575 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8576 "Unexpected vector type");
8577 int NumElts = VT.getVectorNumElements();
8578 int QuarterSize = NumElts / 4;
8579 // The four final parts of the vector, as i32's
8580 SDValue Parts[4];
8581
8582 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc. (but not
8583 // <u,u,u,u>), returning the vmov lane index
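// For example (v16i8, so each part covers four bytes): the mask
// <4,5,6,7, 16,17,18,19, u,u,u,u, 12,13,14,15> yields lane indices
// 1 (from V1), 0 (from V2), undef, and 3 (from V1), so three of the four
// i32 parts become single f32 lane extracts below.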
8584 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8585 // Detect which mov lane this would be from the first non-undef element.
8586 int MovIdx = -1;
8587 for (int i = 0; i < Length; i++) {
8588 if (ShuffleMask[Start + i] >= 0) {
8589 if (ShuffleMask[Start + i] % Length != i)
8590 return -1;
8591 MovIdx = ShuffleMask[Start + i] / Length;
8592 break;
8593 }
8594 }
8595 // If all items are undef, leave this for other combines
8596 if (MovIdx == -1)
8597 return -1;
8598 // Check the remaining values are the correct part of the same mov
8599 for (int i = 1; i < Length; i++) {
8600 if (ShuffleMask[Start + i] >= 0 &&
8601 (ShuffleMask[Start + i] / Length != MovIdx ||
8602 ShuffleMask[Start + i] % Length != i))
8603 return -1;
8604 }
8605 return MovIdx;
8606 };
8607
8608 for (int Part = 0; Part < 4; ++Part) {
8609 // Does this part look like a mov
8610 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8611 if (Elt != -1) {
8612 SDValue Input = Op->getOperand(0);
8613 if (Elt >= 4) {
8614 Input = Op->getOperand(1);
8615 Elt -= 4;
8616 }
8617 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8618 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8619 DAG.getConstant(Elt, dl, MVT::i32));
8620 }
8621 }
8622
8623 // Nothing interesting found, just return
8624 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8625 return SDValue();
8626
8627 // The other parts need to be built with the old shuffle vector, cast to a
8628 // v4i32 and extract_vector_elts
8629 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8630 SmallVector<int, 16> NewShuffleMask;
8631 for (int Part = 0; Part < 4; ++Part)
8632 for (int i = 0; i < QuarterSize; i++)
8633 NewShuffleMask.push_back(
8634 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8635 SDValue NewShuffle = DAG.getVectorShuffle(
8636 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8637 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8638
8639 for (int Part = 0; Part < 4; ++Part)
8640 if (!Parts[Part])
8641 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8642 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8643 }
8644 // Build a vector out of the various parts and bitcast it back to the original
8645 // type.
8646 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8647 return DAG.getBitcast(VT, NewVec);
8648}
8649
8650 static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8651 ArrayRef<int> ShuffleMask,
8652 SelectionDAG &DAG) {
8653 SDValue V1 = Op.getOperand(0);
8654 SDValue V2 = Op.getOperand(1);
8655 EVT VT = Op.getValueType();
8656 unsigned NumElts = VT.getVectorNumElements();
8657
8658 // A one-off identity mask is one that is mostly an identity mask from a
8659 // single source but contains a single element out-of-place, either from a
8660 // different vector or from another position in the same vector. As opposed to
8661 // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8662 // pair directly.
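// e.g. for v4i32, the mask <0,1,6,3> is an identity of V1 except element 2,
// which comes from V2 lane 2, so it lowers to
//   insertelt(V1, extractelt(V2, 2), 2)
// instead of a full shuffle.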
8663 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8664 int &OffElement) {
8665 OffElement = -1;
8666 int NonUndef = 0;
8667 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8668 if (Mask[i] == -1)
8669 continue;
8670 NonUndef++;
8671 if (Mask[i] != i + BaseOffset) {
8672 if (OffElement == -1)
8673 OffElement = i;
8674 else
8675 return false;
8676 }
8677 }
8678 return NonUndef > 2 && OffElement != -1;
8679 };
8680 int OffElement;
8681 SDValue VInput;
8682 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8683 VInput = V1;
8684 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8685 VInput = V2;
8686 else
8687 return SDValue();
8688
8689 SDLoc dl(Op);
8690 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8691 ? MVT::i32
8692 : VT.getScalarType();
8693 SDValue Elt = DAG.getNode(
8694 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8695 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8696 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8697 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8698 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8699}
8700
8701 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8702 const ARMSubtarget *ST) {
8703 SDValue V1 = Op.getOperand(0);
8704 SDValue V2 = Op.getOperand(1);
8705 SDLoc dl(Op);
8706 EVT VT = Op.getValueType();
8707 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8708 unsigned EltSize = VT.getScalarSizeInBits();
8709
8710 if (ST->hasMVEIntegerOps() && EltSize == 1)
8711 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8712
8713 // Convert shuffles that are directly supported on NEON to target-specific
8714 // DAG nodes, instead of keeping them as shuffles and matching them again
8715 // during code selection. This is more efficient and avoids the possibility
8716 // of inconsistencies between legalization and selection.
8717 // FIXME: floating-point vectors should be canonicalized to integer vectors
8718 // of the same size so that they get CSEd properly.
8719 ArrayRef<int> ShuffleMask = SVN->getMask();
8720
8721 if (EltSize <= 32) {
8722 if (SVN->isSplat()) {
8723 int Lane = SVN->getSplatIndex();
8724 // If this is undef splat, generate it via "just" vdup, if possible.
8725 if (Lane == -1) Lane = 0;
8726
8727 // Test if V1 is a SCALAR_TO_VECTOR.
8728 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8729 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8730 }
8731 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8732 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8733 // reaches it).
8734 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8735 !isa<ConstantSDNode>(V1.getOperand(0))) {
8736 bool IsScalarToVector = true;
8737 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8738 if (!V1.getOperand(i).isUndef()) {
8739 IsScalarToVector = false;
8740 break;
8741 }
8742 if (IsScalarToVector)
8743 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8744 }
8745 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8746 DAG.getConstant(Lane, dl, MVT::i32));
8747 }
8748
8749 bool ReverseVEXT = false;
8750 unsigned Imm = 0;
8751 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8752 if (ReverseVEXT)
8753 std::swap(V1, V2);
8754 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8755 DAG.getConstant(Imm, dl, MVT::i32));
8756 }
8757
8758 if (isVREVMask(ShuffleMask, VT, 64))
8759 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8760 if (isVREVMask(ShuffleMask, VT, 32))
8761 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8762 if (isVREVMask(ShuffleMask, VT, 16))
8763 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8764
8765 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8766 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8767 DAG.getConstant(Imm, dl, MVT::i32));
8768 }
8769
8770 // Check for Neon shuffles that modify both input vectors in place.
8771 // If both results are used, i.e., if there are two shuffles with the same
8772 // source operands and with masks corresponding to both results of one of
8773 // these operations, DAG memoization will ensure that a single node is
8774 // used for both shuffles.
8775 unsigned WhichResult = 0;
8776 bool isV_UNDEF = false;
8777 if (ST->hasNEON()) {
8778 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8779 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8780 if (isV_UNDEF)
8781 V2 = V1;
8782 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8783 .getValue(WhichResult);
8784 }
8785 }
8786 if (ST->hasMVEIntegerOps()) {
8787 if (isVMOVNMask(ShuffleMask, VT, false, false))
8788 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8789 DAG.getConstant(0, dl, MVT::i32));
8790 if (isVMOVNMask(ShuffleMask, VT, true, false))
8791 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8792 DAG.getConstant(1, dl, MVT::i32));
8793 if (isVMOVNMask(ShuffleMask, VT, true, true))
8794 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8795 DAG.getConstant(1, dl, MVT::i32));
8796 }
8797
8798 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8799 // shuffles that produce a result larger than their operands with:
8800 // shuffle(concat(v1, undef), concat(v2, undef))
8801 // ->
8802 // shuffle(concat(v1, v2), undef)
8803 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8804 //
8805 // This is useful in the general case, but there are special cases where
8806 // native shuffles produce larger results: the two-result ops.
8807 //
8808 // Look through the concat when lowering them:
8809 // shuffle(concat(v1, v2), undef)
8810 // ->
8811 // concat(VZIP(v1, v2):0, :1)
8812 //
8813 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8814 SDValue SubV1 = V1->getOperand(0);
8815 SDValue SubV2 = V1->getOperand(1);
8816 EVT SubVT = SubV1.getValueType();
8817
8818 // We expect these to have been canonicalized to -1.
8819 assert(llvm::all_of(ShuffleMask, [&](int i) {
8820 return i < (int)VT.getVectorNumElements();
8821 }) && "Unexpected shuffle index into UNDEF operand!");
8822
8823 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8824 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8825 if (isV_UNDEF)
8826 SubV2 = SubV1;
8827 assert((WhichResult == 0) &&
8828 "In-place shuffle of concat can only have one result!");
8829 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8830 SubV1, SubV2);
8831 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8832 Res.getValue(1));
8833 }
8834 }
8835 }
8836
8837 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8838 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8839 return V;
8840
8841 for (bool Top : {false, true}) {
8842 for (bool SingleSource : {false, true}) {
8843 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8844 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8845 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8846 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8847 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8848 SingleSource ? V1 : V2);
8849 if (Top) {
8850 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8851 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8852 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8853 }
8854 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8855 }
8856 }
8857 }
8858 }
8859
8860 // If the shuffle is not directly supported and it has 4 elements, use
8861 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8862 unsigned NumElts = VT.getVectorNumElements();
8863 if (NumElts == 4) {
8864 unsigned PFIndexes[4];
8865 for (unsigned i = 0; i != 4; ++i) {
8866 if (ShuffleMask[i] < 0)
8867 PFIndexes[i] = 8;
8868 else
8869 PFIndexes[i] = ShuffleMask[i];
8870 }
8871
8872 // Compute the index in the perfect shuffle table.
8873 unsigned PFTableIndex =
8874 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8875 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8876 unsigned Cost = (PFEntry >> 30);
8877
8878 if (Cost <= 4) {
8879 if (ST->hasNEON())
8880 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8881 else if (isLegalMVEShuffleOp(PFEntry)) {
8882 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8883 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8884 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8885 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8886 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8887 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8888 }
8889 }
8890 }
8891
8892 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8893 if (EltSize >= 32) {
8894 // Do the expansion with floating-point types, since that is what the VFP
8895 // registers are defined to use, and since i64 is not legal.
8896 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8897 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8898 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8899 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8900 SmallVector<SDValue, 8> Ops;
8901 for (unsigned i = 0; i < NumElts; ++i) {
8902 if (ShuffleMask[i] < 0)
8903 Ops.push_back(DAG.getUNDEF(EltVT));
8904 else
8905 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8906 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8907 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8908 dl, MVT::i32)));
8909 }
8910 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8911 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8912 }
8913
8914 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8915 isReverseMask(ShuffleMask, VT))
8916 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
8917
8918 if (ST->hasNEON() && VT == MVT::v8i8)
8919 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
8920 return NewOp;
8921
8922 if (ST->hasMVEIntegerOps())
8923 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
8924 return NewOp;
8925
8926 return SDValue();
8927}
8928
8929 static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8930 const ARMSubtarget *ST) {
8931 EVT VecVT = Op.getOperand(0).getValueType();
8932 SDLoc dl(Op);
8933
8934 assert(ST->hasMVEIntegerOps() &&
8935 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8936
8937 SDValue Conv =
8938 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8939 unsigned Lane = Op.getConstantOperandVal(2);
8940 unsigned LaneWidth =
8941 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8942 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
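// e.g. a v8i1 predicate occupies 16 bits with 2 bits per lane, so inserting
// into lane 3 sign-extends the new bit and BFIs it into bits [7:6] of the
// i32 holding the predicate.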
8943 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
8944 Op.getOperand(1), DAG.getValueType(MVT::i1));
8945 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
8946 DAG.getConstant(~Mask, dl, MVT::i32));
8947 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8948}
8949
8950SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8951 SelectionDAG &DAG) const {
8952 // INSERT_VECTOR_ELT is legal only for immediate indexes.
8953 SDValue Lane = Op.getOperand(2);
8954 if (!isa<ConstantSDNode>(Lane))
8955 return SDValue();
8956
8957 SDValue Elt = Op.getOperand(1);
8958 EVT EltVT = Elt.getValueType();
8959
8960 if (Subtarget->hasMVEIntegerOps() &&
8961 Op.getValueType().getScalarSizeInBits() == 1)
8962 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8963
8964 if (getTypeAction(*DAG.getContext(), EltVT) ==
8965 TargetLowering::TypePromoteFloat) {
8966 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
8967 // but the type system will try to do that if we don't intervene.
8968 // Reinterpret any such vector-element insertion as one with the
8969 // corresponding integer types.
8970
8971 SDLoc dl(Op);
8972
8973 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
8974 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
8975 TargetLowering::TypePromoteFloat);
8976
8977 SDValue VecIn = Op.getOperand(0);
8978 EVT VecVT = VecIn.getValueType();
8979 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
8980 VecVT.getVectorNumElements());
8981
8982 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
8983 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
8984 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
8985 IVecIn, IElt, Lane);
8986 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
8987 }
8988
8989 return Op;
8990}
8991
8992 static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8993 const ARMSubtarget *ST) {
8994 EVT VecVT = Op.getOperand(0).getValueType();
8995 SDLoc dl(Op);
8996
8997 assert(ST->hasMVEIntegerOps() &&
8998 "LowerEXTRACT_VECTOR_ELT_i1 called without MVE!");
8999
9000 SDValue Conv =
9001 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9002 unsigned Lane = Op.getConstantOperandVal(1);
9003 unsigned LaneWidth =
9004 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9005 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9006 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9007 return Shift;
9008}
9009
9010 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9011 const ARMSubtarget *ST) {
9012 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9013 SDValue Lane = Op.getOperand(1);
9014 if (!isa<ConstantSDNode>(Lane))
9015 return SDValue();
9016
9017 SDValue Vec = Op.getOperand(0);
9018 EVT VT = Vec.getValueType();
9019
9020 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9021 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9022
9023 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9024 SDLoc dl(Op);
9025 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9026 }
9027
9028 return Op;
9029}
9030
9031 static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9032 const ARMSubtarget *ST) {
9033 SDLoc dl(Op);
9034 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9035 "Unexpected custom CONCAT_VECTORS lowering");
9036 assert(isPowerOf2_32(Op.getNumOperands()) &&
9037 "Unexpected custom CONCAT_VECTORS lowering");
9038 assert(ST->hasMVEIntegerOps() &&
9039 "CONCAT_VECTORS lowering only supported for MVE");
9040
9041 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9042 EVT Op1VT = V1.getValueType();
9043 EVT Op2VT = V2.getValueType();
9044 assert(Op1VT == Op2VT && "Operand types don't match!");
9045 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9046 "Unexpected i1 concat operations!");
9047 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9048
9049 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9050 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9051
9052 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9053 // promoted to v8i16, etc.
9054 MVT ElType =
9055 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9056 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9057
9058 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9059 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9060 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9061 // ConcatVT.
9062 SDValue ConVec =
9063 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9064 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9065 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9066 }
9067
9068 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9069 // to be the right size for the destination. For example, if Op1 is v4i1
9070 // then the promoted vector is v4i32. The result of concatenation gives a
9071 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9072 // needs truncating to i16 and inserting in the result.
9073 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9074 EVT NewVT = NewV.getValueType();
9075 EVT ConcatVT = ConVec.getValueType();
9076 unsigned ExtScale = 1;
9077 if (NewVT == MVT::v2f64) {
9078 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9079 ExtScale = 2;
9080 }
9081 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9082 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9083 DAG.getIntPtrConstant(i * ExtScale, dl));
9084 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9085 DAG.getConstant(j, dl, MVT::i32));
9086 }
9087 return ConVec;
9088 };
9089 unsigned j = 0;
9090 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9091 ConVec = ExtractInto(NewV1, ConVec, j);
9092 ConVec = ExtractInto(NewV2, ConVec, j);
9093
9094 // Now return the result of comparing the subvector with zero, which will
9095 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9096 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9097 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9098 };
9099
9100 // Concat each pair of subvectors and pack into the lower half of the array.
9101 SmallVector<SDValue> ConcatOps(Op->ops());
9102 while (ConcatOps.size() > 1) {
9103 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9104 SDValue V1 = ConcatOps[I];
9105 SDValue V2 = ConcatOps[I + 1];
9106 ConcatOps[I / 2] = ConcatPair(V1, V2);
9107 }
9108 ConcatOps.resize(ConcatOps.size() / 2);
9109 }
9110 return ConcatOps[0];
9111}
9112
9113 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9114 const ARMSubtarget *ST) {
9115 EVT VT = Op->getValueType(0);
9116 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9117 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9118
9119 // The only time a CONCAT_VECTORS operation can have legal types is when
9120 // two 64-bit vectors are concatenated to a 128-bit vector.
9121 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9122 "unexpected CONCAT_VECTORS");
9123 SDLoc dl(Op);
9124 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9125 SDValue Op0 = Op.getOperand(0);
9126 SDValue Op1 = Op.getOperand(1);
9127 if (!Op0.isUndef())
9128 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9129 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9130 DAG.getIntPtrConstant(0, dl));
9131 if (!Op1.isUndef())
9132 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9133 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9134 DAG.getIntPtrConstant(1, dl));
9135 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9136}
9137
9138 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9139 const ARMSubtarget *ST) {
9140 SDValue V1 = Op.getOperand(0);
9141 SDValue V2 = Op.getOperand(1);
9142 SDLoc dl(Op);
9143 EVT VT = Op.getValueType();
9144 EVT Op1VT = V1.getValueType();
9145 unsigned NumElts = VT.getVectorNumElements();
9146 unsigned Index = V2->getAsZExtVal();
9147
9148 assert(VT.getScalarSizeInBits() == 1 &&
9149 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9150 assert(ST->hasMVEIntegerOps() &&
9151 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9152
9153 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9154
9155 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9156 // promoted to v8i16, etc.
9157
9158 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9159
9160 if (NumElts == 2) {
9161 EVT SubVT = MVT::v4i32;
9162 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9163 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9164 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9165 DAG.getIntPtrConstant(i, dl));
9166 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9167 DAG.getConstant(j, dl, MVT::i32));
9168 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9169 DAG.getConstant(j + 1, dl, MVT::i32));
9170 }
9171 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9172 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9173 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9174 }
9175
9176 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9177 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9178 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9179 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9180 DAG.getIntPtrConstant(i, dl));
9181 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9182 DAG.getConstant(j, dl, MVT::i32));
9183 }
9184
9185 // Now return the result of comparing the subvector with zero,
9186 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9187 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9188 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9189}
9190
9191// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9192 static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9193 const ARMSubtarget *ST) {
9194 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9195 EVT VT = N->getValueType(0);
9196 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9197 "Expected a vector i1 type!");
9198 SDValue Op = N->getOperand(0);
9199 EVT FromVT = Op.getValueType();
9200 SDLoc DL(N);
9201
9202 SDValue And =
9203 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9204 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9205 DAG.getCondCode(ISD::SETNE));
9206}
9207
9208 static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9209 const ARMSubtarget *Subtarget) {
9210 if (!Subtarget->hasMVEIntegerOps())
9211 return SDValue();
9212
9213 EVT ToVT = N->getValueType(0);
9214 if (ToVT.getScalarType() == MVT::i1)
9215 return LowerTruncatei1(N, DAG, Subtarget);
9216
9217 // MVE does not have a single instruction to perform the truncation of a v4i32
9218 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9219 // Most of the instructions in MVE follow the 'Beats' system, where moving
9220 // values from different lanes is usually something that the instructions
9221 // avoid.
9222 //
9223 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9224 // which take the top/bottom half of a larger lane and extend it (or do the
9225 // opposite, truncating into the top/bottom lane from a larger lane). Note
9226 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9227 // bottom 16bits from each vector lane. This works really well with T/B
9228 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9229 // to change order.
9230 //
9231 // But truncates and sext/zext are always going to be fairly common from llvm.
9232 // We have several options for how to deal with them:
9233 // - Wherever possible combine them into an instruction that makes them
9234 // "free". This includes loads/stores, which can perform the trunc as part
9235 // of the memory operation. Or certain shuffles that can be turned into
9236 // VMOVN/VMOVL.
9237 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9238 // trunc(mul(sext(a), sext(b))) may become
9239 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9240 // this case can use VMULL). This is performed in the
9241 // MVELaneInterleavingPass.
9242 // - Otherwise we have an option. By default we would expand the
9243 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9244 // registers. One for each vector lane in the vector. This can obviously be
9245 // very expensive.
9246 // - The other option is to use the fact that loads/store can extend/truncate
9247 // to turn a trunc into two truncating stack stores and a stack reload. This
9248 // becomes 3 back-to-back memory operations, but at least that is less than
9249 // all the insert/extracts.
9250 //
9251 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9252 // are either optimized where they can be, or eventually lowered into stack
9253 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9254 // too early, where other instructions would be better, and stops us from
9255 // having to reconstruct multiple buildvector shuffles into loads/stores.
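// As a concrete example, trunc(v8i32 x) to v8i16 is split into its two v4i32
// halves below and wrapped in a single ARMISD::MVETRUNC(lo, hi); if no better
// combine is found later, that node is what gets expanded into the two
// truncating stores plus a reload described above.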
9256 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9257 return SDValue();
9258 EVT FromVT = N->getOperand(0).getValueType();
9259 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9260 return SDValue();
9261
9262 SDValue Lo, Hi;
9263 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9264 SDLoc DL(N);
9265 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9266}
9267
9268 static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9269 const ARMSubtarget *Subtarget) {
9270 if (!Subtarget->hasMVEIntegerOps())
9271 return SDValue();
9272
9273 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9274
9275 EVT ToVT = N->getValueType(0);
9276 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9277 return SDValue();
9278 SDValue Op = N->getOperand(0);
9279 EVT FromVT = Op.getValueType();
9280 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9281 return SDValue();
9282
9283 SDLoc DL(N);
9284 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9285 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9286 ExtVT = MVT::v8i16;
9287
9288 unsigned Opcode =
9289 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9290 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9291 SDValue Ext1 = Ext.getValue(1);
9292
9293 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9294 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9295 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9296 }
9297
9298 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9299}
9300
9301/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9302/// element has been zero/sign-extended, depending on the isSigned parameter,
9303/// from an integer type half its size.
9304 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9305 bool isSigned) {
9306 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9307 EVT VT = N->getValueType(0);
9308 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9309 SDNode *BVN = N->getOperand(0).getNode();
9310 if (BVN->getValueType(0) != MVT::v4i32 ||
9311 BVN->getOpcode() != ISD::BUILD_VECTOR)
9312 return false;
9313 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9314 unsigned HiElt = 1 - LoElt;
9315 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9316 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9317 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt + 2));
9318 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt + 2));
9319 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9320 return false;
9321 if (isSigned) {
9322 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9323 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9324 return true;
9325 } else {
9326 if (Hi0->isZero() && Hi1->isZero())
9327 return true;
9328 }
9329 return false;
9330 }
9331
9332 if (N->getOpcode() != ISD::BUILD_VECTOR)
9333 return false;
9334
9335 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9336 SDNode *Elt = N->getOperand(i).getNode();
9337 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9338 unsigned EltSize = VT.getScalarSizeInBits();
9339 unsigned HalfSize = EltSize / 2;
9340 if (isSigned) {
9341 if (!isIntN(HalfSize, C->getSExtValue()))
9342 return false;
9343 } else {
9344 if (!isUIntN(HalfSize, C->getZExtValue()))
9345 return false;
9346 }
9347 continue;
9348 }
9349 return false;
9350 }
9351
9352 return true;
9353}
9354
9355/// isSignExtended - Check if a node is a vector value that is sign-extended
9356/// or a constant BUILD_VECTOR with sign-extended elements.
9357 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9358 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9359 return true;
9360 if (isExtendedBUILD_VECTOR(N, DAG, true))
9361 return true;
9362 return false;
9363}
9364
9365/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9366/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9367 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9368 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9369 ISD::isZEXTLoad(N))
9370 return true;
9371 if (isExtendedBUILD_VECTOR(N, DAG, false))
9372 return true;
9373 return false;
9374}
9375
9376static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9377 if (OrigVT.getSizeInBits() >= 64)
9378 return OrigVT;
9379
9380 assert(OrigVT.isSimple() && "Expecting a simple value type");
9381
9382 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9383 switch (OrigSimpleTy) {
9384 default: llvm_unreachable("Unexpected Vector Type");
9385 case MVT::v2i8:
9386 case MVT::v2i16:
9387 return MVT::v2i32;
9388 case MVT::v4i8:
9389 return MVT::v4i16;
9390 }
9391}
9392
9393/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9394/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9395/// We insert the required extension here to get the vector to fill a D register.
9396 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9397 const EVT &OrigTy,
9398 const EVT &ExtTy,
9399 unsigned ExtOpcode) {
9400 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9401 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9402 // 64-bits we need to insert a new extension so that it will be 64-bits.
9403 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9404 if (OrigTy.getSizeInBits() >= 64)
9405 return N;
9406
9407 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9408 EVT NewVT = getExtensionTo64Bits(OrigTy);
9409
9410 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9411}
9412
9413/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9414/// does not do any sign/zero extension. If the original vector is less
9415/// than 64 bits, an appropriate extension will be added after the load to
9416/// reach a total size of 64 bits. We have to add the extension separately
9417/// because ARM does not have a sign/zero extending load for vectors.
9418 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9419 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9420
9421 // The load already has the right type.
9422 if (ExtendedTy == LD->getMemoryVT())
9423 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9424 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9425 LD->getMemOperand()->getFlags());
9426
9427 // We need to create a zextload/sextload. We cannot just create a load
9428 // followed by a zext/sext node because LowerMUL is also run during normal
9429 // operation legalization where we can't create illegal types.
9430 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9431 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9432 LD->getMemoryVT(), LD->getAlign(),
9433 LD->getMemOperand()->getFlags());
9434}
9435
9436/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9437/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9438/// the unextended value. The unextended vector should be 64 bits so that it can
9439/// be used as an operand to a VMULL instruction. If the original vector size
9440 /// before extension is less than 64 bits we add an extension to resize
9441/// the vector to 64 bits.
9442 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9443 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9444 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9445 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9446 N->getOperand(0)->getValueType(0),
9447 N->getValueType(0),
9448 N->getOpcode());
9449
9450 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9451 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9452 "Expected extending load");
9453
9454 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9455 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9456 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9457 SDValue extLoad =
9458 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9459 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9460
9461 return newLoad;
9462 }
9463
9464 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9465 // have been legalized as a BITCAST from v4i32.
9466 if (N->getOpcode() == ISD::BITCAST) {
9467 SDNode *BVN = N->getOperand(0).getNode();
9468 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9469 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9470 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9471 return DAG.getBuildVector(
9472 MVT::v2i32, SDLoc(N),
9473 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9474 }
9475 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9476 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9477 EVT VT = N->getValueType(0);
9478 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9479 unsigned NumElts = VT.getVectorNumElements();
9480 MVT TruncVT = MVT::getIntegerVT(EltSize);
9481 SmallVector<SDValue, 8> Ops;
9482 SDLoc dl(N);
9483 for (unsigned i = 0; i != NumElts; ++i) {
9484 const APInt &CInt = N->getConstantOperandAPInt(i);
9485 // Element types smaller than 32 bits are not legal, so use i32 elements.
9486 // The values are implicitly truncated so sext vs. zext doesn't matter.
9487 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9488 }
9489 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9490}
9491
9492static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9493 unsigned Opcode = N->getOpcode();
9494 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9495 SDNode *N0 = N->getOperand(0).getNode();
9496 SDNode *N1 = N->getOperand(1).getNode();
9497 return N0->hasOneUse() && N1->hasOneUse() &&
9498 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9499 }
9500 return false;
9501}
9502
9503static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9504 unsigned Opcode = N->getOpcode();
9505 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9506 SDNode *N0 = N->getOperand(0).getNode();
9507 SDNode *N1 = N->getOperand(1).getNode();
9508 return N0->hasOneUse() && N1->hasOneUse() &&
9509 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9510 }
9511 return false;
9512}
9513
9514 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9515 // Multiplications are only custom-lowered for 128-bit vectors so that
9516 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9517 EVT VT = Op.getValueType();
9518 assert(VT.is128BitVector() && VT.isInteger() &&
9519 "unexpected type for custom-lowering ISD::MUL");
9520 SDNode *N0 = Op.getOperand(0).getNode();
9521 SDNode *N1 = Op.getOperand(1).getNode();
9522 unsigned NewOpc = 0;
9523 bool isMLA = false;
9524 bool isN0SExt = isSignExtended(N0, DAG);
9525 bool isN1SExt = isSignExtended(N1, DAG);
9526 if (isN0SExt && isN1SExt)
9527 NewOpc = ARMISD::VMULLs;
9528 else {
9529 bool isN0ZExt = isZeroExtended(N0, DAG);
9530 bool isN1ZExt = isZeroExtended(N1, DAG);
9531 if (isN0ZExt && isN1ZExt)
9532 NewOpc = ARMISD::VMULLu;
9533 else if (isN1SExt || isN1ZExt) {
9534 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9535 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9536 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9537 NewOpc = ARMISD::VMULLs;
9538 isMLA = true;
9539 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9540 NewOpc = ARMISD::VMULLu;
9541 isMLA = true;
9542 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9543 std::swap(N0, N1);
9544 NewOpc = ARMISD::VMULLu;
9545 isMLA = true;
9546 }
9547 }
9548
9549 if (!NewOpc) {
9550 if (VT == MVT::v2i64)
9551 // Fall through to expand this. It is not legal.
9552 return SDValue();
9553 else
9554 // Other vector multiplications are legal.
9555 return Op;
9556 }
9557 }
9558
9559 // Legalize to a VMULL instruction.
9560 SDLoc DL(Op);
9561 SDValue Op0;
9562 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9563 if (!isMLA) {
9564 Op0 = SkipExtensionForVMULL(N0, DAG);
9565 assert(Op0.getValueType().is64BitVector() &&
9566 Op1.getValueType().is64BitVector() &&
9567 "unexpected types for extended operands to VMULL");
9568 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9569 }
9570
9571 // Optimize (zext A + zext B) * C into (VMULL A, C) + (VMULL B, C) during
9572 // isel lowering to take advantage of the no-stall back-to-back vmul + vmla.
9573 // vmull q0, d4, d6
9574 // vmlal q0, d5, d6
9575 // is faster than
9576 // vaddl q0, d4, d5
9577 // vmovl q1, d6
9578 // vmul q0, q0, q1
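// In DAG terms the transform below is:
//   (ADD/SUB (zext A), (zext B)) * (zext C)
//     --> ADD/SUB (VMULL A, C), (VMULL B, C)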
9579 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9580 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9581 EVT Op1VT = Op1.getValueType();
9582 return DAG.getNode(N0->getOpcode(), DL, VT,
9583 DAG.getNode(NewOpc, DL, VT,
9584 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9585 DAG.getNode(NewOpc, DL, VT,
9586 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9587}
9588
9589 static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9590 SelectionDAG &DAG) {
9591 // TODO: Should this propagate fast-math-flags?
9592
9593 // Convert to float
9594 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9595 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9596 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9597 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9598 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9599 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9600 // Get reciprocal estimate.
9601 // float4 recip = vrecpeq_f32(yf);
9602 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9603 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9604 Y);
9605 // Because char has a smaller range than uchar, we can actually get away
9606 // without any Newton steps. This requires that we use a weird bias
9607 // of 0xb000, however (again, this has been exhaustively tested).
9608 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
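// The reciprocal estimate makes xf*recip come out slightly low; adding
// 0xb000 to the raw float bits before the float->int conversion nudges it
// up just enough that truncation gives the exact quotient for all i8 inputs
// (per the exhaustive testing mentioned above).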
9609 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9610 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9611 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9612 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9613 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9614 // Convert back to short.
9615 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9616 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9617 return X;
9618}
9619
9620 static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9621 SelectionDAG &DAG) {
9622 // TODO: Should this propagate fast-math-flags?
9623
9624 SDValue N2;
9625 // Convert to float.
9626 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9627 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9628 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9629 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9630 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9631 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9632
9633 // Use reciprocal estimate and one refinement step.
9634 // float4 recip = vrecpeq_f32(yf);
9635 // recip *= vrecpsq_f32(yf, recip);
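// VRECPS(y, r) computes (2 - y*r), so recip * VRECPS(y, recip) is one
// Newton-Raphson iteration for 1/y, roughly doubling the number of accurate
// bits in the estimate.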
9636 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9637 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9638 N1);
9639 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9640 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9641 N1, N2);
9642 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9643 // Because short has a smaller range than ushort, we can actually get away
9644 // with only a single Newton step. This requires that we use a weird bias
9645 // of 0x89, however (again, this has been exhaustively tested).
9646 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9647 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9648 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9649 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9650 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9651 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9652 // Convert back to integer and return.
9653 // return vmovn_s32(vcvt_s32_f32(result));
9654 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9655 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9656 return N0;
9657}
9658
9659 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9660 const ARMSubtarget *ST) {
9661 EVT VT = Op.getValueType();
9662 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9663 "unexpected type for custom-lowering ISD::SDIV");
9664
9665 SDLoc dl(Op);
9666 SDValue N0 = Op.getOperand(0);
9667 SDValue N1 = Op.getOperand(1);
9668 SDValue N2, N3;
9669
9670 if (VT == MVT::v8i8) {
9671 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9672 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9673
9674 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9675 DAG.getIntPtrConstant(4, dl));
9676 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9677 DAG.getIntPtrConstant(4, dl));
9678 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9679 DAG.getIntPtrConstant(0, dl));
9680 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9681 DAG.getIntPtrConstant(0, dl));
9682
9683 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9684 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9685
9686 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9687 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9688
9689 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9690 return N0;
9691 }
9692 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9693}
9694
9695 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9696 const ARMSubtarget *ST) {
9697 // TODO: Should this propagate fast-math-flags?
9698 EVT VT = Op.getValueType();
9699 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9700 "unexpected type for custom-lowering ISD::UDIV");
9701
9702 SDLoc dl(Op);
9703 SDValue N0 = Op.getOperand(0);
9704 SDValue N1 = Op.getOperand(1);
9705 SDValue N2, N3;
9706
9707 if (VT == MVT::v8i8) {
9708 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9709 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9710
9711 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9712 DAG.getIntPtrConstant(4, dl));
9713 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9714 DAG.getIntPtrConstant(4, dl));
9715 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9716 DAG.getIntPtrConstant(0, dl));
9717 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9718 DAG.getIntPtrConstant(0, dl));
9719
9720 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9721 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9722
9723 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9724 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9725
9726 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9727 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9728 MVT::i32),
9729 N0);
9730 return N0;
9731 }
9732
9733 // v4i16 udiv ... Convert to float.
9734 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9735 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9736 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9737 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9738 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9739 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9740
9741 // Use reciprocal estimate and two refinement steps.
9742 // float4 recip = vrecpeq_f32(yf);
9743 // recip *= vrecpsq_f32(yf, recip);
9744 // recip *= vrecpsq_f32(yf, recip);
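// Each VRECPS step roughly doubles the accurate bits of the ~8-bit VRECPE
// estimate, so two steps give a reciprocal precise enough that, with the
// +2 ulp bias below, the truncated quotient is exact for all u16 inputs.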
9745 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9746 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9747 BN1);
9748 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9749 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9750 BN1, N2);
9751 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9752 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9753 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9754 BN1, N2);
9755 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9756 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9757 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9758 // and that it will never cause us to return an answer too large).
9759 // float4 result = as_float4(as_int4(xf*recip) + 2);
9760 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9761 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9762 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9763 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9764 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9765 // Convert back to integer and return.
9766 // return vmovn_u32(vcvt_s32_f32(result));
9767 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9768 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9769 return N0;
9770}
9771
9772 static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9773 SDNode *N = Op.getNode();
9774 EVT VT = N->getValueType(0);
9775 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9776
9777 SDValue Carry = Op.getOperand(2);
9778
9779 SDLoc DL(Op);
9780
9781 SDValue Result;
9782 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9783 // This converts the boolean value carry into the carry flag.
9784 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9785
9786 // Do the addition proper using the carry flag we wanted.
9787 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9788 Op.getOperand(1), Carry);
9789
9790 // Now convert the carry flag into a boolean value.
9791 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9792 } else {
9793 // ARMISD::SUBE expects a carry, not a borrow as ISD::USUBO_CARRY provides,
9794 // so we have to invert the carry first.
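// (An ARM subtraction sets C = 1 when there is *no* borrow, which is the
// inverse of ISD::USUBO_CARRY's borrow-style carry; hence the 1 - Carry
// computations here and after the SUBE below.)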
9795 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9796 DAG.getConstant(1, DL, MVT::i32), Carry);
9797 // This converts the boolean value carry into the carry flag.
9798 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9799
9800 // Do the subtraction proper using the carry flag we wanted.
9801 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9802 Op.getOperand(1), Carry);
9803
9804 // Now convert the carry flag into a boolean value.
9805 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9806 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9807 // by ISD::USUBO_CARRY, so compute 1 - C.
9808 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9809 DAG.getConstant(1, DL, MVT::i32), Carry);
9810 }
9811
9812 // Return both values.
9813 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9814}
9815
9816SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9817 assert(Subtarget->isTargetDarwin());
9818
9819 // For iOS, we want to call an alternative entry point: __sincos_stret;
9820 // the return values are passed via sret.
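// Under APCS the {sin, cos} pair is returned indirectly through a hidden
// sret pointer (hence the stack slot created below); otherwise the pair
// comes back directly in registers and no stack object is needed.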
9821 SDLoc dl(Op);
9822 SDValue Arg = Op.getOperand(0);
9823 EVT ArgVT = Arg.getValueType();
9824 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9825 auto PtrVT = getPointerTy(DAG.getDataLayout());
9826
9827 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9828
9829 // Pair of floats / doubles used to pass the result.
9830 Type *RetTy = StructType::get(ArgTy, ArgTy);
9831 auto &DL = DAG.getDataLayout();
9832
9833 ArgListTy Args;
9834 bool ShouldUseSRet = getTM().isAPCS_ABI();
9835 SDValue SRet;
9836 if (ShouldUseSRet) {
9837 // Create stack object for sret.
9838 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9839 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9840 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9841 SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
9842
9844 Entry.IsSExt = false;
9845 Entry.IsZExt = false;
9846 Entry.IsSRet = true;
9847 Args.push_back(Entry);
9848 RetTy = Type::getVoidTy(*DAG.getContext());
9849 }
9850
9851 Args.emplace_back(Arg, ArgTy);
9852
9853 RTLIB::Libcall LC =
9854 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9855 const char *LibcallName = getLibcallName(LC);
9856 CallingConv::ID CC = getLibcallCallingConv(LC);
9857 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9858
9859 TargetLowering::CallLoweringInfo CLI(DAG);
9860 CLI.setDebugLoc(dl)
9861 .setChain(DAG.getEntryNode())
9862 .setCallee(CC, RetTy, Callee, std::move(Args))
9863 .setDiscardResult(ShouldUseSRet);
9864 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9865
9866 if (!ShouldUseSRet)
9867 return CallResult.first;
9868
9869 SDValue LoadSin =
9870 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9871
9872 // Address of cos field.
9873 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9874 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9875 SDValue LoadCos =
9876 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9877
9878 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9879 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9880 LoadSin.getValue(0), LoadCos.getValue(0));
9881}
9882
9883SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9884 bool Signed,
9885 SDValue &Chain) const {
9886 EVT VT = Op.getValueType();
9887 assert((VT == MVT::i32 || VT == MVT::i64) &&
9888 "unexpected type for custom lowering DIV");
9889 SDLoc dl(Op);
9890
9891 const auto &DL = DAG.getDataLayout();
9892 RTLIB::Libcall LC;
9893 if (Signed)
9894 LC = VT == MVT::i32 ? RTLIB::SDIVREM_I32 : RTLIB::SDIVREM_I64;
9895 else
9896 LC = VT == MVT::i32 ? RTLIB::UDIVREM_I32 : RTLIB::UDIVREM_I64;
9897
9898 const char *Name = getLibcallName(LC);
9899 SDValue ES = DAG.getExternalSymbol(Name, getPointerTy(DL));
9900
9901 ArgListTy Args;
9902
9903 for (auto AI : {1, 0}) {
9904 SDValue Operand = Op.getOperand(AI);
9905 Args.emplace_back(Operand,
9906 Operand.getValueType().getTypeForEVT(*DAG.getContext()));
9907 }
9908
9909 CallLoweringInfo CLI(DAG);
9910 CLI.setDebugLoc(dl)
9911 .setChain(Chain)
9913 ES, std::move(Args));
9914
9915 return LowerCallTo(CLI).first;
9916}
9917
9918// This is a code size optimisation: return the original SDIV node to
9919// DAGCombiner when we don't want to expand SDIV into a sequence of
9920 // instructions, and an empty node otherwise, which will cause the
9921// SDIV to be expanded in DAGCombine.
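// E.g. at minsize with hardware divide, 'x / 8' is better left as a single
// sdiv (plus at most a small immediate move) than expanded into the longer
// shift/add sequence produced by the generic power-of-2 lowering.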
9922SDValue
9923ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9924 SelectionDAG &DAG,
9925 SmallVectorImpl<SDNode *> &Created) const {
9926 // TODO: Support SREM
9927 if (N->getOpcode() != ISD::SDIV)
9928 return SDValue();
9929
9930 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
9931 const bool MinSize = ST.hasMinSize();
9932 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9933 : ST.hasDivideInARMMode();
9934
9935 // Don't touch vector types; rewriting this may lead to scalarizing
9936 // the int divs.
9937 if (N->getOperand(0).getValueType().isVector())
9938 return SDValue();
9939
9940 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9941 // hwdiv support for this to be really profitable.
9942 if (!(MinSize && HasDivide))
9943 return SDValue();
9944
9945 // ARM mode is a bit simpler than Thumb: we can handle large power
9946 // of 2 immediates with 1 mov instruction; no further checks required,
9947 // just return the sdiv node.
9948 if (!ST.isThumb())
9949 return SDValue(N, 0);
9950
9951 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9952 // and thus lose the code size benefits of a MOVS that requires only 2 bytes.
9953 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9954 // but as it's doing exactly this, it's not worth the trouble to get TTI.
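// (Powers of two up to 128 still fit the 2-byte tMOVi8 encoding; the next
// one, 256, already needs a wide 4-byte move.)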
9955 if (Divisor.sgt(128))
9956 return SDValue();
9957
9958 return SDValue(N, 0);
9959}
9960
9961SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9962 bool Signed) const {
9963 assert(Op.getValueType() == MVT::i32 &&
9964 "unexpected type for custom lowering DIV");
9965 SDLoc dl(Op);
9966
9967 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9968 DAG.getEntryNode(), Op.getOperand(1));
9969
9970 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9971}
9972
9973 static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
9974 SDLoc DL(N);
9975 SDValue Op = N->getOperand(1);
9976 if (N->getValueType(0) == MVT::i32)
9977 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
9978 SDValue Lo, Hi;
9979 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
9980 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
9981 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
9982}
9983
9984void ARMTargetLowering::ExpandDIV_Windows(
9985 SDValue Op, SelectionDAG &DAG, bool Signed,
9986 SmallVectorImpl<SDValue> &Results) const {
9987 const auto &DL = DAG.getDataLayout();
9988
9989 assert(Op.getValueType() == MVT::i64 &&
9990 "unexpected type for custom lowering DIV");
9991 SDLoc dl(Op);
9992
9993 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
9994
9995 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9996
9997 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
9998 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
9999 DAG.getConstant(32, dl, getPointerTy(DL)));
10000 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10001
10002 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10003}
10004
10005 static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10006 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10007 EVT MemVT = LD->getMemoryVT();
10008 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10009 MemVT == MVT::v16i1) &&
10010 "Expected a predicate type!");
10011 assert(MemVT == Op.getValueType());
10012 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10013 "Expected a non-extending load");
10014 assert(LD->isUnindexed() && "Expected an unindexed load");
10015
10016 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10017 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10018 // need to make sure that the 8/4/2 bits are actually loaded into the correct
10019 // place, which means loading the value and then shuffling them into the
10020 // bottom bits of the predicate.
10021 // Equally, a VLDR of a v16i1 actually loads 32 bits (and so would be
10022 // incorrect for BE).
10023 // As for BE, the rest of LLVM assumes the reverse order to a natural
10024 // VMSR(load), so the loaded value needs to be bit-reversed.
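// E.g. for a v4i1 load: the 16 predicate bits are loaded as an i32,
// bit-reversed and shifted down on BE, cast to v16i1 with PREDICATE_CAST,
// and the bottom 4 lanes are then extracted as the v4i1 result.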
10025
10026 SDLoc dl(Op);
10027 SDValue Load = DAG.getExtLoad(
10028 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10029 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10030 LD->getMemOperand());
10031 SDValue Val = Load;
10032 if (DAG.getDataLayout().isBigEndian())
10033 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10034 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10035 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10036 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10037 if (MemVT != MVT::v16i1)
10038 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10039 DAG.getConstant(0, dl, MVT::i32));
10040 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10041}
10042
10043void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10044 SelectionDAG &DAG) const {
10045 LoadSDNode *LD = cast<LoadSDNode>(N);
10046 EVT MemVT = LD->getMemoryVT();
10047 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10048
10049 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10050 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10051 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10052 SDLoc dl(N);
10053 SDValue Result = DAG.getMemIntrinsicNode(
10054 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10055 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10056 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10057 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10058 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10059 Results.append({Pair, Result.getValue(2)});
10060 }
10061}
10062
10063 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10064 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10065 EVT MemVT = ST->getMemoryVT();
10066 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10067 MemVT == MVT::v16i1) &&
10068 "Expected a predicate type!");
10069 assert(MemVT == ST->getValue().getValueType());
10070 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10071 assert(ST->isUnindexed() && "Expected an unindexed store");
10072
10073 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10074 // top bits unset and a scalar store.
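// I.e. the narrow predicate is widened to a v16i1 whose top lanes are
// undef, moved to a GPR via PREDICATE_CAST, and only MemVT.getSizeInBits()
// bits of that value are written back with a truncating store.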
10075 SDLoc dl(Op);
10076 SDValue Build = ST->getValue();
10077 if (MemVT != MVT::v16i1) {
10078 SmallVector<SDValue, 16> Ops;
10079 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10080 unsigned Elt = DAG.getDataLayout().isBigEndian()
10081 ? MemVT.getVectorNumElements() - I - 1
10082 : I;
10083 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10084 DAG.getConstant(Elt, dl, MVT::i32)));
10085 }
10086 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10087 Ops.push_back(DAG.getUNDEF(MVT::i32));
10088 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10089 }
10090 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10091 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10092 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10093 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10094 DAG.getConstant(16, dl, MVT::i32));
10095 return DAG.getTruncStore(
10096 ST->getChain(), dl, GRP, ST->getBasePtr(),
10097 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10098 ST->getMemOperand());
10099}
10100
10101 static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10102 const ARMSubtarget *Subtarget) {
10103 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10104 EVT MemVT = ST->getMemoryVT();
10105 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10106
10107 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10108 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10109 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10110 SDNode *N = Op.getNode();
10111 SDLoc dl(N);
10112
10113 SDValue Lo = DAG.getNode(
10114 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10115 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10116 MVT::i32));
10117 SDValue Hi = DAG.getNode(
10118 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10119 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10120 MVT::i32));
10121
10122 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10123 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10124 MemVT, ST->getMemOperand());
10125 } else if (Subtarget->hasMVEIntegerOps() &&
10126 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10127 MemVT == MVT::v16i1))) {
10128 return LowerPredicateStore(Op, DAG);
10129 }
10130
10131 return SDValue();
10132}
10133
10134static bool isZeroVector(SDValue N) {
10135 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10136 (N->getOpcode() == ARMISD::VMOVIMM &&
10137 isNullConstant(N->getOperand(0))));
10138}
10139
10140 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10141 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10142 MVT VT = Op.getSimpleValueType();
10143 SDValue Mask = N->getMask();
10144 SDValue PassThru = N->getPassThru();
10145 SDLoc dl(Op);
10146
10147 if (isZeroVector(PassThru))
10148 return Op;
10149
10150 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10151 // zero too, and other values are lowered to a select.
10152 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10153 DAG.getTargetConstant(0, dl, MVT::i32));
10154 SDValue NewLoad = DAG.getMaskedLoad(
10155 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10156 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10157 N->getExtensionType(), N->isExpandingLoad());
10158 SDValue Combo = NewLoad;
10159 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10160 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10161 isZeroVector(PassThru->getOperand(0));
10162 if (!PassThru.isUndef() && !PassThruIsCastZero)
10163 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10164 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10165}
10166
10167 static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10168 const ARMSubtarget *ST) {
10169 if (!ST->hasMVEIntegerOps())
10170 return SDValue();
10171
10172 SDLoc dl(Op);
10173 unsigned BaseOpcode = 0;
10174 switch (Op->getOpcode()) {
10175 default: llvm_unreachable("Expected VECREDUCE opcode");
10176 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10177 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10178 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10179 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10180 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10181 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10182 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10183 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10184 }
10185
10186 SDValue Op0 = Op->getOperand(0);
10187 EVT VT = Op0.getValueType();
10188 EVT EltVT = VT.getVectorElementType();
10189 unsigned NumElts = VT.getVectorNumElements();
10190 unsigned NumActiveLanes = NumElts;
10191
10192 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10193 NumActiveLanes == 2) &&
10194 "Only expected a power 2 vector size");
10195
10196 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10197 // allows us to easily extract vector elements from the lanes.
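// E.g. for a v8i16 reduction, Op0 = op(Op0, VREV32(Op0)) combines each pair
// of adjacent lanes, leaving 4 lanes of interest that are then reduced
// element-by-element below.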
10198 while (NumActiveLanes > 4) {
10199 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10200 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10201 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10202 NumActiveLanes /= 2;
10203 }
10204
10205 SDValue Res;
10206 if (NumActiveLanes == 4) {
10207 // The remaining 4 elements are summed sequentially
10208 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10209 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10210 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10211 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10212 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10213 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10214 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10215 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10216 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10217 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10218 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10219 } else {
10220 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10221 DAG.getConstant(0, dl, MVT::i32));
10222 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10223 DAG.getConstant(1, dl, MVT::i32));
10224 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10225 }
10226
10227 // Result type may be wider than element type.
10228 if (EltVT != Op->getValueType(0))
10229 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10230 return Res;
10231}
10232
10233 static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10234 const ARMSubtarget *ST) {
10235 if (!ST->hasMVEFloatOps())
10236 return SDValue();
10237 return LowerVecReduce(Op, DAG, ST);
10238}
10239
10240 static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10241 const ARMSubtarget *ST) {
10242 if (!ST->hasNEON())
10243 return SDValue();
10244
10245 SDLoc dl(Op);
10246 SDValue Op0 = Op->getOperand(0);
10247 EVT VT = Op0.getValueType();
10248 EVT EltVT = VT.getVectorElementType();
10249
10250 unsigned PairwiseIntrinsic = 0;
10251 switch (Op->getOpcode()) {
10252 default:
10253 llvm_unreachable("Expected VECREDUCE opcode");
10254 case ISD::VECREDUCE_UMIN:
10255 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10256 break;
10257 case ISD::VECREDUCE_UMAX:
10258 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10259 break;
10260 case ISD::VECREDUCE_SMIN:
10261 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10262 break;
10263 case ISD::VECREDUCE_SMAX:
10264 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10265 break;
10266 }
10267 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10268
10269 unsigned NumElts = VT.getVectorNumElements();
10270 unsigned NumActiveLanes = NumElts;
10271
10272 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10273 NumActiveLanes == 2) &&
10274 "Only expected a power 2 vector size");
10275
10276 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10277 if (VT.is128BitVector()) {
10278 SDValue Lo, Hi;
10279 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10280 VT = Lo.getValueType();
10281 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10282 NumActiveLanes /= 2;
10283 }
10284
10285 // Use pairwise reductions until one lane remains
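// Each VPMIN/VPMAX step halves the number of candidate lanes, e.g. a
// 4-lane minimum needs one vpmin to get down to 2 lanes and another to get
// down to 1.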
10286 while (NumActiveLanes > 1) {
10287 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10288 NumActiveLanes /= 2;
10289 }
10290
10291 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10292 DAG.getConstant(0, dl, MVT::i32));
10293
10294 // Result type may be wider than element type.
10295 if (EltVT != Op.getValueType()) {
10296 unsigned Extend = 0;
10297 switch (Op->getOpcode()) {
10298 default:
10299 llvm_unreachable("Expected VECREDUCE opcode");
10300 case ISD::VECREDUCE_UMIN:
10301 case ISD::VECREDUCE_UMAX:
10302 Extend = ISD::ZERO_EXTEND;
10303 break;
10304 case ISD::VECREDUCE_SMIN:
10305 case ISD::VECREDUCE_SMAX:
10306 Extend = ISD::SIGN_EXTEND;
10307 break;
10308 }
10309 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10310 }
10311 return Res;
10312}
10313
10314 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10315 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10316 // Acquire/Release load/store is not legal for targets without a dmb or
10317 // equivalent available.
10318 return SDValue();
10319
10320 // Monotonic load/store is legal for all targets.
10321 return Op;
10322}
10323
10324 static void ReplaceREADCYCLECOUNTER(SDNode *N,
10325 SmallVectorImpl<SDValue> &Results,
10326 SelectionDAG &DAG,
10327 const ARMSubtarget *Subtarget) {
10328 SDLoc DL(N);
10329 // Under Power Management extensions, the cycle-count is:
10330 // mrc p15, #0, <Rt>, c9, c13, #0
10331 SDValue Ops[] = { N->getOperand(0), // Chain
10332 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10333 DAG.getTargetConstant(15, DL, MVT::i32),
10334 DAG.getTargetConstant(0, DL, MVT::i32),
10335 DAG.getTargetConstant(9, DL, MVT::i32),
10336 DAG.getTargetConstant(13, DL, MVT::i32),
10337 DAG.getTargetConstant(0, DL, MVT::i32)
10338 };
10339
10340 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10341 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10342 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10343 DAG.getConstant(0, DL, MVT::i32)));
10344 Results.push_back(Cycles32.getValue(1));
10345}
10346
10347 static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
10348 SDValue V1) {
10349 SDLoc dl(V0.getNode());
10350 SDValue RegClass =
10351 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10352 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10353 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10354 const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
10355 return SDValue(
10356 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10357}
10358
10359 static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
10360 SDLoc dl(V.getNode());
10361 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10362 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10363 if (isBigEndian)
10364 std::swap(VLo, VHi);
10365 return createGPRPairNode2xi32(DAG, VLo, VHi);
10366}
10367
10368 static void ReplaceCMP_SWAP_64Results(SDNode *N,
10369 SmallVectorImpl<SDValue> &Results,
10370 SelectionDAG &DAG) {
10371 assert(N->getValueType(0) == MVT::i64 &&
10372 "AtomicCmpSwap on types less than 64 should be legal");
10373 SDValue Ops[] = {
10374 createGPRPairNode2xi32(DAG, N->getOperand(1),
10375 DAG.getUNDEF(MVT::i32)), // pointer, temp
10376 createGPRPairNodei64(DAG, N->getOperand(2)), // expected
10377 createGPRPairNodei64(DAG, N->getOperand(3)), // new
10378 N->getOperand(0), // chain in
10379 };
10380 SDNode *CmpSwap = DAG.getMachineNode(
10381 ARM::CMP_SWAP_64, SDLoc(N),
10382 DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
10383
10384 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10385 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10386
10387 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10388
10389 SDValue Lo =
10390 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10391 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10392 SDValue Hi =
10393 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10394 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10395 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10396 Results.push_back(SDValue(CmpSwap, 2));
10397}
10398
10399SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10400 SDLoc dl(Op);
10401 EVT VT = Op.getValueType();
10402 SDValue Chain = Op.getOperand(0);
10403 SDValue LHS = Op.getOperand(1);
10404 SDValue RHS = Op.getOperand(2);
10405 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10406 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10407
10408 // If we don't have instructions of this float type then soften to a libcall
10409 // and use SETCC instead.
10410 if (isUnsupportedFloatingType(LHS.getValueType())) {
10411 softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS,
10412 Chain, IsSignaling);
10413 if (!RHS.getNode()) {
10414 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10415 CC = ISD::SETNE;
10416 }
10417 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10418 DAG.getCondCode(CC));
10419 return DAG.getMergeValues({Result, Chain}, dl);
10420 }
10421
10422 ARMCC::CondCodes CondCode, CondCode2;
10423 FPCCToARMCC(CC, CondCode, CondCode2);
10424
10425 SDValue True = DAG.getConstant(1, dl, VT);
10426 SDValue False = DAG.getConstant(0, dl, VT);
10427 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10428 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10429 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
10430 if (CondCode2 != ARMCC::AL) {
10431 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10432 Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
10433 }
10434 return DAG.getMergeValues({Result, Chain}, dl);
10435}
10436
10437SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10438 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10439
10440 EVT VT = getPointerTy(DAG.getDataLayout());
10441 int FI = MFI.CreateFixedObject(4, 0, false);
10442 return DAG.getFrameIndex(FI, VT);
10443}
10444
10445SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
10446 SelectionDAG &DAG) const {
10447 SDLoc DL(Op);
10448 MakeLibCallOptions CallOptions;
10449 MVT SVT = Op.getOperand(0).getSimpleValueType();
10450 RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
10451 SDValue Res =
10452 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
10453 return DAG.getBitcast(MVT::i32, Res);
10454}
10455
10456SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {
10457 SDLoc dl(Op);
10458 SDValue LHS = Op.getOperand(0);
10459 SDValue RHS = Op.getOperand(1);
10460
10461 // Determine if this is a signed or an unsigned comparison.
10462 bool IsSigned = (Op.getOpcode() == ISD::SCMP);
10463
10464 // Special case for Thumb1 UCMP only
10465 if (!IsSigned && Subtarget->isThumb1Only()) {
10466 // For Thumb unsigned comparison, use this sequence:
10467 // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags
10468 // sbc r2, r2 ; r2 = r2 - r2 - !carry
10469 // cmp r1, r0 ; compare RHS with LHS
10470 // sbc r1, r1 ; r1 = r1 - r1 - !carry
10471 // subs r0, r2, r1 ; r0 = r2 - r1 (final result)
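// E.g. LHS=1, RHS=2: subs gives -1 with carry clear, so the first sbc
// yields -1; cmp r1, r0 (2 vs 1) sets carry, so the second sbc yields 0;
// the result is -1 - 0 = -1. With the operands swapped the two sbcs give
// 0 and -1, producing +1, and equal operands give 0.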
10472
10473 // First subtraction: LHS - RHS
10474 SDValue Sub1WithFlags = DAG.getNode(
10475 ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
10476 SDValue Sub1Result = Sub1WithFlags.getValue(0);
10477 SDValue Flags1 = Sub1WithFlags.getValue(1);
10478
10479 // SUBE: Sub1Result - Sub1Result - !carry
10480 // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)
10481 SDValue Sbc1 =
10482 DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT),
10483 Sub1Result, Sub1Result, Flags1);
10484 SDValue Sbc1Result = Sbc1.getValue(0);
10485
10486 // Second comparison: RHS vs LHS (reverse comparison)
10487 SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);
10488
10489 // SUBE: RHS - RHS - !carry
10490 // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned)
10491 SDValue Sbc2 = DAG.getNode(
10492 ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags);
10493 SDValue Sbc2Result = Sbc2.getValue(0);
10494
10495 // Final subtraction: Sbc1Result - Sbc2Result (no flags needed)
10496 SDValue Result =
10497 DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);
10498 if (Op.getValueType() != MVT::i32)
10499 Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType());
10500
10501 return Result;
10502 }
10503
10504 // For the ARM assembly pattern:
10505 //   subs r0, r0, r1   ; subtract RHS from LHS and set flags
10506 //   movgt r0, #1      ; if LHS > RHS, set result to 1  (GT for signed, HI for unsigned)
10507 //   mvnlt r0, #0      ; if LHS < RHS, set result to -1 (LT for signed, LO for unsigned)
10508 //                     ; if LHS == RHS, result remains 0 from the subs
10509 //
10510
10511 // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC
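// LHS - (0 - X) == LHS + X, and an ADDS against X sets the same flags as
// the SUBS against (0 - X) as long as the negation cannot wrap: X must not
// be INT_MIN for the signed case and must be non-zero for the unsigned
// case, which is exactly what is checked below.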
10512 unsigned Opcode = ARMISD::SUBC;
10513
10514 // Check if RHS is a subtraction against 0: (0 - X)
10515 if (RHS.getOpcode() == ISD::SUB) {
10516 SDValue SubLHS = RHS.getOperand(0);
10517 SDValue SubRHS = RHS.getOperand(1);
10518
10519 // Check if it's 0 - X
10520 if (isNullConstant(SubLHS)) {
10521 bool CanUseAdd = false;
10522 if (IsSigned) {
10523 // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)
10524 if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
10525 .getSignedMinValue()
10526 .isMinSignedValue()) {
10527 CanUseAdd = true;
10528 }
10529 } else {
10530 // For UCMP: only if X is known to never be zero
10531 if (DAG.isKnownNeverZero(SubRHS)) {
10532 CanUseAdd = true;
10533 }
10534 }
10535
10536 if (CanUseAdd) {
10537 Opcode = ARMISD::ADDC;
10538 RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of
10539 // LHS - (0 - X)
10540 }
10541 }
10542 }
10543
10544 // Generate the operation with flags
10545 SDValue OpWithFlags =
10546 DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
10547
10548 SDValue OpResult = OpWithFlags.getValue(0);
10549 SDValue Flags = OpWithFlags.getValue(1);
10550
10551 // Constants for conditional moves
10552 SDValue One = DAG.getConstant(1, dl, MVT::i32);
10553 SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);
10554
10555 // Select condition codes based on signed vs unsigned
10556 ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;
10557 ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;
10558
10559 // First conditional move: if greater than, set to 1
10560 SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);
10561 SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
10562 GTCondValue, Flags);
10563
10564 // Second conditional move: if less than, set to -1
10565 SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);
10566 SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
10567 LTCondValue, Flags);
10568
10569 if (Op.getValueType() != MVT::i32)
10570 Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
10571
10572 return Result2;
10573}
10574
10575 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10576 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10577 switch (Op.getOpcode()) {
10578 default: llvm_unreachable("Don't know how to custom lower this!");
10579 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10580 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10581 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10582 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10583 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10584 case ISD::SELECT: return LowerSELECT(Op, DAG);
10585 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10586 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10587 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10588 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10589 case ISD::VASTART: return LowerVASTART(Op, DAG);
10590 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10591 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10592 case ISD::SINT_TO_FP:
10593 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10596 case ISD::FP_TO_SINT:
10597 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10598 case ISD::FP_TO_SINT_SAT:
10599 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10600 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10601 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10602 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10603 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10604 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10605 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10606 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10607 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10608 Subtarget);
10609 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10610 case ISD::SHL:
10611 case ISD::SRL:
10612 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10613 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10614 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10615 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10616 case ISD::SRL_PARTS:
10617 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10618 case ISD::CTTZ:
10619 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10620 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10621 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10622 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10623 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10624 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10625 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10626 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10627 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10628 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10629 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10630 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10631 case ISD::SIGN_EXTEND:
10632 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10633 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10634 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10635 case ISD::SET_FPMODE:
10636 return LowerSET_FPMODE(Op, DAG);
10637 case ISD::RESET_FPMODE:
10638 return LowerRESET_FPMODE(Op, DAG);
10639 case ISD::MUL: return LowerMUL(Op, DAG);
10640 case ISD::SDIV:
10641 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10642 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10643 return LowerSDIV(Op, DAG, Subtarget);
10644 case ISD::UDIV:
10645 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10646 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10647 return LowerUDIV(Op, DAG, Subtarget);
10648 case ISD::UADDO_CARRY:
10649 case ISD::USUBO_CARRY:
10650 return LowerUADDSUBO_CARRY(Op, DAG);
10651 case ISD::SADDO:
10652 case ISD::SSUBO:
10653 return LowerSignedALUO(Op, DAG);
10654 case ISD::UADDO:
10655 case ISD::USUBO:
10656 return LowerUnsignedALUO(Op, DAG);
10657 case ISD::SADDSAT:
10658 case ISD::SSUBSAT:
10659 case ISD::UADDSAT:
10660 case ISD::USUBSAT:
10661 return LowerADDSUBSAT(Op, DAG, Subtarget);
10662 case ISD::LOAD:
10663 return LowerPredicateLoad(Op, DAG);
10664 case ISD::STORE:
10665 return LowerSTORE(Op, DAG, Subtarget);
10666 case ISD::MLOAD:
10667 return LowerMLOAD(Op, DAG);
10668 case ISD::VECREDUCE_MUL:
10669 case ISD::VECREDUCE_AND:
10670 case ISD::VECREDUCE_OR:
10671 case ISD::VECREDUCE_XOR:
10672 return LowerVecReduce(Op, DAG, Subtarget);
10673 case ISD::VECREDUCE_FADD:
10674 case ISD::VECREDUCE_FMUL:
10675 case ISD::VECREDUCE_FMIN:
10676 case ISD::VECREDUCE_FMAX:
10677 return LowerVecReduceF(Op, DAG, Subtarget);
10678 case ISD::VECREDUCE_UMIN:
10679 case ISD::VECREDUCE_UMAX:
10680 case ISD::VECREDUCE_SMIN:
10681 case ISD::VECREDUCE_SMAX:
10682 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10683 case ISD::ATOMIC_LOAD:
10684 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10685 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10686 case ISD::SDIVREM:
10687 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10688 case ISD::DYNAMIC_STACKALLOC:
10689 if (Subtarget->isTargetWindows())
10690 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10691 llvm_unreachable("Don't know how to custom lower this!");
10692 case ISD::STRICT_FP_ROUND:
10693 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10694 case ISD::STRICT_FP_EXTEND:
10695 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10696 case ISD::STRICT_FSETCC:
10697 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10698 case ISD::SPONENTRY:
10699 return LowerSPONENTRY(Op, DAG);
10700 case ISD::FP_TO_BF16:
10701 return LowerFP_TO_BF16(Op, DAG);
10702 case ARMISD::WIN__DBZCHK: return SDValue();
10703 case ISD::UCMP:
10704 case ISD::SCMP:
10705 return LowerCMP(Op, DAG);
10706 }
10707}
10708
10709 static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10710 SelectionDAG &DAG) {
10711 unsigned IntNo = N->getConstantOperandVal(0);
10712 unsigned Opc = 0;
10713 if (IntNo == Intrinsic::arm_smlald)
10714 Opc = ARMISD::SMLALD;
10715 else if (IntNo == Intrinsic::arm_smlaldx)
10716 Opc = ARMISD::SMLALDX;
10717 else if (IntNo == Intrinsic::arm_smlsld)
10718 Opc = ARMISD::SMLSLD;
10719 else if (IntNo == Intrinsic::arm_smlsldx)
10720 Opc = ARMISD::SMLSLDX;
10721 else
10722 return;
10723
10724 SDLoc dl(N);
10725 SDValue Lo, Hi;
10726 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10727
10728 SDValue LongMul = DAG.getNode(Opc, dl,
10729 DAG.getVTList(MVT::i32, MVT::i32),
10730 N->getOperand(1), N->getOperand(2),
10731 Lo, Hi);
10732 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10733 LongMul.getValue(0), LongMul.getValue(1)));
10734}
10735
10736/// ReplaceNodeResults - Replace the results of node with an illegal result
10737/// type with new values built out of custom code.
10738 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10739 SmallVectorImpl<SDValue> &Results,
10740 SelectionDAG &DAG) const {
10741 SDValue Res;
10742 switch (N->getOpcode()) {
10743 default:
10744 llvm_unreachable("Don't know how to custom expand this!");
10745 case ISD::READ_REGISTER:
10746 ExpandREAD_REGISTER(N, Results, DAG);
10747 break;
10748 case ISD::BITCAST:
10749 Res = ExpandBITCAST(N, DAG, Subtarget);
10750 break;
10751 case ISD::SRL:
10752 case ISD::SRA:
10753 case ISD::SHL:
10754 Res = Expand64BitShift(N, DAG, Subtarget);
10755 break;
10756 case ISD::SREM:
10757 case ISD::UREM:
10758 Res = LowerREM(N, DAG);
10759 break;
10760 case ISD::SDIVREM:
10761 case ISD::UDIVREM:
10762 Res = LowerDivRem(SDValue(N, 0), DAG);
10763 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10764 Results.push_back(Res.getValue(0));
10765 Results.push_back(Res.getValue(1));
10766 return;
10767 case ISD::SADDSAT:
10768 case ISD::SSUBSAT:
10769 case ISD::UADDSAT:
10770 case ISD::USUBSAT:
10771 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10772 break;
10773 case ISD::READCYCLECOUNTER:
10774 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10775 return;
10776 case ISD::UDIV:
10777 case ISD::SDIV:
10778 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10779 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10780 Results);
10781 case ISD::ATOMIC_CMP_SWAP:
10782 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10783 return;
10784 case ISD::INTRINSIC_WO_CHAIN:
10785 return ReplaceLongIntrinsic(N, Results, DAG);
10786 case ISD::LOAD:
10787 LowerLOAD(N, Results, DAG);
10788 break;
10789 case ISD::TRUNCATE:
10790 Res = LowerTruncate(N, DAG, Subtarget);
10791 break;
10792 case ISD::SIGN_EXTEND:
10793 case ISD::ZERO_EXTEND:
10794 Res = LowerVectorExtend(N, DAG, Subtarget);
10795 break;
10796 case ISD::FP_TO_SINT_SAT:
10797 case ISD::FP_TO_UINT_SAT:
10798 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10799 break;
10800 }
10801 if (Res.getNode())
10802 Results.push_back(Res);
10803}
10804
10805//===----------------------------------------------------------------------===//
10806// ARM Scheduler Hooks
10807//===----------------------------------------------------------------------===//
10808
10809/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10810/// registers the function context.
10811void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10812 MachineBasicBlock *MBB,
10813 MachineBasicBlock *DispatchBB,
10814 int FI) const {
10815 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10816 "ROPI/RWPI not currently supported with SjLj");
10817 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10818 DebugLoc dl = MI.getDebugLoc();
10819 MachineFunction *MF = MBB->getParent();
10820 MachineRegisterInfo *MRI = &MF->getRegInfo();
10821 MachineConstantPool *MCP = MF->getConstantPool();
10822 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10823 const Function &F = MF->getFunction();
10824
10825 bool isThumb = Subtarget->isThumb();
10826 bool isThumb2 = Subtarget->isThumb2();
10827
10828 unsigned PCLabelId = AFI->createPICLabelUId();
10829 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10830 ARMConstantPoolValue *CPV =
10831 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10832 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10833
10834 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10835 : &ARM::GPRRegClass;
10836
10837 // Grab constant pool and fixed stack memory operands.
10838 MachineMemOperand *CPMMO =
10841
10842 MachineMemOperand *FIMMOSt =
10845
10846 // Load the address of the dispatch MBB into the jump buffer.
10847 if (isThumb2) {
10848 // Incoming value: jbuf
10849 // ldr.n r5, LCPI1_1
10850 // orr r5, r5, #1
10851 // add r5, pc
10852 // str r5, [$jbuf, #+4] ; &jbuf[1]
10853 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10854 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10856 .addMemOperand(CPMMO)
10858 // Set the low bit because of thumb mode.
10859 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10860 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10861 .addReg(NewVReg1, RegState::Kill)
10862 .addImm(0x01)
10864 .add(condCodeOp());
10865 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10866 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10867 .addReg(NewVReg2, RegState::Kill)
10868 .addImm(PCLabelId);
10869 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10870 .addReg(NewVReg3, RegState::Kill)
10871 .addFrameIndex(FI)
10872 .addImm(36) // &jbuf[1] :: pc
10873 .addMemOperand(FIMMOSt)
10875 } else if (isThumb) {
10876 // Incoming value: jbuf
10877 // ldr.n r1, LCPI1_4
10878 // add r1, pc
10879 // mov r2, #1
10880 // orrs r1, r2
10881 // add r2, $jbuf, #+4 ; &jbuf[1]
10882 // str r1, [r2]
10883 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10884 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10886 .addMemOperand(CPMMO)
10888 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10889 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10890 .addReg(NewVReg1, RegState::Kill)
10891 .addImm(PCLabelId);
10892 // Set the low bit because of thumb mode.
10893 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10894 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10895 .addReg(ARM::CPSR, RegState::Define)
10896 .addImm(1)
10898 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10899 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10900 .addReg(ARM::CPSR, RegState::Define)
10901 .addReg(NewVReg2, RegState::Kill)
10902 .addReg(NewVReg3, RegState::Kill)
10904 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10905 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10906 .addFrameIndex(FI)
10907 .addImm(36); // &jbuf[1] :: pc
10908 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10909 .addReg(NewVReg4, RegState::Kill)
10910 .addReg(NewVReg5, RegState::Kill)
10911 .addImm(0)
10912 .addMemOperand(FIMMOSt)
10914 } else {
10915 // Incoming value: jbuf
10916 // ldr r1, LCPI1_1
10917 // add r1, pc, r1
10918 // str r1, [$jbuf, #+4] ; &jbuf[1]
10919 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10920 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10922 .addImm(0)
10923 .addMemOperand(CPMMO)
10925 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10926 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10927 .addReg(NewVReg1, RegState::Kill)
10928 .addImm(PCLabelId)
10930 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10931 .addReg(NewVReg2, RegState::Kill)
10932 .addFrameIndex(FI)
10933 .addImm(36) // &jbuf[1] :: pc
10934 .addMemOperand(FIMMOSt)
10936 }
10937}
10938
10939void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10940 MachineBasicBlock *MBB) const {
10941 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10942 DebugLoc dl = MI.getDebugLoc();
10943 MachineFunction *MF = MBB->getParent();
10944 MachineRegisterInfo *MRI = &MF->getRegInfo();
10945 MachineFrameInfo &MFI = MF->getFrameInfo();
10946 int FI = MFI.getFunctionContextIndex();
10947
10948 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10949 : &ARM::GPRnopcRegClass;
10950
10951 // Get a mapping of the call site numbers to all of the landing pads they're
10952 // associated with.
10953 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10954 unsigned MaxCSNum = 0;
10955 for (MachineBasicBlock &BB : *MF) {
10956 if (!BB.isEHPad())
10957 continue;
10958
10959 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10960 // pad.
10961 for (MachineInstr &II : BB) {
10962 if (!II.isEHLabel())
10963 continue;
10964
10965 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10966 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10967
10968 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10969 for (unsigned Idx : CallSiteIdxs) {
10970 CallSiteNumToLPad[Idx].push_back(&BB);
10971 MaxCSNum = std::max(MaxCSNum, Idx);
10972 }
10973 break;
10974 }
10975 }
10976
10977 // Get an ordered list of the machine basic blocks for the jump table.
10978 std::vector<MachineBasicBlock*> LPadList;
10979 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10980 LPadList.reserve(CallSiteNumToLPad.size());
10981 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10982 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10983 for (MachineBasicBlock *MBB : MBBList) {
10984 LPadList.push_back(MBB);
10985 InvokeBBs.insert_range(MBB->predecessors());
10986 }
10987 }
10988
10989 assert(!LPadList.empty() &&
10990 "No landing pad destinations for the dispatch jump table!");
10991
10992 // Create the jump table and associated information.
10993 MachineJumpTableInfo *JTI =
10994 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10995 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10996
10997 // Create the MBBs for the dispatch code.
10998
10999 // Shove the dispatch's address into the return slot in the function context.
11000 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
11001 DispatchBB->setIsEHPad();
11002
11003 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11004
11005 BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
11006 DispatchBB->addSuccessor(TrapBB);
11007
11008 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
11009 DispatchBB->addSuccessor(DispContBB);
11010
11011 // Insert the MBBs into the function.
11012 MF->insert(MF->end(), DispatchBB);
11013 MF->insert(MF->end(), DispContBB);
11014 MF->insert(MF->end(), TrapBB);
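// DispatchBB loads the call-site index that the SjLj runtime stored in the
// function context, range-checks it against the number of landing pads
// (branching to TrapBB when it is out of range), and DispContBB then indexes
// the inline jump table to branch to the selected landing pad.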
11015
11016 // Insert code into the entry block that creates and registers the function
11017 // context.
11018 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
11019
11020 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
11023
11024 MachineInstrBuilder MIB;
11025 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
11026
11027 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
11028 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
11029
11030 // Add a register mask with no preserved registers. This results in all
11031 // registers being marked as clobbered. This can't work if the dispatch block
11032 // is in a Thumb1 function and is linked with ARM code which uses the FP
11033 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11034 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
11035
11036 bool IsPositionIndependent = isPositionIndependent();
11037 unsigned NumLPads = LPadList.size();
11038 if (Subtarget->isThumb2()) {
11039 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11040 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11041 .addFrameIndex(FI)
11042 .addImm(4)
11043 .addMemOperand(FIMMOLd)
11045
11046 if (NumLPads < 256) {
11047 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11048 .addReg(NewVReg1)
11049 .addImm(LPadList.size())
11051 } else {
11052 Register VReg1 = MRI->createVirtualRegister(TRC);
11053 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11054 .addImm(NumLPads & 0xFFFF)
11056
11057 unsigned VReg2 = VReg1;
11058 if ((NumLPads & 0xFFFF0000) != 0) {
11059 VReg2 = MRI->createVirtualRegister(TRC);
11060 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11061 .addReg(VReg1)
11062 .addImm(NumLPads >> 16)
11064 }
11065
11066 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11067 .addReg(NewVReg1)
11068 .addReg(VReg2)
11070 }
11071
11072 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11073 .addMBB(TrapBB)
11075 .addReg(ARM::CPSR);
11076
11077 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11078 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11079 .addJumpTableIndex(MJTI)
11081
11082 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11083 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11084 .addReg(NewVReg3, RegState::Kill)
11085 .addReg(NewVReg1)
11088 .add(condCodeOp());
11089
11090 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11091 .addReg(NewVReg4, RegState::Kill)
11092 .addReg(NewVReg1)
11093 .addJumpTableIndex(MJTI);
11094 } else if (Subtarget->isThumb()) {
11095 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11096 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11097 .addFrameIndex(FI)
11098 .addImm(1)
11099 .addMemOperand(FIMMOLd)
11101
11102 if (NumLPads < 256) {
11103 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11104 .addReg(NewVReg1)
11105 .addImm(NumLPads)
11107 } else {
11108 MachineConstantPool *ConstantPool = MF->getConstantPool();
11109 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11110 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11111
11112 // MachineConstantPool wants an explicit alignment.
11113 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11114 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11115
11116 Register VReg1 = MRI->createVirtualRegister(TRC);
11117 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11118 .addReg(VReg1, RegState::Define)
11121 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11122 .addReg(NewVReg1)
11123 .addReg(VReg1)
11125 }
11126
11127 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11128 .addMBB(TrapBB)
11130 .addReg(ARM::CPSR);
11131
11132 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11133 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11134 .addReg(ARM::CPSR, RegState::Define)
11135 .addReg(NewVReg1)
11136 .addImm(2)
11138
11139 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11140 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11141 .addJumpTableIndex(MJTI)
11143
11144 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11145 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11146 .addReg(ARM::CPSR, RegState::Define)
11147 .addReg(NewVReg2, RegState::Kill)
11148 .addReg(NewVReg3)
11150
11151 MachineMemOperand *JTMMOLd =
11152 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11154
11155 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11156 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11157 .addReg(NewVReg4, RegState::Kill)
11158 .addImm(0)
11159 .addMemOperand(JTMMOLd)
11161
11162 unsigned NewVReg6 = NewVReg5;
11163 if (IsPositionIndependent) {
11164 NewVReg6 = MRI->createVirtualRegister(TRC);
11165 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11166 .addReg(ARM::CPSR, RegState::Define)
11167 .addReg(NewVReg5, RegState::Kill)
11168 .addReg(NewVReg3)
11170 }
11171
11172 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11173 .addReg(NewVReg6, RegState::Kill)
11174 .addJumpTableIndex(MJTI);
11175 } else {
11176 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11177 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11178 .addFrameIndex(FI)
11179 .addImm(4)
11180 .addMemOperand(FIMMOLd)
11182
11183 if (NumLPads < 256) {
11184 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11185 .addReg(NewVReg1)
11186 .addImm(NumLPads)
11188 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11189 Register VReg1 = MRI->createVirtualRegister(TRC);
11190 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11191 .addImm(NumLPads & 0xFFFF)
11193
11194 unsigned VReg2 = VReg1;
11195 if ((NumLPads & 0xFFFF0000) != 0) {
11196 VReg2 = MRI->createVirtualRegister(TRC);
11197 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11198 .addReg(VReg1)
11199 .addImm(NumLPads >> 16)
11201 }
11202
11203 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11204 .addReg(NewVReg1)
11205 .addReg(VReg2)
11207 } else {
11208 MachineConstantPool *ConstantPool = MF->getConstantPool();
11209 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11210 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11211
11212 // MachineConstantPool wants an explicit alignment.
11213 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11214 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11215
11216 Register VReg1 = MRI->createVirtualRegister(TRC);
11217 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11218 .addReg(VReg1, RegState::Define)
11220 .addImm(0)
11222 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11223 .addReg(NewVReg1)
11224 .addReg(VReg1, RegState::Kill)
11226 }
11227
11228 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11229 .addMBB(TrapBB)
11231 .addReg(ARM::CPSR);
11232
11233 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11234 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11235 .addReg(NewVReg1)
11238 .add(condCodeOp());
11239 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11240 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11241 .addJumpTableIndex(MJTI)
11243
11244 MachineMemOperand *JTMMOLd =
11245 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11247 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11248 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11249 .addReg(NewVReg3, RegState::Kill)
11250 .addReg(NewVReg4)
11251 .addImm(0)
11252 .addMemOperand(JTMMOLd)
11254
11255 if (IsPositionIndependent) {
11256 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11257 .addReg(NewVReg5, RegState::Kill)
11258 .addReg(NewVReg4)
11259 .addJumpTableIndex(MJTI);
11260 } else {
11261 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11262 .addReg(NewVReg5, RegState::Kill)
11263 .addJumpTableIndex(MJTI);
11264 }
11265 }
11266
11267 // Add the jump table entries as successors to the MBB.
11268 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11269 for (MachineBasicBlock *CurMBB : LPadList) {
11270 if (SeenMBBs.insert(CurMBB).second)
11271 DispContBB->addSuccessor(CurMBB);
11272 }
11273
11274 // N.B. the order the invoke BBs are processed in doesn't matter here.
11275 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11276 SmallVector<MachineBasicBlock*, 64> MBBLPads;
11277 for (MachineBasicBlock *BB : InvokeBBs) {
11278
11279 // Remove the landing pad successor from the invoke block and replace it
11280 // with the new dispatch block.
11281 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11282 while (!Successors.empty()) {
11283 MachineBasicBlock *SMBB = Successors.pop_back_val();
11284 if (SMBB->isEHPad()) {
11285 BB->removeSuccessor(SMBB);
11286 MBBLPads.push_back(SMBB);
11287 }
11288 }
11289
11290 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11291 BB->normalizeSuccProbs();
11292
11293 // Find the invoke call and mark all of the callee-saved registers as
11294 // 'implicit defined' so that they're spilled. This prevents code from
11295 // moving instructions to before the EH block, where they will never be
11296 // executed.
11297 for (MachineBasicBlock::reverse_iterator
11298 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11299 if (!II->isCall()) continue;
11300
11301 DenseSet<unsigned> DefRegs;
11302 for (MachineInstr::mop_iterator
11303 OI = II->operands_begin(), OE = II->operands_end();
11304 OI != OE; ++OI) {
11305 if (!OI->isReg()) continue;
11306 DefRegs.insert(OI->getReg());
11307 }
11308
11309 MachineInstrBuilder MIB(*MF, &*II);
11310
11311 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11312 unsigned Reg = SavedRegs[i];
11313 if (Subtarget->isThumb2() &&
11314 !ARM::tGPRRegClass.contains(Reg) &&
11315 !ARM::hGPRRegClass.contains(Reg))
11316 continue;
11317 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11318 continue;
11319 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11320 continue;
11321 if (!DefRegs.contains(Reg))
11322 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11323 }
11324
11325 break;
11326 }
11327 }
11328
11329 // Mark all former landing pads as non-landing pads. The dispatch is the only
11330 // landing pad now.
11331 for (MachineBasicBlock *MBBLPad : MBBLPads)
11332 MBBLPad->setIsEHPad(false);
11333
11334 // The instruction is gone now.
11335 MI.eraseFromParent();
11336}
11337
11338static
11339 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11340 for (MachineBasicBlock *S : MBB->successors())
11341 if (S != Succ)
11342 return S;
11343 llvm_unreachable("Expecting a BB with two successors!");
11344}
11345
11346 /// Return the load opcode for a given load size. If the load size is >= 8,
11347 /// a NEON opcode will be returned.
11348static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11349 if (LdSize >= 8)
11350 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11351 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11352 if (IsThumb1)
11353 return LdSize == 4 ? ARM::tLDRi
11354 : LdSize == 2 ? ARM::tLDRHi
11355 : LdSize == 1 ? ARM::tLDRBi : 0;
11356 if (IsThumb2)
11357 return LdSize == 4 ? ARM::t2LDR_POST
11358 : LdSize == 2 ? ARM::t2LDRH_POST
11359 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11360 return LdSize == 4 ? ARM::LDR_POST_IMM
11361 : LdSize == 2 ? ARM::LDRH_POST
11362 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11363}
11364
11365 /// Return the store opcode for a given store size. If the store size is >= 8,
11366 /// a NEON opcode will be returned.
11367static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11368 if (StSize >= 8)
11369 return StSize == 16 ? ARM::VST1q32wb_fixed
11370 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11371 if (IsThumb1)
11372 return StSize == 4 ? ARM::tSTRi
11373 : StSize == 2 ? ARM::tSTRHi
11374 : StSize == 1 ? ARM::tSTRBi : 0;
11375 if (IsThumb2)
11376 return StSize == 4 ? ARM::t2STR_POST
11377 : StSize == 2 ? ARM::t2STRH_POST
11378 : StSize == 1 ? ARM::t2STRB_POST : 0;
11379 return StSize == 4 ? ARM::STR_POST_IMM
11380 : StSize == 2 ? ARM::STRH_POST
11381 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11382}
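// These two helpers feed emitPostLd/emitPostSt below; a return value of 0
// means there is no post-increment opcode for the requested size, which the
// callers treat as an error (they assert on it).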
11383
11384/// Emit a post-increment load operation with given size. The instructions
11385/// will be added to BB at Pos.
11386 static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11387 const TargetInstrInfo *TII, const DebugLoc &dl,
11388 unsigned LdSize, unsigned Data, unsigned AddrIn,
11389 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11390 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11391 assert(LdOpc != 0 && "Should have a load opcode");
11392 if (LdSize >= 8) {
11393 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11394 .addReg(AddrOut, RegState::Define)
11395 .addReg(AddrIn)
11396 .addImm(0)
11398 } else if (IsThumb1) {
11399 // load + update AddrIn
11400 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11401 .addReg(AddrIn)
11402 .addImm(0)
11404 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11405 .add(t1CondCodeOp())
11406 .addReg(AddrIn)
11407 .addImm(LdSize)
11409 } else if (IsThumb2) {
11410 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11411 .addReg(AddrOut, RegState::Define)
11412 .addReg(AddrIn)
11413 .addImm(LdSize)
11415 } else { // arm
11416 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11417 .addReg(AddrOut, RegState::Define)
11418 .addReg(AddrIn)
11419 .addReg(0)
11420 .addImm(LdSize)
11422 }
11423}
11424
11425/// Emit a post-increment store operation with given size. The instructions
11426/// will be added to BB at Pos.
11427 static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11428 const TargetInstrInfo *TII, const DebugLoc &dl,
11429 unsigned StSize, unsigned Data, unsigned AddrIn,
11430 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11431 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11432 assert(StOpc != 0 && "Should have a store opcode");
11433 if (StSize >= 8) {
11434 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11435 .addReg(AddrIn)
11436 .addImm(0)
11437 .addReg(Data)
11439 } else if (IsThumb1) {
11440 // store + update AddrIn
11441 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11442 .addReg(Data)
11443 .addReg(AddrIn)
11444 .addImm(0)
11446 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11447 .add(t1CondCodeOp())
11448 .addReg(AddrIn)
11449 .addImm(StSize)
11451 } else if (IsThumb2) {
11452 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11453 .addReg(Data)
11454 .addReg(AddrIn)
11455 .addImm(StSize)
11457 } else { // arm
11458 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11459 .addReg(Data)
11460 .addReg(AddrIn)
11461 .addReg(0)
11462 .addImm(StSize)
11464 }
11465}
11466
11467 MachineBasicBlock *
11468 ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11469 MachineBasicBlock *BB) const {
11470 // This pseudo instruction has 4 operands: dst, src, size, alignment.
11471 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold();
11472 // otherwise we generate unrolled scalar copies.
11473 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11474 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11475 MachineFunction::iterator It = ++BB->getIterator();
11476
11477 Register dest = MI.getOperand(0).getReg();
11478 Register src = MI.getOperand(1).getReg();
11479 unsigned SizeVal = MI.getOperand(2).getImm();
11480 unsigned Alignment = MI.getOperand(3).getImm();
11481 DebugLoc dl = MI.getDebugLoc();
11482
11483 MachineFunction *MF = BB->getParent();
11484 MachineRegisterInfo &MRI = MF->getRegInfo();
11485 unsigned UnitSize = 0;
11486 const TargetRegisterClass *TRC = nullptr;
11487 const TargetRegisterClass *VecTRC = nullptr;
11488
11489 bool IsThumb1 = Subtarget->isThumb1Only();
11490 bool IsThumb2 = Subtarget->isThumb2();
11491 bool IsThumb = Subtarget->isThumb();
11492
11493 if (Alignment & 1) {
11494 UnitSize = 1;
11495 } else if (Alignment & 2) {
11496 UnitSize = 2;
11497 } else {
11498 // Check whether we can use NEON instructions.
11499 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11500 Subtarget->hasNEON()) {
11501 if ((Alignment % 16 == 0) && SizeVal >= 16)
11502 UnitSize = 16;
11503 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11504 UnitSize = 8;
11505 }
11506 // Can't use NEON instructions.
11507 if (UnitSize == 0)
11508 UnitSize = 4;
11509 }
11510
11511 // Select the correct opcode and register class for unit size load/store
11512 bool IsNeon = UnitSize >= 8;
11513 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11514 if (IsNeon)
11515 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11516 : UnitSize == 8 ? &ARM::DPRRegClass
11517 : nullptr;
11518
11519 unsigned BytesLeft = SizeVal % UnitSize;
11520 unsigned LoopSize = SizeVal - BytesLeft;
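// Illustrative example (values not taken from any particular caller): a
// 70-byte copy with 16-byte alignment on a NEON-capable target gives
// UnitSize = 16, LoopSize = 64 and BytesLeft = 6; the trailing 6 bytes are
// then copied by the byte-wise LDRB/STRB epilogue below.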
11521
11522 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11523 // Use LDR and STR to copy.
11524 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11525 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11526 unsigned srcIn = src;
11527 unsigned destIn = dest;
11528 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11529 Register srcOut = MRI.createVirtualRegister(TRC);
11530 Register destOut = MRI.createVirtualRegister(TRC);
11531 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11532 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11533 IsThumb1, IsThumb2);
11534 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11535 IsThumb1, IsThumb2);
11536 srcIn = srcOut;
11537 destIn = destOut;
11538 }
11539
11540 // Handle the leftover bytes with LDRB and STRB.
11541 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11542 // [destOut] = STRB_POST(scratch, destIn, 1)
11543 for (unsigned i = 0; i < BytesLeft; i++) {
11544 Register srcOut = MRI.createVirtualRegister(TRC);
11545 Register destOut = MRI.createVirtualRegister(TRC);
11546 Register scratch = MRI.createVirtualRegister(TRC);
11547 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11548 IsThumb1, IsThumb2);
11549 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11550 IsThumb1, IsThumb2);
11551 srcIn = srcOut;
11552 destIn = destOut;
11553 }
11554 MI.eraseFromParent(); // The instruction is gone now.
11555 return BB;
11556 }
11557
11558 // Expand the pseudo op to a loop.
11559 // thisMBB:
11560 // ...
11561 // movw varEnd, # --> with thumb2
11562 // movt varEnd, #
11563 // ldrcp varEnd, idx --> without thumb2
11564 // fallthrough --> loopMBB
11565 // loopMBB:
11566 // PHI varPhi, varEnd, varLoop
11567 // PHI srcPhi, src, srcLoop
11568 // PHI destPhi, dst, destLoop
11569 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11570 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11571 // subs varLoop, varPhi, #UnitSize
11572 // bne loopMBB
11573 // fallthrough --> exitMBB
11574 // exitMBB:
11575 // epilogue to handle left-over bytes
11576 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11577 // [destOut] = STRB_POST(scratch, destLoop, 1)
11578 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11579 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11580 MF->insert(It, loopMBB);
11581 MF->insert(It, exitMBB);
11582
11583 // Set the call frame size on entry to the new basic blocks.
11584 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11585 loopMBB->setCallFrameSize(CallFrameSize);
11586 exitMBB->setCallFrameSize(CallFrameSize);
11587
11588 // Transfer the remainder of BB and its successor edges to exitMBB.
11589 exitMBB->splice(exitMBB->begin(), BB,
11590 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11591 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11592
11593 // Load an immediate to varEnd.
11594 Register varEnd = MRI.createVirtualRegister(TRC);
11595 if (Subtarget->useMovt()) {
11596 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11597 varEnd)
11598 .addImm(LoopSize);
11599 } else if (Subtarget->genExecuteOnly()) {
11600 assert(IsThumb && "Non-thumb expected to have used movt");
11601 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11602 } else {
11603 MachineConstantPool *ConstantPool = MF->getConstantPool();
11604 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11605 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11606
11607 // MachineConstantPool wants an explicit alignment.
11608 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11609 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11610 MachineMemOperand *CPMMO =
11613
11614 if (IsThumb)
11615 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11616 .addReg(varEnd, RegState::Define)
11619 .addMemOperand(CPMMO);
11620 else
11621 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11622 .addReg(varEnd, RegState::Define)
11624 .addImm(0)
11626 .addMemOperand(CPMMO);
11627 }
11628 BB->addSuccessor(loopMBB);
11629
11630 // Generate the loop body:
11631 // varPhi = PHI(varLoop, varEnd)
11632 // srcPhi = PHI(srcLoop, src)
11633 // destPhi = PHI(destLoop, dst)
11634 MachineBasicBlock *entryBB = BB;
11635 BB = loopMBB;
11636 Register varLoop = MRI.createVirtualRegister(TRC);
11637 Register varPhi = MRI.createVirtualRegister(TRC);
11638 Register srcLoop = MRI.createVirtualRegister(TRC);
11639 Register srcPhi = MRI.createVirtualRegister(TRC);
11640 Register destLoop = MRI.createVirtualRegister(TRC);
11641 Register destPhi = MRI.createVirtualRegister(TRC);
11642
11643 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11644 .addReg(varLoop).addMBB(loopMBB)
11645 .addReg(varEnd).addMBB(entryBB);
11646 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11647 .addReg(srcLoop).addMBB(loopMBB)
11648 .addReg(src).addMBB(entryBB);
11649 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11650 .addReg(destLoop).addMBB(loopMBB)
11651 .addReg(dest).addMBB(entryBB);
11652
11653 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11654 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11655 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11656 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11657 IsThumb1, IsThumb2);
11658 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11659 IsThumb1, IsThumb2);
11660
11661 // Decrement loop variable by UnitSize.
11662 if (IsThumb1) {
11663 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11664 .add(t1CondCodeOp())
11665 .addReg(varPhi)
11666 .addImm(UnitSize)
11668 } else {
11669 MachineInstrBuilder MIB =
11670 BuildMI(*BB, BB->end(), dl,
11671 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11672 MIB.addReg(varPhi)
11673 .addImm(UnitSize)
11675 .add(condCodeOp());
11676 MIB->getOperand(5).setReg(ARM::CPSR);
11677 MIB->getOperand(5).setIsDef(true);
11678 }
11679 BuildMI(*BB, BB->end(), dl,
11680 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11681 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11682
11683 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11684 BB->addSuccessor(loopMBB);
11685 BB->addSuccessor(exitMBB);
11686
11687 // Add epilogue to handle BytesLeft.
11688 BB = exitMBB;
11689 auto StartOfExit = exitMBB->begin();
11690
11691 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11692 // [destOut] = STRB_POST(scratch, destLoop, 1)
11693 unsigned srcIn = srcLoop;
11694 unsigned destIn = destLoop;
11695 for (unsigned i = 0; i < BytesLeft; i++) {
11696 Register srcOut = MRI.createVirtualRegister(TRC);
11697 Register destOut = MRI.createVirtualRegister(TRC);
11698 Register scratch = MRI.createVirtualRegister(TRC);
11699 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11700 IsThumb1, IsThumb2);
11701 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11702 IsThumb1, IsThumb2);
11703 srcIn = srcOut;
11704 destIn = destOut;
11705 }
11706
11707 MI.eraseFromParent(); // The instruction is gone now.
11708 return BB;
11709}
11710
11711 MachineBasicBlock *
11712 ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11713 MachineBasicBlock *MBB) const {
11714 const TargetMachine &TM = getTargetMachine();
11715 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11716 DebugLoc DL = MI.getDebugLoc();
11717
11718 assert(Subtarget->isTargetWindows() &&
11719 "__chkstk is only supported on Windows");
11720 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11721
11722 // __chkstk takes the number of words to allocate on the stack in R4, and
11723 // returns the stack adjustment in number of bytes in R4. This will not
11724 // clobber any other registers (other than the obvious lr).
11725 //
11726 // Although, technically, IP should be considered a register which may be
11727 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11728 // thumb-2 environment, so there is no interworking required. As a result, we
11729 // do not expect a veneer to be emitted by the linker, clobbering IP.
11730 //
11731 // Each module receives its own copy of __chkstk, so no import thunk is
11732 // required, again, ensuring that IP is not clobbered.
11733 //
11734 // Finally, although some linkers may theoretically provide a trampoline for
11735 // out of range calls (which is quite common due to a 32M range limitation of
11736 // branches for Thumb), we can generate the long-call version via
11737 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11738 // IP.
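//
// Roughly speaking, for the small/medium/kernel code models this emits
//   bl __chkstk
//   sub.w sp, sp, r4
// while the large code model first materializes the address of __chkstk in a
// register and calls through it before performing the same SP adjustment.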
11739
11740 switch (TM.getCodeModel()) {
11741 case CodeModel::Tiny:
11742 llvm_unreachable("Tiny code model not available on ARM.");
11743 case CodeModel::Small:
11744 case CodeModel::Medium:
11745 case CodeModel::Kernel:
11746 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11748 .addExternalSymbol("__chkstk")
11751 .addReg(ARM::R12,
11753 .addReg(ARM::CPSR,
11755 break;
11756 case CodeModel::Large: {
11757 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11758 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11759
11760 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11761 .addExternalSymbol("__chkstk");
11767 .addReg(ARM::R12,
11769 .addReg(ARM::CPSR,
11771 break;
11772 }
11773 }
11774
11775 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11776 .addReg(ARM::SP, RegState::Kill)
11777 .addReg(ARM::R4, RegState::Kill)
11780 .add(condCodeOp());
11781
11782 MI.eraseFromParent();
11783 return MBB;
11784}
11785
11786 MachineBasicBlock *
11787 ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11788 MachineBasicBlock *MBB) const {
11789 DebugLoc DL = MI.getDebugLoc();
11790 MachineFunction *MF = MBB->getParent();
11791 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11792
11793 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11794 MF->insert(++MBB->getIterator(), ContBB);
11795 ContBB->splice(ContBB->begin(), MBB,
11796 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11797 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11798 MBB->addSuccessor(ContBB);
11799
11800 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11801 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11802 MF->push_back(TrapBB);
11803 MBB->addSuccessor(TrapBB);
11804
11805 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11806 .addReg(MI.getOperand(0).getReg())
11807 .addImm(0)
11809 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11810 .addMBB(TrapBB)
11812 .addReg(ARM::CPSR);
11813
11814 MI.eraseFromParent();
11815 return ContBB;
11816}
11817
11818// The CPSR operand of SelectItr might be missing a kill marker
11819// because there were multiple uses of CPSR, and ISel didn't know
11820// which to mark. Figure out whether SelectItr should have had a
11821// kill marker, and set it if it should. Returns the correct kill
11822// marker value.
11823 static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11824 MachineBasicBlock* BB,
11825 const TargetRegisterInfo* TRI) {
11826 // Scan forward through BB for a use/def of CPSR.
11827 MachineBasicBlock::iterator miI(std::next(SelectItr));
11828 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11829 const MachineInstr& mi = *miI;
11830 if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))
11831 return false;
11832 if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))
11833 break; // Should have kill-flag - update below.
11834 }
11835
11836 // If we hit the end of the block, check whether CPSR is live into a
11837 // successor.
11838 if (miI == BB->end()) {
11839 for (MachineBasicBlock *Succ : BB->successors())
11840 if (Succ->isLiveIn(ARM::CPSR))
11841 return false;
11842 }
11843
11844 // We found a def, or hit the end of the basic block and CPSR wasn't live
11845 // out. SelectMI should have a kill flag on CPSR.
11846 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11847 return true;
11848}
11849
11850 /// Adds logic to the loop entry MBB to calculate the loop iteration count and
11851 /// adds t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11852 static Register genTPEntry(MachineBasicBlock *TpEntry,
11853 MachineBasicBlock *TpLoopBody,
11854 MachineBasicBlock *TpExit, Register OpSizeReg,
11855 const TargetInstrInfo *TII, DebugLoc Dl,
11856 MachineRegisterInfo &MRI) {
11857 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
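// For example, an element count of n = 100 gives (100 + 15) >> 4 = 7 loop
// iterations; the predication set up in the loop body covers the final,
// partial 16-byte block.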
11858 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11859 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11860 .addUse(OpSizeReg)
11861 .addImm(15)
11863 .addReg(0);
11864
11865 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11866 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11867 .addUse(AddDestReg, RegState::Kill)
11868 .addImm(4)
11870 .addReg(0);
11871
11872 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11873 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11874 .addUse(LsrDestReg, RegState::Kill);
11875
11876 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11877 .addUse(TotalIterationsReg)
11878 .addMBB(TpExit);
11879
11880 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11881 .addMBB(TpLoopBody)
11883
11884 return TotalIterationsReg;
11885}
11886
11887/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11888/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11889/// loops.
11890static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11891 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11892 const TargetInstrInfo *TII, DebugLoc Dl,
11893 MachineRegisterInfo &MRI, Register OpSrcReg,
11894 Register OpDestReg, Register ElementCountReg,
11895 Register TotalIterationsReg, bool IsMemcpy) {
11896 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11897 // array, loop iteration counter, predication counter.
11898
11899 Register SrcPhiReg, CurrSrcReg;
11900 if (IsMemcpy) {
11901 // Current position in the src array
11902 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11903 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11904 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11905 .addUse(OpSrcReg)
11906 .addMBB(TpEntry)
11907 .addUse(CurrSrcReg)
11908 .addMBB(TpLoopBody);
11909 }
11910
11911 // Current position in the dest array
11912 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11913 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11914 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11915 .addUse(OpDestReg)
11916 .addMBB(TpEntry)
11917 .addUse(CurrDestReg)
11918 .addMBB(TpLoopBody);
11919
11920 // Current loop counter
11921 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11922 Register RemainingLoopIterationsReg =
11923 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11924 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11925 .addUse(TotalIterationsReg)
11926 .addMBB(TpEntry)
11927 .addUse(RemainingLoopIterationsReg)
11928 .addMBB(TpLoopBody);
11929
11930 // Predication counter
11931 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11932 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11933 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11934 .addUse(ElementCountReg)
11935 .addMBB(TpEntry)
11936 .addUse(RemainingElementsReg)
11937 .addMBB(TpLoopBody);
11938
11939 // Pass predication counter to VCTP
11940 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11941 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11942 .addUse(PredCounterPhiReg)
11944 .addReg(0)
11945 .addReg(0);
11946
11947 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11948 .addUse(PredCounterPhiReg)
11949 .addImm(16)
11951 .addReg(0);
11952
11953 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11954 Register SrcValueReg;
11955 if (IsMemcpy) {
11956 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11957 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11958 .addDef(CurrSrcReg)
11959 .addDef(SrcValueReg)
11960 .addReg(SrcPhiReg)
11961 .addImm(16)
11963 .addUse(VccrReg)
11964 .addReg(0);
11965 } else
11966 SrcValueReg = OpSrcReg;
11967
11968 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11969 .addDef(CurrDestReg)
11970 .addUse(SrcValueReg)
11971 .addReg(DestPhiReg)
11972 .addImm(16)
11974 .addUse(VccrReg)
11975 .addReg(0);
11976
11977 // Add the pseudoInstrs for decrementing the loop counter and marking the
11978 // end:t2DoLoopDec and t2DoLoopEnd
11979 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11980 .addUse(LoopCounterPhiReg)
11981 .addImm(1);
11982
11983 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11984 .addUse(RemainingLoopIterationsReg)
11985 .addMBB(TpLoopBody);
11986
11987 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11988 .addMBB(TpExit)
11990}
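// Note that the t2WhileLoopSetup/t2WhileLoopStart and t2LoopDec/t2LoopEnd
// pseudos built above are not final code: later passes (the MVE
// tail-predication and ARMLowOverheadLoops passes) either turn them into the
// real low-overhead loop instructions (WLSTP/LETP) or revert them to ordinary
// compare-and-branch code.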
11991
11992 MachineBasicBlock *
11993 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11994 MachineBasicBlock *BB) const {
11995 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11996 DebugLoc dl = MI.getDebugLoc();
11997 bool isThumb2 = Subtarget->isThumb2();
11998 switch (MI.getOpcode()) {
11999 default: {
12000 MI.print(errs());
12001 llvm_unreachable("Unexpected instr type to insert");
12002 }
12003
12004 // Thumb1 post-indexed loads are really just single-register LDMs.
12005 case ARM::tLDR_postidx: {
12006 MachineOperand Def(MI.getOperand(1));
12007 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
12008 .add(Def) // Rn_wb
12009 .add(MI.getOperand(2)) // Rn
12010 .add(MI.getOperand(3)) // PredImm
12011 .add(MI.getOperand(4)) // PredReg
12012 .add(MI.getOperand(0)) // Rt
12013 .cloneMemRefs(MI);
12014 MI.eraseFromParent();
12015 return BB;
12016 }
12017
12018 case ARM::MVE_MEMCPYLOOPINST:
12019 case ARM::MVE_MEMSETLOOPINST: {
12020
12021 // The transformation below expands the MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST
12022 // pseudo into a Tail Predicated (TP) loop. It adds the instructions to
12023 // calculate the iteration count (= ceil(size_in_bytes / 16)) in the TP entry
12024 // block and adds the relevant instructions in the TP loop body for the
12025 // generation of a WLSTP loop.
12026
12027 // Below is relevant portion of the CFG after the transformation.
12028 // The Machine Basic Blocks are shown along with branch conditions (in
12029 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
12030 // portion of the CFG and may not necessarily be the entry/exit of the
12031 // function.
12032
12033 // (Relevant) CFG after transformation:
12034 // TP entry MBB
12035 // |
12036 // |-----------------|
12037 // (n <= 0) (n > 0)
12038 // | |
12039 // | TP loop Body MBB<--|
12040 // | | |
12041 // \ |___________|
12042 // \ /
12043 // TP exit MBB
12044
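// The t2WhileLoopStart emitted in the TP entry block implements the (n <= 0)
// test shown above: when the element count is zero it branches straight to the
// TP exit MBB, so the loop body is skipped for an empty memcpy/memset.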
12045 MachineFunction *MF = BB->getParent();
12046 MachineFunctionProperties &Properties = MF->getProperties();
12047 MachineRegisterInfo &MRI = MF->getRegInfo();
12048
12049 Register OpDestReg = MI.getOperand(0).getReg();
12050 Register OpSrcReg = MI.getOperand(1).getReg();
12051 Register OpSizeReg = MI.getOperand(2).getReg();
12052
12053 // Allocate the required MBBs and add to parent function.
12054 MachineBasicBlock *TpEntry = BB;
12055 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12056 MachineBasicBlock *TpExit;
12057
12058 MF->push_back(TpLoopBody);
12059
12060 // If any instructions are present in the current block after
12061 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12062 // move the instructions into the newly created exit block. If there are no
12063 // instructions add an explicit branch to the FallThrough block and then
12064 // split.
12065 //
12066 // The split is required for two reasons:
12067 // 1) A terminator(t2WhileLoopStart) will be placed at that site.
12068 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12069 // need to be updated. splitAt() already handles this.
12070 TpExit = BB->splitAt(MI, false);
12071 if (TpExit == BB) {
12072 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12073 "block containing memcpy/memset Pseudo");
12074 TpExit = BB->getFallThrough();
12075 BuildMI(BB, dl, TII->get(ARM::t2B))
12076 .addMBB(TpExit)
12078 TpExit = BB->splitAt(MI, false);
12079 }
12080
12081 // Add logic for iteration count
12082 Register TotalIterationsReg =
12083 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12084
12085 // Add the vectorized (and predicated) loads/store instructions
12086 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12087 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12088 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12089
12090 // Required to avoid conflict with the MachineVerifier during testing.
12091 Properties.resetNoPHIs();
12092
12093 // Connect the blocks
12094 TpEntry->addSuccessor(TpLoopBody);
12095 TpLoopBody->addSuccessor(TpLoopBody);
12096 TpLoopBody->addSuccessor(TpExit);
12097
12098 // Reorder for a more natural layout
12099 TpLoopBody->moveAfter(TpEntry);
12100 TpExit->moveAfter(TpLoopBody);
12101
12102 // Finally, remove the memcpy Pseudo Instruction
12103 MI.eraseFromParent();
12104
12105 // Return the exit block as it may contain other instructions requiring a
12106 // custom inserter
12107 return TpExit;
12108 }
12109
12110 // The Thumb2 pre-indexed stores have the same MI operands; they are just
12111 // defined differently in the .td files than in the isel patterns, so
12112 // they need pseudos.
12113 case ARM::t2STR_preidx:
12114 MI.setDesc(TII->get(ARM::t2STR_PRE));
12115 return BB;
12116 case ARM::t2STRB_preidx:
12117 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12118 return BB;
12119 case ARM::t2STRH_preidx:
12120 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12121 return BB;
12122
12123 case ARM::STRi_preidx:
12124 case ARM::STRBi_preidx: {
12125 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12126 : ARM::STRB_PRE_IMM;
12127 // Decode the offset.
12128 unsigned Offset = MI.getOperand(4).getImm();
12129 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12130 Offset = ARM_AM::getAM2Offset(Offset);
12131 if (isSub)
12132 Offset = -Offset;
12133
12134 MachineMemOperand *MMO = *MI.memoperands_begin();
12135 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12136 .add(MI.getOperand(0)) // Rn_wb
12137 .add(MI.getOperand(1)) // Rt
12138 .add(MI.getOperand(2)) // Rn
12139 .addImm(Offset) // offset (skip GPR==zero_reg)
12140 .add(MI.getOperand(5)) // pred
12141 .add(MI.getOperand(6))
12142 .addMemOperand(MMO);
12143 MI.eraseFromParent();
12144 return BB;
12145 }
12146 case ARM::STRr_preidx:
12147 case ARM::STRBr_preidx:
12148 case ARM::STRH_preidx: {
12149 unsigned NewOpc;
12150 switch (MI.getOpcode()) {
12151 default: llvm_unreachable("unexpected opcode!");
12152 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12153 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12154 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12155 }
12156 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12157 for (const MachineOperand &MO : MI.operands())
12158 MIB.add(MO);
12159 MI.eraseFromParent();
12160 return BB;
12161 }
12162
12163 case ARM::tMOVCCr_pseudo: {
12164 // To "insert" a SELECT_CC instruction, we actually have to insert the
12165 // diamond control-flow pattern. The incoming instruction knows the
12166 // destination vreg to set, the condition code register to branch on, the
12167 // true/false values to select between, and a branch opcode to use.
12168 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12169 MachineFunction::iterator It = ++BB->getIterator();
12170
12171 // thisMBB:
12172 // ...
12173 // TrueVal = ...
12174 // cmpTY ccX, r1, r2
12175 // bCC copy1MBB
12176 // fallthrough --> copy0MBB
12177 MachineBasicBlock *thisMBB = BB;
12178 MachineFunction *F = BB->getParent();
12179 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12180 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12181 F->insert(It, copy0MBB);
12182 F->insert(It, sinkMBB);
12183
12184 // Set the call frame size on entry to the new basic blocks.
12185 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12186 copy0MBB->setCallFrameSize(CallFrameSize);
12187 sinkMBB->setCallFrameSize(CallFrameSize);
12188
12189 // Check whether CPSR is live past the tMOVCCr_pseudo.
12190 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12191 if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&
12192 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12193 copy0MBB->addLiveIn(ARM::CPSR);
12194 sinkMBB->addLiveIn(ARM::CPSR);
12195 }
12196
12197 // Transfer the remainder of BB and its successor edges to sinkMBB.
12198 sinkMBB->splice(sinkMBB->begin(), BB,
12199 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12200 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12201
12202 BB->addSuccessor(copy0MBB);
12203 BB->addSuccessor(sinkMBB);
12204
12205 BuildMI(BB, dl, TII->get(ARM::tBcc))
12206 .addMBB(sinkMBB)
12207 .addImm(MI.getOperand(3).getImm())
12208 .addReg(MI.getOperand(4).getReg());
12209
12210 // copy0MBB:
12211 // %FalseValue = ...
12212 // # fallthrough to sinkMBB
12213 BB = copy0MBB;
12214
12215 // Update machine-CFG edges
12216 BB->addSuccessor(sinkMBB);
12217
12218 // sinkMBB:
12219 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12220 // ...
12221 BB = sinkMBB;
12222 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12223 .addReg(MI.getOperand(1).getReg())
12224 .addMBB(copy0MBB)
12225 .addReg(MI.getOperand(2).getReg())
12226 .addMBB(thisMBB);
12227
12228 MI.eraseFromParent(); // The pseudo instruction is gone now.
12229 return BB;
12230 }
12231
12232 case ARM::BCCi64:
12233 case ARM::BCCZi64: {
12234 // If there is an unconditional branch to the other successor, remove it.
12235 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12236
12237 // Compare both parts that make up the double comparison separately for
12238 // equality.
12239 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12240
12241 Register LHS1 = MI.getOperand(1).getReg();
12242 Register LHS2 = MI.getOperand(2).getReg();
12243 if (RHSisZero) {
12244 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12245 .addReg(LHS1)
12246 .addImm(0)
12248 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12249 .addReg(LHS2).addImm(0)
12250 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12251 } else {
12252 Register RHS1 = MI.getOperand(3).getReg();
12253 Register RHS2 = MI.getOperand(4).getReg();
12254 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12255 .addReg(LHS1)
12256 .addReg(RHS1)
12258 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12259 .addReg(LHS2).addReg(RHS2)
12260 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12261 }
12262
12263 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12264 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12265 if (MI.getOperand(0).getImm() == ARMCC::NE)
12266 std::swap(destMBB, exitMBB);
12267
12268 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12269 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12270 if (isThumb2)
12271 BuildMI(BB, dl, TII->get(ARM::t2B))
12272 .addMBB(exitMBB)
12274 else
12275 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12276
12277 MI.eraseFromParent(); // The pseudo instruction is gone now.
12278 return BB;
12279 }
12280
12281 case ARM::Int_eh_sjlj_setjmp:
12282 case ARM::Int_eh_sjlj_setjmp_nofp:
12283 case ARM::tInt_eh_sjlj_setjmp:
12284 case ARM::t2Int_eh_sjlj_setjmp:
12285 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12286 return BB;
12287
12288 case ARM::Int_eh_sjlj_setup_dispatch:
12289 EmitSjLjDispatchBlock(MI, BB);
12290 return BB;
12291
12292 case ARM::ABS:
12293 case ARM::t2ABS: {
12294 // To insert an ABS instruction, we have to insert the
12295 // diamond control-flow pattern. The incoming instruction knows the
12296 // source vreg to test against 0, the destination vreg to set,
12297 // the condition code register to branch on, the
12298 // true/false values to select between, and a branch opcode to use.
12299 // It transforms
12300 // V1 = ABS V0
12301 // into
12302 // V2 = MOVS V0
12303 // BCC (branch to SinkBB if V0 >= 0)
12304 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12305 // SinkBB: V1 = PHI(V2, V3)
12306 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12307 MachineFunction::iterator BBI = ++BB->getIterator();
12308 MachineFunction *Fn = BB->getParent();
12309 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12310 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12311 Fn->insert(BBI, RSBBB);
12312 Fn->insert(BBI, SinkBB);
12313
12314 // Set the call frame size on entry to the new basic blocks.
12315 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12316 RSBBB->setCallFrameSize(CallFrameSize);
12317 SinkBB->setCallFrameSize(CallFrameSize);
12318
12319 Register ABSSrcReg = MI.getOperand(1).getReg();
12320 Register ABSDstReg = MI.getOperand(0).getReg();
12321 bool ABSSrcKill = MI.getOperand(1).isKill();
12322 bool isThumb2 = Subtarget->isThumb2();
12323 MachineRegisterInfo &MRI = Fn->getRegInfo();
12324 // In Thumb mode S must not be specified if source register is the SP or
12325 // PC and if destination register is the SP, so restrict register class
12326 Register NewRsbDstReg = MRI.createVirtualRegister(
12327 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12328
12329 // Transfer the remainder of BB and its successor edges to sinkMBB.
12330 SinkBB->splice(SinkBB->begin(), BB,
12331 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12332 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12333
12334 BB->addSuccessor(RSBBB);
12335 BB->addSuccessor(SinkBB);
12336
12337 // fall through to SinkMBB
12338 RSBBB->addSuccessor(SinkBB);
12339
12340 // insert a cmp at the end of BB
12341 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12342 .addReg(ABSSrcReg)
12343 .addImm(0)
12345
12346 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12347 BuildMI(BB, dl,
12348 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12350
12351 // insert rsbri in RSBBB
12352 // Note: BCC and rsbri will be converted into predicated rsbmi
12353 // by if-conversion pass
12354 BuildMI(*RSBBB, RSBBB->begin(), dl,
12355 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12356 .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
12357 .addImm(0)
12359 .add(condCodeOp());
12360
12361 // insert PHI in SinkBB,
12362 // reuse ABSDstReg to not change uses of ABS instruction
12363 BuildMI(*SinkBB, SinkBB->begin(), dl,
12364 TII->get(ARM::PHI), ABSDstReg)
12365 .addReg(NewRsbDstReg).addMBB(RSBBB)
12366 .addReg(ABSSrcReg).addMBB(BB);
12367
12368 // remove ABS instruction
12369 MI.eraseFromParent();
12370
12371 // return last added BB
12372 return SinkBB;
12373 }
12374 case ARM::COPY_STRUCT_BYVAL_I32:
12375 ++NumLoopByVals;
12376 return EmitStructByval(MI, BB);
12377 case ARM::WIN__CHKSTK:
12378 return EmitLowered__chkstk(MI, BB);
12379 case ARM::WIN__DBZCHK:
12380 return EmitLowered__dbzchk(MI, BB);
12381 }
12382}
12383
12384/// Attaches vregs to MEMCPY that it will use as scratch registers
12385/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12386/// instead of as a custom inserter because we need the use list from the SDNode.
12387static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12388 MachineInstr &MI, const SDNode *Node) {
12389 bool isThumb1 = Subtarget->isThumb1Only();
12390
12391 MachineFunction *MF = MI.getParent()->getParent();
12392 MachineRegisterInfo &MRI = MF->getRegInfo();
12393 MachineInstrBuilder MIB(*MF, MI);
12394
12395 // If the new dst/src is unused mark it as dead.
12396 if (!Node->hasAnyUseOfValue(0)) {
12397 MI.getOperand(0).setIsDead(true);
12398 }
12399 if (!Node->hasAnyUseOfValue(1)) {
12400 MI.getOperand(1).setIsDead(true);
12401 }
12402
12403 // The MEMCPY both defines and kills the scratch registers.
12404 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12405 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12406 : &ARM::GPRRegClass);
12407 MIB.addReg(TmpReg, RegState::Define | RegState::Dead);
12408 }
12409}
12410
12411 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12412 SDNode *Node) const {
12413 if (MI.getOpcode() == ARM::MEMCPY) {
12414 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12415 return;
12416 }
12417
12418 const MCInstrDesc *MCID = &MI.getDesc();
12419 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12420 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12421 // operand is still set to noreg. If needed, set the optional operand's
12422 // register to CPSR, and remove the redundant implicit def.
12423 //
12424 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12425
12426 // Rename pseudo opcodes.
12427 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12428 unsigned ccOutIdx;
12429 if (NewOpc) {
12430 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12431 MCID = &TII->get(NewOpc);
12432
12433 assert(MCID->getNumOperands() ==
12434 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12435 && "converted opcode should be the same except for cc_out"
12436 " (and, on Thumb1, pred)");
12437
12438 MI.setDesc(*MCID);
12439
12440 // Add the optional cc_out operand
12441 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12442
12443 // On Thumb1, move all input operands to the end, then add the predicate
12444 if (Subtarget->isThumb1Only()) {
12445 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12446 MI.addOperand(MI.getOperand(1));
12447 MI.removeOperand(1);
12448 }
12449
12450 // Restore the ties
12451 for (unsigned i = MI.getNumOperands(); i--;) {
12452 const MachineOperand& op = MI.getOperand(i);
12453 if (op.isReg() && op.isUse()) {
12454 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12455 if (DefIdx != -1)
12456 MI.tieOperands(DefIdx, i);
12457 }
12458 }
12459
12461 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12462 ccOutIdx = 1;
12463 } else
12464 ccOutIdx = MCID->getNumOperands() - 1;
12465 } else
12466 ccOutIdx = MCID->getNumOperands() - 1;
12467
12468 // Any ARM instruction that sets the 's' bit should specify an optional
12469 // "cc_out" operand in the last operand position.
12470 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12471 assert(!NewOpc && "Optional cc_out operand required");
12472 return;
12473 }
12474 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12475 // since we already have an optional CPSR def.
12476 bool definesCPSR = false;
12477 bool deadCPSR = false;
12478 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12479 ++i) {
12480 const MachineOperand &MO = MI.getOperand(i);
12481 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12482 definesCPSR = true;
12483 if (MO.isDead())
12484 deadCPSR = true;
12485 MI.removeOperand(i);
12486 break;
12487 }
12488 }
12489 if (!definesCPSR) {
12490 assert(!NewOpc && "Optional cc_out operand required");
12491 return;
12492 }
12493 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12494 if (deadCPSR) {
12495 assert(!MI.getOperand(ccOutIdx).getReg() &&
12496 "expect uninitialized optional cc_out operand");
12497 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12498 if (!Subtarget->isThumb1Only())
12499 return;
12500 }
12501
12502 // If this instruction was defined with an optional CPSR def and its dag node
12503 // had a live implicit CPSR def, then activate the optional CPSR def.
12504 MachineOperand &MO = MI.getOperand(ccOutIdx);
12505 MO.setReg(ARM::CPSR);
12506 MO.setIsDef(true);
12507}
12508
12509//===----------------------------------------------------------------------===//
12510// ARM Optimization Hooks
12511//===----------------------------------------------------------------------===//
12512
12513// Helper function that checks if N is a null or all ones constant.
12514static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12515 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12516}
12517
12518// Return true if N is conditionally 0 or all ones.
12519// Detects these expressions where cc is an i1 value:
12520//
12521// (select cc 0, y) [AllOnes=0]
12522// (select cc y, 0) [AllOnes=0]
12523// (zext cc) [AllOnes=0]
12524// (sext cc) [AllOnes=0/1]
12525// (select cc -1, y) [AllOnes=1]
12526// (select cc y, -1) [AllOnes=1]
12527//
12528// Invert is set when N is the null/all ones constant when CC is false.
12529// OtherOp is set to the alternative value of N.
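// For example, matching (select cc, y, 0) with AllOnes=0 sets Invert (the
// constant is selected when cc is false) and returns OtherOp = y.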
12530 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12531 SDValue &CC, bool &Invert,
12532 SDValue &OtherOp,
12533 SelectionDAG &DAG) {
12534 switch (N->getOpcode()) {
12535 default: return false;
12536 case ISD::SELECT: {
12537 CC = N->getOperand(0);
12538 SDValue N1 = N->getOperand(1);
12539 SDValue N2 = N->getOperand(2);
12540 if (isZeroOrAllOnes(N1, AllOnes)) {
12541 Invert = false;
12542 OtherOp = N2;
12543 return true;
12544 }
12545 if (isZeroOrAllOnes(N2, AllOnes)) {
12546 Invert = true;
12547 OtherOp = N1;
12548 return true;
12549 }
12550 return false;
12551 }
12552 case ISD::ZERO_EXTEND:
12553 // (zext cc) can never be the all ones value.
12554 if (AllOnes)
12555 return false;
12556 [[fallthrough]];
12557 case ISD::SIGN_EXTEND: {
12558 SDLoc dl(N);
12559 EVT VT = N->getValueType(0);
12560 CC = N->getOperand(0);
12561 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12562 return false;
12563 Invert = !AllOnes;
12564 if (AllOnes)
12565 // When looking for an AllOnes constant, N is an sext, and the 'other'
12566 // value is 0.
12567 OtherOp = DAG.getConstant(0, dl, VT);
12568 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12569 // When looking for a 0 constant, N can be zext or sext.
12570 OtherOp = DAG.getConstant(1, dl, VT);
12571 else
12572 OtherOp = DAG.getAllOnesConstant(dl, VT);
12573 return true;
12574 }
12575 }
12576}
12577
12578// Combine a constant select operand into its use:
12579//
12580// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12581// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12582// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12583// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12584// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12585//
12586// The transform is rejected if the select doesn't have a constant operand that
12587// is null, or all ones when AllOnes is set.
12588//
12589// Also recognize sext/zext from i1:
12590//
12591// (add (zext cc), x) -> (select cc (add x, 1), x)
12592// (add (sext cc), x) -> (select cc (add x, -1), x)
12593//
12594// These transformations eventually create predicated instructions.
12595//
12596// @param N The node to transform.
12597// @param Slct The N operand that is a select.
12598// @param OtherOp The other N operand (x above).
12599// @param DCI Context.
12600// @param AllOnes Require the select constant to be all ones instead of null.
12601// @returns The new node, or SDValue() on failure.
12602static
12603 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12604 TargetLowering::DAGCombinerInfo &DCI,
12605 bool AllOnes = false) {
12606 SelectionDAG &DAG = DCI.DAG;
12607 EVT VT = N->getValueType(0);
12608 SDValue NonConstantVal;
12609 SDValue CCOp;
12610 bool SwapSelectOps;
12611 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12612 NonConstantVal, DAG))
12613 return SDValue();
12614
12615 // Slct is now known to be the desired identity constant when CC is true.
12616 SDValue TrueVal = OtherOp;
12617 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12618 OtherOp, NonConstantVal);
12619 // Unless SwapSelectOps says CC should be false.
12620 if (SwapSelectOps)
12621 std::swap(TrueVal, FalseVal);
12622
12623 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12624 CCOp, TrueVal, FalseVal);
12625}
12626
12627// Attempt combineSelectAndUse on each operand of a commutative operator N.
12628static
12629 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12630 TargetLowering::DAGCombinerInfo &DCI) {
12631 SDValue N0 = N->getOperand(0);
12632 SDValue N1 = N->getOperand(1);
12633 if (N0.getNode()->hasOneUse())
12634 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12635 return Result;
12636 if (N1.getNode()->hasOneUse())
12637 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12638 return Result;
12639 return SDValue();
12640}
12641
12642 static bool IsVUZPShuffleNode(SDNode *N) {
12643 // VUZP shuffle node.
12644 if (N->getOpcode() == ARMISD::VUZP)
12645 return true;
12646
12647 // "VUZP" on i32 is an alias for VTRN.
12648 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12649 return true;
12650
12651 return false;
12652}
12653
12654 static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12655 TargetLowering::DAGCombinerInfo &DCI,
12656 const ARMSubtarget *Subtarget) {
12657 // Look for ADD(VUZP.0, VUZP.1).
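// That is, add(vuzp(a, b).0, vuzp(a, b).1) on a 64-bit vector computes the
// same pairwise sums as the NEON vpadd(a, b) intrinsic generated below.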
12658 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12659 N0 == N1)
12660 return SDValue();
12661
12662 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12663 if (!N->getValueType(0).is64BitVector())
12664 return SDValue();
12665
12666 // Generate vpadd.
12667 SelectionDAG &DAG = DCI.DAG;
12668 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12669 SDLoc dl(N);
12670 SDNode *Unzip = N0.getNode();
12671 EVT VT = N->getValueType(0);
12672
12674 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12675 TLI.getPointerTy(DAG.getDataLayout())));
12676 Ops.push_back(Unzip->getOperand(0));
12677 Ops.push_back(Unzip->getOperand(1));
12678
12679 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12680}
12681
12682 static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12683 TargetLowering::DAGCombinerInfo &DCI,
12684 const ARMSubtarget *Subtarget) {
12685 // Check for two extended operands.
12686 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12687 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12688 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12689 N1.getOpcode() == ISD::ZERO_EXTEND))
12690 return SDValue();
12691
12692 SDValue N00 = N0.getOperand(0);
12693 SDValue N10 = N1.getOperand(0);
12694
12695 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12696 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12697 N00 == N10)
12698 return SDValue();
12699
12700 // We only recognize Q register paddl here; this can't be reached until
12701 // after type legalization.
12702 if (!N00.getValueType().is64BitVector() ||
12703 !N0.getValueType().is128BitVector())
12704 return SDValue();
12705
12706 // Generate vpaddl.
12707 SelectionDAG &DAG = DCI.DAG;
12708 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12709 SDLoc dl(N);
12710 EVT VT = N->getValueType(0);
12711
12712 SmallVector<SDValue, 8> Ops;
12713 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12714 unsigned Opcode;
12715 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12716 Opcode = Intrinsic::arm_neon_vpaddls;
12717 else
12718 Opcode = Intrinsic::arm_neon_vpaddlu;
12719 Ops.push_back(DAG.getConstant(Opcode, dl,
12720 TLI.getPointerTy(DAG.getDataLayout())));
12721 EVT ElemTy = N00.getValueType().getVectorElementType();
12722 unsigned NumElts = VT.getVectorNumElements();
12723 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12724 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12725 N00.getOperand(0), N00.getOperand(1));
12726 Ops.push_back(Concat);
12727
12728 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12729}
12730
12731// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12732// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12733// much easier to match.
12734static SDValue
12735 AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12736 TargetLowering::DAGCombinerInfo &DCI,
12737 const ARMSubtarget *Subtarget) {
12738 // Only perform the optimization after legalization, and only if NEON is available. We
12739 // also expect both operands to be BUILD_VECTORs.
12740 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12741 || N0.getOpcode() != ISD::BUILD_VECTOR
12742 || N1.getOpcode() != ISD::BUILD_VECTOR)
12743 return SDValue();
12744
12745 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12746 EVT VT = N->getValueType(0);
12747 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12748 return SDValue();
12749
12750 // Check that the vector operands are of the right form.
12751 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12752 // operands, where N is the size of the formed vector.
12753 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12754 // index such that we have a pairwise add pattern.
12755
12756 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12757 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12758 return SDValue();
12759 SDValue Vec = N0->getOperand(0)->getOperand(0);
12760 SDNode *V = Vec.getNode();
12761 unsigned nextIndex = 0;
12762
12763 // For each operand of the ADD (both are BUILD_VECTORs),
12764 // check that each of its operands is an EXTRACT_VECTOR_ELT with
12765 // the same input vector and the appropriate index.
12766 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12767 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12768 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12769
12770 SDValue ExtVec0 = N0->getOperand(i);
12771 SDValue ExtVec1 = N1->getOperand(i);
12772
12773 // First operand is the vector; verify it's the same.
12774 if (V != ExtVec0->getOperand(0).getNode() ||
12775 V != ExtVec1->getOperand(0).getNode())
12776 return SDValue();
12777
12778 // Second is the constant; verify it's correct.
12779 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12780 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12781
12782 // For the constant, we want to see all the even or all the odd.
12783 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12784 || C1->getZExtValue() != nextIndex+1)
12785 return SDValue();
12786
12787 // Increment index.
12788 nextIndex+=2;
12789 } else
12790 return SDValue();
12791 }
12792
12793 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12794 // we're using the entire input vector, otherwise there's a size/legality
12795 // mismatch somewhere.
12796 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12797 Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
12798 return SDValue();
12799
12800 // Create VPADDL node.
12801 SelectionDAG &DAG = DCI.DAG;
12802 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12803
12804 SDLoc dl(N);
12805
12806 // Build operand list.
12807 SmallVector<SDValue, 8> Ops;
12808 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12809 TLI.getPointerTy(DAG.getDataLayout())));
12810
12811 // Input is the vector.
12812 Ops.push_back(Vec);
12813
12814 // Get widened type and narrowed type.
12815 MVT widenType;
12816 unsigned numElem = VT.getVectorNumElements();
12817
12818 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12819 switch (inputLaneType.getSimpleVT().SimpleTy) {
12820 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12821 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12822 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12823 default:
12824 llvm_unreachable("Invalid vector element type for padd optimization.");
12825 }
12826
12827 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12828 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12829 return DAG.getNode(ExtOp, dl, VT, tmp);
12830}
12831
12832 static SDValue findMUL_LOHI(SDValue V) {
12833 if (V->getOpcode() == ISD::UMUL_LOHI ||
12834 V->getOpcode() == ISD::SMUL_LOHI)
12835 return V;
12836 return SDValue();
12837}
12838
12839static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12840 TargetLowering::DAGCombinerInfo &DCI,
12841 const ARMSubtarget *Subtarget) {
12842 if (!Subtarget->hasBaseDSP())
12843 return SDValue();
12844
12845 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12846 // accumulates the product into a 64-bit value. The 16-bit values will
12847 // be sign extended somehow or SRA'd into 32-bit values
12848 // (addc (adde (mul 16bit, 16bit), lo), hi)
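// For example (illustrative): when both multiplicands are sign-extended
// 16-bit values, the pair below becomes a single ARMISD::SMLALBB node taking
// (Op0, Op1, Lo, Hi) and producing the 64-bit accumulation as two i32 results.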
12849 SDValue Mul = AddcNode->getOperand(0);
12850 SDValue Lo = AddcNode->getOperand(1);
12851 if (Mul.getOpcode() != ISD::MUL) {
12852 Lo = AddcNode->getOperand(0);
12853 Mul = AddcNode->getOperand(1);
12854 if (Mul.getOpcode() != ISD::MUL)
12855 return SDValue();
12856 }
12857
12858 SDValue SRA = AddeNode->getOperand(0);
12859 SDValue Hi = AddeNode->getOperand(1);
12860 if (SRA.getOpcode() != ISD::SRA) {
12861 SRA = AddeNode->getOperand(1);
12862 Hi = AddeNode->getOperand(0);
12863 if (SRA.getOpcode() != ISD::SRA)
12864 return SDValue();
12865 }
12866 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12867 if (Const->getZExtValue() != 31)
12868 return SDValue();
12869 } else
12870 return SDValue();
12871
12872 if (SRA.getOperand(0) != Mul)
12873 return SDValue();
12874
12875 SelectionDAG &DAG = DCI.DAG;
12876 SDLoc dl(AddcNode);
12877 unsigned Opcode = 0;
12878 SDValue Op0;
12879 SDValue Op1;
12880
12881 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12882 Opcode = ARMISD::SMLALBB;
12883 Op0 = Mul.getOperand(0);
12884 Op1 = Mul.getOperand(1);
12885 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12886 Opcode = ARMISD::SMLALBT;
12887 Op0 = Mul.getOperand(0);
12888 Op1 = Mul.getOperand(1).getOperand(0);
12889 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12890 Opcode = ARMISD::SMLALTB;
12891 Op0 = Mul.getOperand(0).getOperand(0);
12892 Op1 = Mul.getOperand(1);
12893 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12894 Opcode = ARMISD::SMLALTT;
12895 Op0 = Mul->getOperand(0).getOperand(0);
12896 Op1 = Mul->getOperand(1).getOperand(0);
12897 }
12898
12899 if (!Op0 || !Op1)
12900 return SDValue();
12901
12902 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12903 Op0, Op1, Lo, Hi);
12904 // Replace the ADD nodes' uses with the new SMLAL node's values.
12905 SDValue HiMLALResult(SMLAL.getNode(), 1);
12906 SDValue LoMLALResult(SMLAL.getNode(), 0);
12907
12908 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12909 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12910
12911 // Return original node to notify the driver to stop replacing.
12912 SDValue resNode(AddcNode, 0);
12913 return resNode;
12914}
12915
12916 static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12917 TargetLowering::DAGCombinerInfo &DCI,
12918 const ARMSubtarget *Subtarget) {
12919 // Look for multiply add opportunities.
12920 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
12921 // each add node consumes a value from ISD::UMUL_LOHI and there is
12922 // a glue link from the first add to the second add.
12923 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12924 // a S/UMLAL instruction.
12925 //        UMUL_LOHI
12926 //       / :lo    \ :hi
12927 //      V          \          [no multiline comment]
12928 //  loAdd ->  ADDC  |
12929 //              \ :carry    /
12930 //               V          V
12931 //                ADDE <- hiAdd
12932 //
12933 // In the special case where only the higher part of a signed result is used
12934 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12935 // a constant with the exact value of 0x80000000, we recognize we are dealing
12936 // with a "rounded multiply and add" (or subtract) and transform it into
12937 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
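// For example (illustrative):
// t = SMUL_LOHI(a, b); ADDC(t.lo, x); ADDE(t.hi, y, carry)
// becomes a single ARMISD::SMLAL(a, b, x, y) producing a lo/hi result pair.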
12938
12939 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12940 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12941 "Expect an ADDE or SUBE");
12942
12943 assert(AddeSubeNode->getNumOperands() == 3 &&
12944 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12945 "ADDE node has the wrong inputs");
12946
12947 // Check that we are chained to the right ADDC or SUBC node.
12948 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12949 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12950 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12951 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12952 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12953 return SDValue();
12954
12955 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12956 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12957
12958 // Check if the two operands are from the same mul_lohi node.
12959 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12960 return SDValue();
12961
12962 assert(AddcSubcNode->getNumValues() == 2 &&
12963 AddcSubcNode->getValueType(0) == MVT::i32 &&
12964 "Expect ADDC with two result values. First: i32");
12965
12966 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12967 // may be an SMLAL which multiplies two 16-bit values.
12968 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12969 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12970 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12971 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12972 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12973 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12974
12975 // Check for the triangle shape.
12976 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12977 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12978
12979 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12980 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12981 return SDValue();
12982
12983 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12984 bool IsLeftOperandMUL = false;
12985 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12986 if (MULOp == SDValue())
12987 MULOp = findMUL_LOHI(AddeSubeOp1);
12988 else
12989 IsLeftOperandMUL = true;
12990 if (MULOp == SDValue())
12991 return SDValue();
12992
12993 // Figure out the right opcode.
12994 unsigned Opc = MULOp->getOpcode();
12995 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12996
12997 // Figure out the high and low input values to the MLAL node.
12998 SDValue *HiAddSub = nullptr;
12999 SDValue *LoMul = nullptr;
13000 SDValue *LowAddSub = nullptr;
13001
13002 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
13003 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
13004 return SDValue();
13005
13006 if (IsLeftOperandMUL)
13007 HiAddSub = &AddeSubeOp1;
13008 else
13009 HiAddSub = &AddeSubeOp0;
13010
13011 // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node
13012 // whose low result is fed to the ADDC/SUBC we are checking.
13013
13014 if (AddcSubcOp0 == MULOp.getValue(0)) {
13015 LoMul = &AddcSubcOp0;
13016 LowAddSub = &AddcSubcOp1;
13017 }
13018 if (AddcSubcOp1 == MULOp.getValue(0)) {
13019 LoMul = &AddcSubcOp1;
13020 LowAddSub = &AddcSubcOp0;
13021 }
13022
13023 if (!LoMul)
13024 return SDValue();
13025
13026 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
13027 // the replacement below will create a cycle.
13028 if (AddcSubcNode == HiAddSub->getNode() ||
13029 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
13030 return SDValue();
13031
13032 // Create the merged node.
13033 SelectionDAG &DAG = DCI.DAG;
13034
13035 // Start building operand list.
13036 SmallVector<SDValue, 8> Ops;
13037 Ops.push_back(LoMul->getOperand(0));
13038 Ops.push_back(LoMul->getOperand(1));
13039
13040 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13041 // the case, we must be doing signed multiplication and only use the higher
13042 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
13043 // addition or subtraction with the exact value of 0x80000000.
13044 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13045 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13046 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13047 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13048 0x80000000) {
13049 Ops.push_back(*HiAddSub);
13050 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13051 FinalOpc = ARMISD::SMMLSR;
13052 } else {
13053 FinalOpc = ARMISD::SMMLAR;
13054 }
13055 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13056 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13057
13058 return SDValue(AddeSubeNode, 0);
13059 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13060 // SMMLS is generated during instruction selection and the rest of this
13061 // function cannot handle the case where AddcSubcNode is a SUBC.
13062 return SDValue();
13063
13064 // Finish building the operand list for {U/S}MLAL
13065 Ops.push_back(*LowAddSub);
13066 Ops.push_back(*HiAddSub);
13067
13068 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13069 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13070
13071 // Replace the ADD nodes' uses with the MLAL node's values.
13072 SDValue HiMLALResult(MLALNode.getNode(), 1);
13073 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13074
13075 SDValue LoMLALResult(MLALNode.getNode(), 0);
13076 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13077
13078 // Return original node to notify the driver to stop replacing.
13079 return SDValue(AddeSubeNode, 0);
13080}
13081
13082 static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
13083 TargetLowering::DAGCombinerInfo &DCI,
13084 const ARMSubtarget *Subtarget) {
13085 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13086 // While trying to combine for the other MLAL nodes, first search for the
13087 // chance to use UMAAL. Check if Addc uses a node which has already
13088 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13089 // as the addend, and it's handled in PerformUMLALCombine.
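// (For reference: UMAAL RdLo, RdHi, Rn, Rm computes RdHi:RdLo = Rn*Rm + RdLo +
// RdHi, which is what allows the extra 32-bit addend to be folded in below.)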
13090
13091 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13092 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13093
13094 // Check that we have a glued ADDC node.
13095 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13096 if (AddcNode->getOpcode() != ARMISD::ADDC)
13097 return SDValue();
13098
13099 // Find the converted UMAAL or quit if it doesn't exist.
13100 SDNode *UmlalNode = nullptr;
13101 SDValue AddHi;
13102 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13103 UmlalNode = AddcNode->getOperand(0).getNode();
13104 AddHi = AddcNode->getOperand(1);
13105 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13106 UmlalNode = AddcNode->getOperand(1).getNode();
13107 AddHi = AddcNode->getOperand(0);
13108 } else {
13109 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13110 }
13111
13112 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13113 // the ADDC as well as Zero.
13114 if (!isNullConstant(UmlalNode->getOperand(3)))
13115 return SDValue();
13116
13117 if ((isNullConstant(AddeNode->getOperand(0)) &&
13118 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13119 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13120 isNullConstant(AddeNode->getOperand(1)))) {
13121 SelectionDAG &DAG = DCI.DAG;
13122 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13123 UmlalNode->getOperand(2), AddHi };
13124 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13125 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13126
13127 // Replace the ADD nodes' uses with the UMAAL node's values.
13128 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13129 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13130
13131 // Return original node to notify the driver to stop replacing.
13132 return SDValue(AddeNode, 0);
13133 }
13134 return SDValue();
13135}
13136
13137 static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13138 const ARMSubtarget *Subtarget) {
13139 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13140 return SDValue();
13141
13142 // Check that we have a pair of ADDC and ADDE as operands.
13143 // Both addends of the ADDE must be zero.
13144 SDNode* AddcNode = N->getOperand(2).getNode();
13145 SDNode* AddeNode = N->getOperand(3).getNode();
13146 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13147 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13148 isNullConstant(AddeNode->getOperand(0)) &&
13149 isNullConstant(AddeNode->getOperand(1)) &&
13150 (AddeNode->getOperand(2).getNode() == AddcNode))
13151 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13152 DAG.getVTList(MVT::i32, MVT::i32),
13153 {N->getOperand(0), N->getOperand(1),
13154 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13155 else
13156 return SDValue();
13157}
13158
13159 static SDValue PerformAddcSubcCombine(SDNode *N,
13160 TargetLowering::DAGCombinerInfo &DCI,
13161 const ARMSubtarget *Subtarget) {
13162 SelectionDAG &DAG(DCI.DAG);
13163
13164 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13165 // (SUBC (ADDE 0, 0, C), 1) -> C
13166 SDValue LHS = N->getOperand(0);
13167 SDValue RHS = N->getOperand(1);
13168 if (LHS->getOpcode() == ARMISD::ADDE &&
13169 isNullConstant(LHS->getOperand(0)) &&
13170 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13171 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13172 }
13173 }
13174
13175 if (Subtarget->isThumb1Only()) {
13176 SDValue RHS = N->getOperand(1);
13177 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13178 int32_t imm = C->getSExtValue();
13179 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13180 SDLoc DL(N);
13181 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13182 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13183 : ARMISD::ADDC;
13184 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13185 }
13186 }
13187 }
13188
13189 return SDValue();
13190}
13191
13192 static SDValue PerformAddeSubeCombine(SDNode *N,
13193 TargetLowering::DAGCombinerInfo &DCI,
13194 const ARMSubtarget *Subtarget) {
13195 if (Subtarget->isThumb1Only()) {
13196 SelectionDAG &DAG = DCI.DAG;
13197 SDValue RHS = N->getOperand(1);
13198 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13199 int64_t imm = C->getSExtValue();
13200 if (imm < 0) {
13201 SDLoc DL(N);
13202
13203 // The with-carry-in form matches bitwise not instead of the negation.
13204 // Effectively, the inverse interpretation of the carry flag already
13205 // accounts for part of the negation.
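// For example (illustrative): ADDE(x, -5, carry) becomes SUBE(x, 4, carry),
// since ~(-5) == 4 and the inverted carry supplies the remaining -1.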
13206 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13207
13208 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13209 : ARMISD::ADDE;
13210 return DAG.getNode(Opcode, DL, N->getVTList(),
13211 N->getOperand(0), RHS, N->getOperand(2));
13212 }
13213 }
13214 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13215 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13216 }
13217 return SDValue();
13218}
13219
13220 static SDValue PerformSELECTCombine(SDNode *N,
13221 TargetLowering::DAGCombinerInfo &DCI,
13222 const ARMSubtarget *Subtarget) {
13223 if (!Subtarget->hasMVEIntegerOps())
13224 return SDValue();
13225
13226 SDLoc dl(N);
13227 SDValue SetCC;
13228 SDValue LHS;
13229 SDValue RHS;
13230 ISD::CondCode CC;
13231 SDValue TrueVal;
13232 SDValue FalseVal;
13233
13234 if (N->getOpcode() == ISD::SELECT &&
13235 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13236 SetCC = N->getOperand(0);
13237 LHS = SetCC->getOperand(0);
13238 RHS = SetCC->getOperand(1);
13239 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13240 TrueVal = N->getOperand(1);
13241 FalseVal = N->getOperand(2);
13242 } else if (N->getOpcode() == ISD::SELECT_CC) {
13243 LHS = N->getOperand(0);
13244 RHS = N->getOperand(1);
13245 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13246 TrueVal = N->getOperand(2);
13247 FalseVal = N->getOperand(3);
13248 } else {
13249 return SDValue();
13250 }
13251
13252 unsigned int Opcode = 0;
13253 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13254 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13255 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13256 Opcode = ARMISD::VMINVu;
13257 if (CC == ISD::SETUGT)
13258 std::swap(TrueVal, FalseVal);
13259 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13260 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13261 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13262 Opcode = ARMISD::VMINVs;
13263 if (CC == ISD::SETGT)
13264 std::swap(TrueVal, FalseVal);
13265 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13266 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13267 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13268 Opcode = ARMISD::VMAXVu;
13269 if (CC == ISD::SETULT)
13270 std::swap(TrueVal, FalseVal);
13271 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13272 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13273 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13274 Opcode = ARMISD::VMAXVs;
13275 if (CC == ISD::SETLT)
13276 std::swap(TrueVal, FalseVal);
13277 } else
13278 return SDValue();
13279
13280 // Normalise to the right hand side being the vector reduction
13281 switch (TrueVal->getOpcode()) {
13282 case ISD::VECREDUCE_UMIN:
13283 case ISD::VECREDUCE_SMIN:
13284 case ISD::VECREDUCE_UMAX:
13285 case ISD::VECREDUCE_SMAX:
13286 std::swap(LHS, RHS);
13287 std::swap(TrueVal, FalseVal);
13288 break;
13289 }
13290
13291 EVT VectorType = FalseVal->getOperand(0).getValueType();
13292
13293 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13294 VectorType != MVT::v4i32)
13295 return SDValue();
13296
13297 EVT VectorScalarType = VectorType.getVectorElementType();
13298
13299 // The values being selected must also be the ones being compared
13300 if (TrueVal != LHS || FalseVal != RHS)
13301 return SDValue();
13302
13303 EVT LeftType = LHS->getValueType(0);
13304 EVT RightType = RHS->getValueType(0);
13305
13306 // The types must match the reduced type too
13307 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13308 return SDValue();
13309
13310 // Legalise the scalar to an i32
13311 if (VectorScalarType != MVT::i32)
13312 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13313
13314 // Generate the reduction as an i32 for legalisation purposes
13315 auto Reduction =
13316 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13317
13318 // The result isn't actually an i32 so truncate it back to its original type
13319 if (VectorScalarType != MVT::i32)
13320 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13321
13322 return Reduction;
13323}
13324
13325// A special combine for the vqdmulh family of instructions. This is one of the
13326 // potential set of patterns that could match this instruction. The base pattern
13327 // you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13328 // This matches the slightly different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13329 // which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13330// the max is unnecessary.
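// For example (illustrative): with i16 elements the clamp constant is 32767 and
// the shift amount is 15, i.e. smin(sra(mul(sext(x), sext(y)), 15), 32767),
// which is rebuilt below as an ARMISD::VQDMULH of the narrower vectors.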
13331 static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13332 EVT VT = N->getValueType(0);
13333 SDValue Shft;
13334 ConstantSDNode *Clamp;
13335
13336 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13337 return SDValue();
13338
13339 if (N->getOpcode() == ISD::SMIN) {
13340 Shft = N->getOperand(0);
13341 Clamp = isConstOrConstSplat(N->getOperand(1));
13342 } else if (N->getOpcode() == ISD::VSELECT) {
13343 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13344 SDValue Cmp = N->getOperand(0);
13345 if (Cmp.getOpcode() != ISD::SETCC ||
13346 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13347 Cmp.getOperand(0) != N->getOperand(1) ||
13348 Cmp.getOperand(1) != N->getOperand(2))
13349 return SDValue();
13350 Shft = N->getOperand(1);
13351 Clamp = isConstOrConstSplat(N->getOperand(2));
13352 } else
13353 return SDValue();
13354
13355 if (!Clamp)
13356 return SDValue();
13357
13358 MVT ScalarType;
13359 int ShftAmt = 0;
13360 switch (Clamp->getSExtValue()) {
13361 case (1 << 7) - 1:
13362 ScalarType = MVT::i8;
13363 ShftAmt = 7;
13364 break;
13365 case (1 << 15) - 1:
13366 ScalarType = MVT::i16;
13367 ShftAmt = 15;
13368 break;
13369 case (1ULL << 31) - 1:
13370 ScalarType = MVT::i32;
13371 ShftAmt = 31;
13372 break;
13373 default:
13374 return SDValue();
13375 }
13376
13377 if (Shft.getOpcode() != ISD::SRA)
13378 return SDValue();
13379 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13380 if (!N1 || N1->getSExtValue() != ShftAmt)
13381 return SDValue();
13382
13383 SDValue Mul = Shft.getOperand(0);
13384 if (Mul.getOpcode() != ISD::MUL)
13385 return SDValue();
13386
13387 SDValue Ext0 = Mul.getOperand(0);
13388 SDValue Ext1 = Mul.getOperand(1);
13389 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13390 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13391 return SDValue();
13392 EVT VecVT = Ext0.getOperand(0).getValueType();
13393 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13394 return SDValue();
13395 if (Ext1.getOperand(0).getValueType() != VecVT ||
13396 VecVT.getScalarType() != ScalarType ||
13397 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13398 return SDValue();
13399
13400 SDLoc DL(Mul);
13401 unsigned LegalLanes = 128 / (ShftAmt + 1);
13402 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13403 // For types smaller than legal vectors, extend to be legal and only use the
13404 // needed lanes.
13405 if (VecVT.getSizeInBits() < 128) {
13406 EVT ExtVecVT =
13407 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13408 VecVT.getVectorNumElements());
13409 SDValue Inp0 =
13410 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13411 SDValue Inp1 =
13412 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13413 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13414 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13415 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13416 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13417 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13418 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13419 }
13420
13421 // For larger types, split into legal sized chunks.
13422 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13423 unsigned NumParts = VecVT.getSizeInBits() / 128;
13424 SmallVector<SDValue, 4> Parts;
13425 for (unsigned I = 0; I < NumParts; ++I) {
13426 SDValue Inp0 =
13427 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13428 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13429 SDValue Inp1 =
13430 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13431 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13432 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13433 Parts.push_back(VQDMULH);
13434 }
13435 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13436 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13437}
13438
13439 static SDValue PerformVSELECTCombine(SDNode *N,
13440 TargetLowering::DAGCombinerInfo &DCI,
13441 const ARMSubtarget *Subtarget) {
13442 if (!Subtarget->hasMVEIntegerOps())
13443 return SDValue();
13444
13445 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13446 return V;
13447
13448 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13449 //
13450 // We need to re-implement this optimization here as the implementation in the
13451 // Target-Independent DAGCombiner does not handle the kind of constant we make
13452 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13453 // good reason, allowing truncation there would break other targets).
13454 //
13455 // Currently, this is only done for MVE, as it's the only target that benefits
13456 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13457 if (N->getOperand(0).getOpcode() != ISD::XOR)
13458 return SDValue();
13459 SDValue XOR = N->getOperand(0);
13460
13461 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13462 // It is important to check with truncation allowed as the BUILD_VECTORs we
13463 // generate in those situations will truncate their operands.
13464 ConstantSDNode *Const =
13465 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13466 /*AllowTruncation*/ true);
13467 if (!Const || !Const->isOne())
13468 return SDValue();
13469
13470 // Rewrite into vselect(cond, rhs, lhs).
13471 SDValue Cond = XOR->getOperand(0);
13472 SDValue LHS = N->getOperand(1);
13473 SDValue RHS = N->getOperand(2);
13474 EVT Type = N->getValueType(0);
13475 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13476}
13477
13478// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
13479 static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13480 TargetLowering::DAGCombinerInfo &DCI,
13481 const ARMSubtarget *Subtarget) {
13482 SDValue Op0 = N->getOperand(0);
13483 SDValue Op1 = N->getOperand(1);
13484 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13485 EVT VT = N->getValueType(0);
13486
13487 if (!Subtarget->hasMVEIntegerOps() ||
13489 return SDValue();
13490
13491 if (CC == ISD::SETUGE) {
13492 std::swap(Op0, Op1);
13493 CC = ISD::SETULT;
13494 }
13495
13496 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13498 return SDValue();
13499
13500 // Check first operand is BuildVector of 0,1,2,...
13501 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13502 if (!Op0.getOperand(I).isUndef() &&
13503 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13504 Op0.getConstantOperandVal(I) == I))
13505 return SDValue();
13506 }
13507
13508 // The second is a Splat of Op1S
13509 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13510 if (!Op1S)
13511 return SDValue();
13512
13513 unsigned Opc;
13514 switch (VT.getVectorNumElements()) {
13515 case 2:
13516 Opc = Intrinsic::arm_mve_vctp64;
13517 break;
13518 case 4:
13519 Opc = Intrinsic::arm_mve_vctp32;
13520 break;
13521 case 8:
13522 Opc = Intrinsic::arm_mve_vctp16;
13523 break;
13524 case 16:
13525 Opc = Intrinsic::arm_mve_vctp8;
13526 break;
13527 default:
13528 return SDValue();
13529 }
13530
13531 SDLoc DL(N);
13532 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13533 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13534 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13535}
13536
13537/// PerformADDECombine - Target-specific dag combine transform from
13538/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13539/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13540 static SDValue PerformADDECombine(SDNode *N,
13541 TargetLowering::DAGCombinerInfo &DCI,
13542 const ARMSubtarget *Subtarget) {
13543 // Only ARM and Thumb2 support UMLAL/SMLAL.
13544 if (Subtarget->isThumb1Only())
13545 return PerformAddeSubeCombine(N, DCI, Subtarget);
13546
13547 // Only perform the checks after legalize when the pattern is available.
13548 if (DCI.isBeforeLegalize()) return SDValue();
13549
13550 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13551}
13552
13553/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13554/// operands N0 and N1. This is a helper for PerformADDCombine that is
13555/// called with the default operands, and if that fails, with commuted
13556/// operands.
13557 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13558 TargetLowering::DAGCombinerInfo &DCI,
13559 const ARMSubtarget *Subtarget){
13560 // Attempt to create vpadd for this add.
13561 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13562 return Result;
13563
13564 // Attempt to create vpaddl for this add.
13565 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13566 return Result;
13567 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13568 Subtarget))
13569 return Result;
13570
13571 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13572 if (N0.getNode()->hasOneUse())
13573 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13574 return Result;
13575 return SDValue();
13576}
13577
13578 static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13579 EVT VT = N->getValueType(0);
13580 SDValue N0 = N->getOperand(0);
13581 SDValue N1 = N->getOperand(1);
13582 SDLoc dl(N);
13583
13584 auto IsVecReduce = [](SDValue Op) {
13585 switch (Op.getOpcode()) {
13586 case ISD::VECREDUCE_ADD:
13587 case ARMISD::VADDVs:
13588 case ARMISD::VADDVu:
13589 case ARMISD::VMLAVs:
13590 case ARMISD::VMLAVu:
13591 return true;
13592 }
13593 return false;
13594 };
13595
13596 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13597 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13598 // add(add(X, vecreduce(Y)), vecreduce(Z))
13599 // to make better use of vaddva style instructions.
13600 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13601 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13602 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13603 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13604 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13605 }
13606 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13607 // add(add(add(A, C), reduce(B)), reduce(D))
13608 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13609 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13610 unsigned N0RedOp = 0;
13611 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13612 N0RedOp = 1;
13613 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13614 return SDValue();
13615 }
13616
13617 unsigned N1RedOp = 0;
13618 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13619 N1RedOp = 1;
13620 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13621 return SDValue();
13622
13623 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13624 N1.getOperand(1 - N1RedOp));
13625 SDValue Add1 =
13626 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13627 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13628 }
13629 return SDValue();
13630 };
13631 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13632 return R;
13633 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13634 return R;
13635
13636 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13637 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13638 // by ascending load offsets. This can help cores prefetch if the order of
13639 // loads is more predictable.
13640 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13641 // Check if two reductions are known to load data where one is before/after
13642 // another. Return negative if N0 loads data before N1, positive if N1 is
13643 // before N0 and 0 otherwise if nothing is known.
13644 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13645 // Look through to the first operand of a MUL, for the VMLA case.
13646 // Currently only looks at the first operand, in the hope they are equal.
13647 if (N0.getOpcode() == ISD::MUL)
13648 N0 = N0.getOperand(0);
13649 if (N1.getOpcode() == ISD::MUL)
13650 N1 = N1.getOperand(0);
13651
13652 // Return true if the two operands are loads to the same object and the
13653 // offset of the first is known to be less than the offset of the second.
13654 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13655 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13656 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13657 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13658 Load1->isIndexed())
13659 return 0;
13660
13661 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13662 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13663
13664 if (!BaseLocDecomp0.getBase() ||
13665 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13666 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13667 return 0;
13668 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13669 return -1;
13670 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13671 return 1;
13672 return 0;
13673 };
13674
13675 SDValue X;
13676 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13677 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13678 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13679 N0.getOperand(1).getOperand(0));
13680 if (IsBefore < 0) {
13681 X = N0.getOperand(0);
13682 N0 = N0.getOperand(1);
13683 } else if (IsBefore > 0) {
13684 X = N0.getOperand(1);
13685 N0 = N0.getOperand(0);
13686 } else
13687 return SDValue();
13688 } else if (IsVecReduce(N0.getOperand(0))) {
13689 X = N0.getOperand(1);
13690 N0 = N0.getOperand(0);
13691 } else if (IsVecReduce(N0.getOperand(1))) {
13692 X = N0.getOperand(0);
13693 N0 = N0.getOperand(1);
13694 } else
13695 return SDValue();
13696 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13697 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13698 // Note this is backwards from what you would expect. We create
13699 // add(reduce(load + 16), reduce(load + 0)) so that the
13700 // add(reduce(load + 16), X) is combined into VADDVA(X, load + 16), leaving
13701 // the X as VADDV(load + 0).
13702 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13703 } else
13704 return SDValue();
13705
13706 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13707 return SDValue();
13708
13709 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13710 return SDValue();
13711
13712 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13713 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13714 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13715 };
13716 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13717 return R;
13718 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13719 return R;
13720 return SDValue();
13721}
13722
13723 static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13724 const ARMSubtarget *Subtarget) {
13725 if (!Subtarget->hasMVEIntegerOps())
13726 return SDValue();
13727
13728 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13729 return R;
13730
13731 EVT VT = N->getValueType(0);
13732 SDValue N0 = N->getOperand(0);
13733 SDValue N1 = N->getOperand(1);
13734 SDLoc dl(N);
13735
13736 if (VT != MVT::i64)
13737 return SDValue();
13738
13739 // We are looking for an i64 add of a VADDLVx. Due to these being i64's, this
13740 // will look like:
13741 // t1: i32,i32 = ARMISD::VADDLVs x
13742 // t2: i64 = build_pair t1, t1:1
13743 // t3: i64 = add t2, y
13744 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13745 // the add to be simplified separately.
13746 // We also need to check for sext / zext and commutative adds.
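// For example (illustrative):
// add(y, build_pair(VADDLVu x)) -> build_pair(VADDLVAu lo(y), hi(y), x)
// so the scalar i64 add is folded into the accumulating form.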
13747 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13748 SDValue NB) {
13749 if (NB->getOpcode() != ISD::BUILD_PAIR)
13750 return SDValue();
13751 SDValue VecRed = NB->getOperand(0);
13752 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13753 VecRed.getResNo() != 0 ||
13754 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13755 return SDValue();
13756
13757 if (VecRed->getOpcode() == OpcodeA) {
13758 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13759 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13760 VecRed.getOperand(0), VecRed.getOperand(1));
13761 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13762 }
13763
13764 SmallVector<SDValue, 4> Ops(2);
13765 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13766
13767 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13768 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13769 Ops.push_back(VecRed->getOperand(I));
13770 SDValue Red =
13771 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13772 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13773 SDValue(Red.getNode(), 1));
13774 };
13775
13776 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13777 return M;
13778 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13779 return M;
13780 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13781 return M;
13782 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13783 return M;
13784 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13785 return M;
13786 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13787 return M;
13788 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13789 return M;
13790 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13791 return M;
13792 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13793 return M;
13794 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13795 return M;
13796 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13797 return M;
13798 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13799 return M;
13800 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13801 return M;
13802 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13803 return M;
13804 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13805 return M;
13806 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13807 return M;
13808 return SDValue();
13809}
13810
13811bool
13812 ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13813 CombineLevel Level) const {
13814 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13815 N->getOpcode() == ISD::SRL) &&
13816 "Expected shift op");
13817
13818 SDValue ShiftLHS = N->getOperand(0);
13819 if (!ShiftLHS->hasOneUse())
13820 return false;
13821
13822 if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
13823 !ShiftLHS.getOperand(0)->hasOneUse())
13824 return false;
13825
13826 if (Level == BeforeLegalizeTypes)
13827 return true;
13828
13829 if (N->getOpcode() != ISD::SHL)
13830 return true;
13831
13832 if (Subtarget->isThumb1Only()) {
13833 // Avoid making expensive immediates by commuting shifts. (This logic
13834 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13835 // for free.)
13836 if (N->getOpcode() != ISD::SHL)
13837 return true;
13838 SDValue N1 = N->getOperand(0);
13839 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13840 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13841 return true;
13842 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13843 if (Const->getAPIntValue().ult(256))
13844 return false;
13845 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13846 Const->getAPIntValue().sgt(-256))
13847 return false;
13848 }
13849 return true;
13850 }
13851
13852 // Turn off commute-with-shift transform after legalization, so it doesn't
13853 // conflict with PerformSHLSimplify. (We could try to detect when
13854 // PerformSHLSimplify would trigger more precisely, but it isn't
13855 // really necessary.)
13856 return false;
13857}
13858
13859 bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13860 const SDNode *N) const {
13861 assert(N->getOpcode() == ISD::XOR &&
13862 (N->getOperand(0).getOpcode() == ISD::SHL ||
13863 N->getOperand(0).getOpcode() == ISD::SRL) &&
13864 "Expected XOR(SHIFT) pattern");
13865
13866 // Only commute if the entire NOT mask is a hidden shifted mask.
13867 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13868 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13869 if (XorC && ShiftC) {
13870 unsigned MaskIdx, MaskLen;
13871 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13872 unsigned ShiftAmt = ShiftC->getZExtValue();
13873 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13874 if (N->getOperand(0).getOpcode() == ISD::SHL)
13875 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13876 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13877 }
13878 }
13879
13880 return false;
13881}
13882
13883 bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13884 const SDNode *N, CombineLevel Level) const {
13885 assert(((N->getOpcode() == ISD::SHL &&
13886 N->getOperand(0).getOpcode() == ISD::SRL) ||
13887 (N->getOpcode() == ISD::SRL &&
13888 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13889 "Expected shift-shift mask");
13890
13891 if (!Subtarget->isThumb1Only())
13892 return true;
13893
13894 if (Level == BeforeLegalizeTypes)
13895 return true;
13896
13897 return false;
13898}
13899
13900 bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(
13901 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
13902 SDValue Y) const {
13903 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT) &&
13904 SelectOpcode == ISD::VSELECT;
13905}
13906
13907 bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13908 if (!Subtarget->hasNEON()) {
13909 if (Subtarget->isThumb1Only())
13910 return VT.getScalarSizeInBits() <= 32;
13911 return true;
13912 }
13913 return VT.isScalarInteger();
13914}
13915
13916 bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13917 EVT VT) const {
13918 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13919 return false;
13920
13921 switch (FPVT.getSimpleVT().SimpleTy) {
13922 case MVT::f16:
13923 return Subtarget->hasVFP2Base();
13924 case MVT::f32:
13925 return Subtarget->hasVFP2Base();
13926 case MVT::f64:
13927 return Subtarget->hasFP64();
13928 case MVT::v4f32:
13929 case MVT::v8f16:
13930 return Subtarget->hasMVEFloatOps();
13931 default:
13932 return false;
13933 }
13934}
13935
13936 static SDValue PerformSHLSimplify(SDNode *N,
13937 TargetLowering::DAGCombinerInfo &DCI,
13938 const ARMSubtarget *ST) {
13939 // Allow the generic combiner to identify potential bswaps.
13940 if (DCI.isBeforeLegalize())
13941 return SDValue();
13942
13943 // DAG combiner will fold:
13944 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13945 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13946 // Other code patterns that can also be modified have the following form:
13947 // b + ((a << 1) | 510)
13948 // b + ((a << 1) & 510)
13949 // b + ((a << 1) ^ 510)
13950 // b + ((a << 1) + 510)
13951
13952 // Many instructions can perform the shift for free, but it requires both
13953 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13954 // instruction will be needed. So, unfold back to the original pattern if:
13955 // - c1 and c2 are small enough that they don't require mov imms.
13956 // - the user(s) of the node can perform a shl.
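// For example (illustrative): (add (shl x, 1), 510) can be unfolded back to
// (shl (add x, 255), 1), since 255 and 1 are both cheap immediates and the
// user of the add can fold the shift into its own operand.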
13957
13958 // No shifted operands for 16-bit instructions.
13959 if (ST->isThumb() && ST->isThumb1Only())
13960 return SDValue();
13961
13962 // Check that all the users could perform the shl themselves.
13963 for (auto *U : N->users()) {
13964 switch(U->getOpcode()) {
13965 default:
13966 return SDValue();
13967 case ISD::SUB:
13968 case ISD::ADD:
13969 case ISD::AND:
13970 case ISD::OR:
13971 case ISD::XOR:
13972 case ISD::SETCC:
13973 case ARMISD::CMP:
13974 // Check that the user isn't already using a constant because there
13975 // aren't any instructions that support an immediate operand and a
13976 // shifted operand.
13977 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13978 isa<ConstantSDNode>(U->getOperand(1)))
13979 return SDValue();
13980
13981 // Check that it's not already using a shift.
13982 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13983 U->getOperand(1).getOpcode() == ISD::SHL)
13984 return SDValue();
13985 break;
13986 }
13987 }
13988
13989 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13990 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13991 return SDValue();
13992
13993 if (N->getOperand(0).getOpcode() != ISD::SHL)
13994 return SDValue();
13995
13996 SDValue SHL = N->getOperand(0);
13997
13998 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13999 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
14000 if (!C1ShlC2 || !C2)
14001 return SDValue();
14002
14003 APInt C2Int = C2->getAPIntValue();
14004 APInt C1Int = C1ShlC2->getAPIntValue();
14005 unsigned C2Width = C2Int.getBitWidth();
14006 if (C2Int.uge(C2Width))
14007 return SDValue();
14008 uint64_t C2Value = C2Int.getZExtValue();
14009
14010 // Check that performing a lshr will not lose any information.
14011 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
14012 if ((C1Int & Mask) != C1Int)
14013 return SDValue();
14014
14015 // Shift the first constant.
14016 C1Int.lshrInPlace(C2Int);
14017
14018 // The immediates are encoded as an 8-bit value that can be rotated.
14019 auto LargeImm = [](const APInt &Imm) {
14020 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
14021 return Imm.getBitWidth() - Zeros > 8;
14022 };
14023
14024 if (LargeImm(C1Int) || LargeImm(C2Int))
14025 return SDValue();
14026
14027 SelectionDAG &DAG = DCI.DAG;
14028 SDLoc dl(N);
14029 SDValue X = SHL.getOperand(0);
14030 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
14031 DAG.getConstant(C1Int, dl, MVT::i32));
14032 // Shift left to compensate for the lshr of C1Int.
14033 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14034
14035 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14036 SHL.dump(); N->dump());
14037 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14038 return Res;
14039}
14040
14041
14042/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14043///
14044 static SDValue PerformADDCombine(SDNode *N,
14045 TargetLowering::DAGCombinerInfo &DCI,
14046 const ARMSubtarget *Subtarget) {
14047 SDValue N0 = N->getOperand(0);
14048 SDValue N1 = N->getOperand(1);
14049
14050 // Only works one way, because it needs an immediate operand.
14051 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14052 return Result;
14053
14054 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14055 return Result;
14056
14057 // First try with the default operand order.
14058 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14059 return Result;
14060
14061 // If that didn't work, try again with the operands commuted.
14062 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14063}
14064
14065// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14066// providing -X is as cheap as X (currently, just a constant).
14067 static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14068 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14069 return SDValue();
14070 SDValue CSINC = N->getOperand(1);
14071 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14072 return SDValue();
14073
14074 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14075 if (!X)
14076 return SDValue();
14077
14078 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14079 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14080 CSINC.getOperand(0)),
14081 CSINC.getOperand(1), CSINC.getOperand(2),
14082 CSINC.getOperand(3));
14083}
14084
14085/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14086///
14087 static SDValue PerformSUBCombine(SDNode *N,
14088 TargetLowering::DAGCombinerInfo &DCI,
14089 const ARMSubtarget *Subtarget) {
14090 SDValue N0 = N->getOperand(0);
14091 SDValue N1 = N->getOperand(1);
14092
14093 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14094 if (N1.getNode()->hasOneUse())
14095 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14096 return Result;
14097
14098 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14099 return R;
14100
14101 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14102 return SDValue();
14103
14104 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14105 // so that we can readily pattern match more mve instructions which can use
14106 // a scalar operand.
14107 SDValue VDup = N->getOperand(1);
14108 if (VDup->getOpcode() != ARMISD::VDUP)
14109 return SDValue();
14110
14111 SDValue VMov = N->getOperand(0);
14112 if (VMov->getOpcode() == ISD::BITCAST)
14113 VMov = VMov->getOperand(0);
14114
14115 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14116 return SDValue();
14117
14118 SDLoc dl(N);
14119 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14120 DCI.DAG.getConstant(0, dl, MVT::i32),
14121 VDup->getOperand(0));
14122 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14123}
14124
14125/// PerformVMULCombine
14126/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14127/// special multiplier accumulator forwarding.
14128/// vmul d3, d0, d2
14129/// vmla d3, d1, d2
14130/// is faster than
14131/// vadd d3, d0, d1
14132/// vmul d3, d3, d2
14133// However, for (A + B) * (A + B),
14134// vadd d2, d0, d1
14135// vmul d3, d0, d2
14136// vmla d3, d1, d2
14137// is slower than
14138// vadd d2, d0, d1
14139// vmul d3, d2, d2
14140 static SDValue PerformVMULCombine(SDNode *N,
14141 TargetLowering::DAGCombinerInfo &DCI,
14142 const ARMSubtarget *Subtarget) {
14143 if (!Subtarget->hasVMLxForwarding())
14144 return SDValue();
14145
14146 SelectionDAG &DAG = DCI.DAG;
14147 SDValue N0 = N->getOperand(0);
14148 SDValue N1 = N->getOperand(1);
14149 unsigned Opcode = N0.getOpcode();
14150 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14151 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14152 Opcode = N1.getOpcode();
14153 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14154 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14155 return SDValue();
14156 std::swap(N0, N1);
14157 }
14158
14159 if (N0 == N1)
14160 return SDValue();
14161
14162 EVT VT = N->getValueType(0);
14163 SDLoc DL(N);
14164 SDValue N00 = N0->getOperand(0);
14165 SDValue N01 = N0->getOperand(1);
14166 return DAG.getNode(Opcode, DL, VT,
14167 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14168 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14169}
14170
14171 static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
14172 const ARMSubtarget *Subtarget) {
14173 EVT VT = N->getValueType(0);
14174 if (VT != MVT::v2i64)
14175 return SDValue();
14176
14177 SDValue N0 = N->getOperand(0);
14178 SDValue N1 = N->getOperand(1);
14179
14180 auto IsSignExt = [&](SDValue Op) {
14181 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14182 return SDValue();
14183 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14184 if (VT.getScalarSizeInBits() == 32)
14185 return Op->getOperand(0);
14186 return SDValue();
14187 };
14188 auto IsZeroExt = [&](SDValue Op) {
14189 // Zero extends are a little more awkward. At the point we are matching
14190 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14191 // That might be before or after a bitcast, depending on how the and is
14192 // placed. Because this has to look through bitcasts, it is currently only
14193 // supported on LE.
14194 if (!Subtarget->isLittle())
14195 return SDValue();
14196
14197 SDValue And = Op;
14198 if (And->getOpcode() == ISD::BITCAST)
14199 And = And->getOperand(0);
14200 if (And->getOpcode() != ISD::AND)
14201 return SDValue();
14202 SDValue Mask = And->getOperand(1);
14203 if (Mask->getOpcode() == ISD::BITCAST)
14204 Mask = Mask->getOperand(0);
14205
14206 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14207 Mask.getValueType() != MVT::v4i32)
14208 return SDValue();
14209 if (isAllOnesConstant(Mask->getOperand(0)) &&
14210 isNullConstant(Mask->getOperand(1)) &&
14211 isAllOnesConstant(Mask->getOperand(2)) &&
14212 isNullConstant(Mask->getOperand(3)))
14213 return And->getOperand(0);
14214 return SDValue();
14215 };
14216
14217 SDLoc dl(N);
14218 if (SDValue Op0 = IsSignExt(N0)) {
14219 if (SDValue Op1 = IsSignExt(N1)) {
14220 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14221 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14222 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14223 }
14224 }
14225 if (SDValue Op0 = IsZeroExt(N0)) {
14226 if (SDValue Op1 = IsZeroExt(N1)) {
14227 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14228 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14229 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14230 }
14231 }
14232
14233 return SDValue();
14234}
14235
14236 static SDValue PerformMULCombine(SDNode *N,
14237 TargetLowering::DAGCombinerInfo &DCI,
14238 const ARMSubtarget *Subtarget) {
14239 SelectionDAG &DAG = DCI.DAG;
14240
14241 EVT VT = N->getValueType(0);
14242 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14243 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14244
14245 if (Subtarget->isThumb1Only())
14246 return SDValue();
14247
14248 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14249 return SDValue();
14250
14251 if (VT.is64BitVector() || VT.is128BitVector())
14252 return PerformVMULCombine(N, DCI, Subtarget);
14253 if (VT != MVT::i32)
14254 return SDValue();
14255
14256 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14257 if (!C)
14258 return SDValue();
14259
14260 int64_t MulAmt = C->getSExtValue();
14261 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14262
14263 ShiftAmt = ShiftAmt & (32 - 1);
14264 SDValue V = N->getOperand(0);
14265 SDLoc DL(N);
14266
14267 SDValue Res;
14268 MulAmt >>= ShiftAmt;
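// For example (illustrative): MulAmt == 36 is handled as 9 << 2, i.e.
// (add (shl x, 3), x) shifted left by 2 at the end.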
14269
14270 if (MulAmt >= 0) {
14271 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14272 // (mul x, 2^N + 1) => (add (shl x, N), x)
14273 Res = DAG.getNode(ISD::ADD, DL, VT,
14274 V,
14275 DAG.getNode(ISD::SHL, DL, VT,
14276 V,
14277 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14278 MVT::i32)));
14279 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14280 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14281 Res = DAG.getNode(ISD::SUB, DL, VT,
14282 DAG.getNode(ISD::SHL, DL, VT,
14283 V,
14284 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14285 MVT::i32)),
14286 V);
14287 } else
14288 return SDValue();
14289 } else {
14290 uint64_t MulAmtAbs = -MulAmt;
14291 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14292 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14293 Res = DAG.getNode(ISD::SUB, DL, VT,
14294 V,
14295 DAG.getNode(ISD::SHL, DL, VT,
14296 V,
14297 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14298 MVT::i32)));
14299 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14300 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14301 Res = DAG.getNode(ISD::ADD, DL, VT,
14302 V,
14303 DAG.getNode(ISD::SHL, DL, VT,
14304 V,
14305 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14306 MVT::i32)));
14307 Res = DAG.getNode(ISD::SUB, DL, VT,
14308 DAG.getConstant(0, DL, MVT::i32), Res);
14309 } else
14310 return SDValue();
14311 }
14312
14313 if (ShiftAmt != 0)
14314 Res = DAG.getNode(ISD::SHL, DL, VT,
14315 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14316
14317 // Do not add new nodes to DAG combiner worklist.
14318 DCI.CombineTo(N, Res, false);
14319 return SDValue();
14320}
14321
14324 const ARMSubtarget *Subtarget) {
14325 // Allow DAGCombine to pattern-match before we touch the canonical form.
14326 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14327 return SDValue();
14328
14329 if (N->getValueType(0) != MVT::i32)
14330 return SDValue();
14331
14332 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14333 if (!N1C)
14334 return SDValue();
14335
14336 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14337 // Don't transform uxtb/uxth.
14338 if (C1 == 255 || C1 == 65535)
14339 return SDValue();
14340
14341 SDNode *N0 = N->getOperand(0).getNode();
14342 if (!N0->hasOneUse())
14343 return SDValue();
14344
14345 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14346 return SDValue();
14347
14348 bool LeftShift = N0->getOpcode() == ISD::SHL;
14349
14351 if (!N01C)
14352 return SDValue();
14353
14354 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14355 if (!C2 || C2 >= 32)
14356 return SDValue();
14357
14358 // Clear irrelevant bits in the mask.
14359 if (LeftShift)
14360 C1 &= (-1U << C2);
14361 else
14362 C1 &= (-1U >> C2);
14363
14364 SelectionDAG &DAG = DCI.DAG;
14365 SDLoc DL(N);
14366
14367 // We have a pattern of the form "(and (shl x, c2) c1)" or
14368 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14369 // transform to a pair of shifts, to save materializing c1.
14370
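// For example, (and (srl x, 1), 0x3fffffff) can instead be computed as
// (srl (shl x, 1), 2), avoiding materializing the 0x3fffffff constant.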
14371 // First pattern: right shift, then mask off leading bits.
14372 // FIXME: Use demanded bits?
14373 if (!LeftShift && isMask_32(C1)) {
14374 uint32_t C3 = llvm::countl_zero(C1);
14375 if (C2 < C3) {
14376 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14377 DAG.getConstant(C3 - C2, DL, MVT::i32));
14378 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14379 DAG.getConstant(C3, DL, MVT::i32));
14380 }
14381 }
14382
14383 // First pattern, reversed: left shift, then mask off trailing bits.
14384 if (LeftShift && isMask_32(~C1)) {
14385 uint32_t C3 = llvm::countr_zero(C1);
14386 if (C2 < C3) {
14387 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14388 DAG.getConstant(C3 - C2, DL, MVT::i32));
14389 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14390 DAG.getConstant(C3, DL, MVT::i32));
14391 }
14392 }
14393
14394 // Second pattern: left shift, then mask off leading bits.
14395 // FIXME: Use demanded bits?
14396 if (LeftShift && isShiftedMask_32(C1)) {
14397 uint32_t Trailing = llvm::countr_zero(C1);
14398 uint32_t C3 = llvm::countl_zero(C1);
14399 if (Trailing == C2 && C2 + C3 < 32) {
14400 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14401 DAG.getConstant(C2 + C3, DL, MVT::i32));
14402 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14403 DAG.getConstant(C3, DL, MVT::i32));
14404 }
14405 }
14406
14407 // Second pattern, reversed: right shift, then mask off trailing bits.
14408 // FIXME: Handle other patterns of known/demanded bits.
14409 if (!LeftShift && isShiftedMask_32(C1)) {
14410 uint32_t Leading = llvm::countl_zero(C1);
14411 uint32_t C3 = llvm::countr_zero(C1);
14412 if (Leading == C2 && C2 + C3 < 32) {
14413 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14414 DAG.getConstant(C2 + C3, DL, MVT::i32));
14415 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14416 DAG.getConstant(C3, DL, MVT::i32));
14417 }
14418 }
14419
14420 // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
14421 // if "c1 >> c2" is a cheaper immediate than "c1"
14422 if (LeftShift &&
14423 HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {
14424
14425 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
14426 DAG.getConstant(C1 >> C2, DL, MVT::i32));
14427 return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
14428 DAG.getConstant(C2, DL, MVT::i32));
14429 }
14430
14431 return SDValue();
14432}
14433
14436 const ARMSubtarget *Subtarget) {
14437 // Attempt to use immediate-form VBIC
14438 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14439 SDLoc dl(N);
14440 EVT VT = N->getValueType(0);
14441 SelectionDAG &DAG = DCI.DAG;
14442
14443 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14444 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14445 return SDValue();
14446
14447 APInt SplatBits, SplatUndef;
14448 unsigned SplatBitSize;
14449 bool HasAnyUndefs;
14450 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14451 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14452 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14453 SplatBitSize == 64) {
14454 EVT VbicVT;
14455 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14456 SplatUndef.getZExtValue(), SplatBitSize,
14457 DAG, dl, VbicVT, VT, OtherModImm);
14458 if (Val.getNode()) {
14459 SDValue Input =
14460 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
14461 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14462 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
14463 }
14464 }
14465 }
14466
14467 if (!Subtarget->isThumb1Only()) {
14468 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14469 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14470 return Result;
14471
14472 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14473 return Result;
14474 }
14475
14476 if (Subtarget->isThumb1Only())
14477 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14478 return Result;
14479
14480 return SDValue();
14481}
14482
14483// Try combining OR nodes to SMULWB, SMULWT.
14486 const ARMSubtarget *Subtarget) {
14487 if (!Subtarget->hasV6Ops() ||
14488 (Subtarget->isThumb() &&
14489 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14490 return SDValue();
14491
14492 SDValue SRL = OR->getOperand(0);
14493 SDValue SHL = OR->getOperand(1);
14494
14495 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14496 SRL = OR->getOperand(1);
14497 SHL = OR->getOperand(0);
14498 }
14499 if (!isSRL16(SRL) || !isSHL16(SHL))
14500 return SDValue();
14501
14502 // The first operands to the shifts need to be the two results from the
14503 // same smul_lohi node.
14504 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14505 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14506 return SDValue();
14507
14508 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14509 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14510 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14511 return SDValue();
14512
14513 // Now we have:
14514 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
14515 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14516 // For SMULWB the 16-bit value will be sign extended somehow.
14517 // For SMULWT only the SRA is required.
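// The (or (srl lo, 16), (shl hi, 16)) pair (with lo/hi the two smul_lohi
// results) reconstructs bits [16..47] of the 64-bit product, which is exactly
// the value SMULWB/SMULWT produce when one operand is only 16 bits wide.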
14518 // Check both sides of SMUL_LOHI
14519 SDValue OpS16 = SMULLOHI->getOperand(0);
14520 SDValue OpS32 = SMULLOHI->getOperand(1);
14521
14522 SelectionDAG &DAG = DCI.DAG;
14523 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14524 OpS16 = OpS32;
14525 OpS32 = SMULLOHI->getOperand(0);
14526 }
14527
14528 SDLoc dl(OR);
14529 unsigned Opcode = 0;
14530 if (isS16(OpS16, DAG))
14531 Opcode = ARMISD::SMULWB;
14532 else if (isSRA16(OpS16)) {
14533 Opcode = ARMISD::SMULWT;
14534 OpS16 = OpS16->getOperand(0);
14535 }
14536 else
14537 return SDValue();
14538
14539 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14540 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14541 return SDValue(OR, 0);
14542}
14543
14546 const ARMSubtarget *Subtarget) {
14547 // BFI is only available on V6T2+
14548 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14549 return SDValue();
14550
14551 EVT VT = N->getValueType(0);
14552 SDValue N0 = N->getOperand(0);
14553 SDValue N1 = N->getOperand(1);
14554 SelectionDAG &DAG = DCI.DAG;
14555 SDLoc DL(N);
14556 // 1) or (and A, mask), val => ARMbfi A, val, mask
14557 // iff (val & mask) == val
14558 //
14559 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14560 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14561 // && mask == ~mask2
14562 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14563 // && ~mask == mask2
14564 // (i.e., copy a bitfield value into another bitfield of the same width)
14565
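// For example (case 1): or (and A, 0xffff00ff), 0x4500 becomes
// (ARMbfi A, 0x45, 0xffff00ff), inserting 0x45 into bits 8..15 of A.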
14566 if (VT != MVT::i32)
14567 return SDValue();
14568
14569 SDValue N00 = N0.getOperand(0);
14570
14571 // The value and the mask need to be constants so we can verify this is
14572 // actually a bitfield set. If the mask is 0xffff, we can do better
14573 // via a movt instruction, so don't use BFI in that case.
14574 SDValue MaskOp = N0.getOperand(1);
14576 if (!MaskC)
14577 return SDValue();
14578 unsigned Mask = MaskC->getZExtValue();
14579 if (Mask == 0xffff)
14580 return SDValue();
14581 SDValue Res;
14582 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14584 if (N1C) {
14585 unsigned Val = N1C->getZExtValue();
14586 if ((Val & ~Mask) != Val)
14587 return SDValue();
14588
14589 if (ARM::isBitFieldInvertedMask(Mask)) {
14590 Val >>= llvm::countr_zero(~Mask);
14591
14592 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14593 DAG.getConstant(Val, DL, MVT::i32),
14594 DAG.getConstant(Mask, DL, MVT::i32));
14595
14596 DCI.CombineTo(N, Res, false);
14597 // Return value from the original node to inform the combiner that N is
14598 // now dead.
14599 return SDValue(N, 0);
14600 }
14601 } else if (N1.getOpcode() == ISD::AND) {
14602 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14604 if (!N11C)
14605 return SDValue();
14606 unsigned Mask2 = N11C->getZExtValue();
14607
14608 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14609 // to match as-is.
14610 if (ARM::isBitFieldInvertedMask(Mask) &&
14611 (Mask == ~Mask2)) {
14612 // The pack halfword instruction works better for masks that fit it,
14613 // so use that when it's available.
14614 if (Subtarget->hasDSP() &&
14615 (Mask == 0xffff || Mask == 0xffff0000))
14616 return SDValue();
14617 // 2a
14618 unsigned amt = llvm::countr_zero(Mask2);
14619 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14620 DAG.getConstant(amt, DL, MVT::i32));
14621 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14622 DAG.getConstant(Mask, DL, MVT::i32));
14623 DCI.CombineTo(N, Res, false);
14624 // Return value from the original node to inform the combiner that N is
14625 // now dead.
14626 return SDValue(N, 0);
14627 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14628 (~Mask == Mask2)) {
14629 // The pack halfword instruction works better for masks that fit it,
14630 // so use that when it's available.
14631 if (Subtarget->hasDSP() &&
14632 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14633 return SDValue();
14634 // 2b
14635 unsigned lsb = llvm::countr_zero(Mask);
14636 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14637 DAG.getConstant(lsb, DL, MVT::i32));
14638 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14639 DAG.getConstant(Mask2, DL, MVT::i32));
14640 DCI.CombineTo(N, Res, false);
14641 // Return value from the original node to inform the combiner that N is
14642 // now dead.
14643 return SDValue(N, 0);
14644 }
14645 }
14646
14647 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14648 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14650 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14651 // where lsb(mask) == #shamt and masked bits of B are known zero.
14652 SDValue ShAmt = N00.getOperand(1);
14653 unsigned ShAmtC = ShAmt->getAsZExtVal();
14654 unsigned LSB = llvm::countr_zero(Mask);
14655 if (ShAmtC != LSB)
14656 return SDValue();
14657
14658 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14659 DAG.getConstant(~Mask, DL, MVT::i32));
14660
14661 DCI.CombineTo(N, Res, false);
14662 // Return value from the original node to inform the combiner that N is
14663 // now dead.
14664 return SDValue(N, 0);
14665 }
14666
14667 return SDValue();
14668}
14669
14670static bool isValidMVECond(unsigned CC, bool IsFloat) {
14671 switch (CC) {
14672 case ARMCC::EQ:
14673 case ARMCC::NE:
14674 case ARMCC::LE:
14675 case ARMCC::GT:
14676 case ARMCC::GE:
14677 case ARMCC::LT:
14678 return true;
14679 case ARMCC::HS:
14680 case ARMCC::HI:
14681 return !IsFloat;
14682 default:
14683 return false;
14684 };
14685}
14686
14688 if (N->getOpcode() == ARMISD::VCMP)
14689 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14690 else if (N->getOpcode() == ARMISD::VCMPZ)
14691 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14692 else
14693 llvm_unreachable("Not a VCMP/VCMPZ!");
14694}
14695
14698 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14699}
14700
14702 const ARMSubtarget *Subtarget) {
14703 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14704 // together with predicates
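// (This is De Morgan's law: a | b == ~(~a & ~b); the inversions are free when
// the operands are VCMP/VCMPZ nodes whose conditions can be flipped.)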
14705 EVT VT = N->getValueType(0);
14706 SDLoc DL(N);
14707 SDValue N0 = N->getOperand(0);
14708 SDValue N1 = N->getOperand(1);
14709
14710 auto IsFreelyInvertable = [&](SDValue V) {
14711 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14712 return CanInvertMVEVCMP(V);
14713 return false;
14714 };
14715
14716 // At least one operand must be freely invertible.
14717 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14718 return SDValue();
14719
14720 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14721 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14722 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14723 return DAG.getLogicalNOT(DL, And, VT);
14724}
14725
14726/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14729 const ARMSubtarget *Subtarget) {
14730 // Attempt to use immediate-form VORR
14731 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14732 SDLoc dl(N);
14733 EVT VT = N->getValueType(0);
14734 SelectionDAG &DAG = DCI.DAG;
14735
14736 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14737 return SDValue();
14738
14739 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14740 VT == MVT::v8i1 || VT == MVT::v16i1))
14741 return PerformORCombine_i1(N, DAG, Subtarget);
14742
14743 APInt SplatBits, SplatUndef;
14744 unsigned SplatBitSize;
14745 bool HasAnyUndefs;
14746 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14747 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14748 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14749 SplatBitSize == 64) {
14750 EVT VorrVT;
14751 SDValue Val =
14752 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14753 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14754 if (Val.getNode()) {
14755 SDValue Input =
14756 DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
14757 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14758 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
14759 }
14760 }
14761 }
14762
14763 if (!Subtarget->isThumb1Only()) {
14764 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14765 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14766 return Result;
14767 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14768 return Result;
14769 }
14770
14771 SDValue N0 = N->getOperand(0);
14772 SDValue N1 = N->getOperand(1);
14773
14774 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14775 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14777
14778 // The code below optimizes (or (and X, Y), Z).
14779 // The AND operand needs to have a single user to make these optimizations
14780 // profitable.
14781 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14782 return SDValue();
14783
14784 APInt SplatUndef;
14785 unsigned SplatBitSize;
14786 bool HasAnyUndefs;
14787
14788 APInt SplatBits0, SplatBits1;
14791 // Ensure that the second operand of each AND is a constant
14792 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14793 HasAnyUndefs) && !HasAnyUndefs) {
14794 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14795 HasAnyUndefs) && !HasAnyUndefs) {
14796 // Ensure that the bit widths of the constants are the same and that
14797 // the splat arguments are logical inverses as per the pattern we
14798 // are trying to simplify.
14799 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14800 SplatBits0 == ~SplatBits1) {
14801 // Canonicalize the vector type to make instruction selection
14802 // simpler.
14803 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14804 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14805 N0->getOperand(1),
14806 N0->getOperand(0),
14807 N1->getOperand(0));
14808 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);
14809 }
14810 }
14811 }
14812 }
14813
14814 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14815 // reasonable.
14816 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14817 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14818 return Res;
14819 }
14820
14821 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14822 return Result;
14823
14824 return SDValue();
14825}
14826
14829 const ARMSubtarget *Subtarget) {
14830 EVT VT = N->getValueType(0);
14831 SelectionDAG &DAG = DCI.DAG;
14832
14833 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14834 return SDValue();
14835
14836 if (!Subtarget->isThumb1Only()) {
14837 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14838 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14839 return Result;
14840
14841 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14842 return Result;
14843 }
14844
14845 if (Subtarget->hasMVEIntegerOps()) {
14846 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
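// For example, (xor (vcmp x, y, eq), all-true) becomes (vcmp x, y, ne).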
14847 SDValue N0 = N->getOperand(0);
14848 SDValue N1 = N->getOperand(1);
14849 const TargetLowering *TLI = Subtarget->getTargetLowering();
14850 if (TLI->isConstTrueVal(N1) &&
14851 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14852 if (CanInvertMVEVCMP(N0)) {
14853 SDLoc DL(N0);
14855
14857 Ops.push_back(N0->getOperand(0));
14858 if (N0->getOpcode() == ARMISD::VCMP)
14859 Ops.push_back(N0->getOperand(1));
14860 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14861 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14862 }
14863 }
14864 }
14865
14866 return SDValue();
14867}
14868
14869// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14870// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14871// their position in "to" (Rd).
14872static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14873 assert(N->getOpcode() == ARMISD::BFI);
14874
14875 SDValue From = N->getOperand(1);
14876 ToMask = ~N->getConstantOperandAPInt(2);
14877 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14878
14879 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14880 // #C in the base of the SHR.
14881 if (From->getOpcode() == ISD::SRL &&
14882 isa<ConstantSDNode>(From->getOperand(1))) {
14883 APInt Shift = From->getConstantOperandAPInt(1);
14884 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14885 FromMask <<= Shift.getLimitedValue(31);
14886 From = From->getOperand(0);
14887 }
14888
14889 return From;
14890}
14891
14892// If A and B contain one contiguous set of bits, does A | B == A . B?
14893//
14894// Neither A nor B must be zero.
14895static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14896 unsigned LastActiveBitInA = A.countr_zero();
14897 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14898 return LastActiveBitInA - 1 == FirstActiveBitInB;
14899}
14900
14902 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14903 APInt ToMask, FromMask;
14904 SDValue From = ParseBFI(N, ToMask, FromMask);
14905 SDValue To = N->getOperand(0);
14906
14907 SDValue V = To;
14908 if (V.getOpcode() != ARMISD::BFI)
14909 return SDValue();
14910
14911 APInt NewToMask, NewFromMask;
14912 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14913 if (NewFrom != From)
14914 return SDValue();
14915
14916 // Do the written bits conflict with any we've seen so far?
14917 if ((NewToMask & ToMask).getBoolValue())
14918 // Conflicting bits.
14919 return SDValue();
14920
14921 // Are the new bits contiguous when combined with the old bits?
14922 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14923 BitsProperlyConcatenate(FromMask, NewFromMask))
14924 return V;
14925 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14926 BitsProperlyConcatenate(NewFromMask, FromMask))
14927 return V;
14928
14929 return SDValue();
14930}
14931
14933 SDValue N0 = N->getOperand(0);
14934 SDValue N1 = N->getOperand(1);
14935
14936 if (N1.getOpcode() == ISD::AND) {
14937 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14938 // the bits being cleared by the AND are not demanded by the BFI.
14940 if (!N11C)
14941 return SDValue();
14942 unsigned InvMask = N->getConstantOperandVal(2);
14943 unsigned LSB = llvm::countr_zero(~InvMask);
14944 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14945 assert(Width <
14946 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14947 "undefined behavior");
14948 unsigned Mask = (1u << Width) - 1;
14949 unsigned Mask2 = N11C->getZExtValue();
14950 if ((Mask & (~Mask2)) == 0)
14951 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14952 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14953 return SDValue();
14954 }
14955
14956 // Look for another BFI to combine with.
14957 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14958 // We've found a BFI.
14959 APInt ToMask1, FromMask1;
14960 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14961
14962 APInt ToMask2, FromMask2;
14963 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14964 assert(From1 == From2);
14965 (void)From2;
14966
14967 // Create a new BFI, combining the two together.
14968 APInt NewFromMask = FromMask1 | FromMask2;
14969 APInt NewToMask = ToMask1 | ToMask2;
14970
14971 EVT VT = N->getValueType(0);
14972 SDLoc dl(N);
14973
14974 if (NewFromMask[0] == 0)
14975 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14976 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14977 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14978 DAG.getConstant(~NewToMask, dl, VT));
14979 }
14980
14981 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14982 // that lower bit insertions are performed first, provided that M1 and M2
14983 // do not overlap. This can allow multiple BFI instructions to be combined
14984 // together by the other folds above.
14985 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14986 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14987 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14988
14989 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14990 ToMask1.countl_zero() < ToMask2.countl_zero())
14991 return SDValue();
14992
14993 EVT VT = N->getValueType(0);
14994 SDLoc dl(N);
14995 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14996 N->getOperand(1), N->getOperand(2));
14997 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14998 N0.getOperand(2));
14999 }
15000
15001 return SDValue();
15002}
15003
15004// Check that N is CMPZ(CSINC(0, 0, CC, X)),
15005// or CMPZ(CMOV(1, 0, CC, X))
15006// return X if valid.
15008 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
15009 return SDValue();
15010 SDValue CSInc = Cmp->getOperand(0);
15011
15012 // Ignore any `And 1` nodes that may not yet have been removed. We are
15013 // looking for a value that produces 1/0, so these have no effect on the
15014 // code.
15015 while (CSInc.getOpcode() == ISD::AND &&
15016 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
15017 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
15018 CSInc = CSInc.getOperand(0);
15019
15020 if (CSInc.getOpcode() == ARMISD::CSINC &&
15021 isNullConstant(CSInc.getOperand(0)) &&
15022 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15024 return CSInc.getOperand(3);
15025 }
15026 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
15027 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
15029 return CSInc.getOperand(3);
15030 }
15031 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
15032 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
15035 return CSInc.getOperand(3);
15036 }
15037 return SDValue();
15038}
15039
15041 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15042 // t92: flags = ARMISD::CMPZ t74, 0
15043 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15044 // t96: flags = ARMISD::CMPZ t93, 0
15045 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15047 if (SDValue C = IsCMPZCSINC(N, Cond))
15048 if (Cond == ARMCC::EQ)
15049 return C;
15050 return SDValue();
15051}
15052
15054 // Fold away an unnecessary CMPZ/CSINC
15055 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15056 // if C1==EQ -> CSXYZ A, B, C2, D
15057 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15059 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15060 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15061 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15062 N->getOperand(1),
15063 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15064 if (N->getConstantOperandVal(2) == ARMCC::NE)
15065 return DAG.getNode(
15066 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15067 N->getOperand(1),
15069 }
15070 return SDValue();
15071}
15072
15073/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15074/// ARMISD::VMOVRRD.
15077 const ARMSubtarget *Subtarget) {
15078 // vmovrrd(vmovdrr x, y) -> x,y
15079 SDValue InDouble = N->getOperand(0);
15080 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15081 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15082
15083 // vmovrrd(load f64) -> (load i32), (load i32)
15084 SDNode *InNode = InDouble.getNode();
15085 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15086 InNode->getValueType(0) == MVT::f64 &&
15087 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15088 !cast<LoadSDNode>(InNode)->isVolatile()) {
15089 // TODO: Should this be done for non-FrameIndex operands?
15090 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15091
15092 SelectionDAG &DAG = DCI.DAG;
15093 SDLoc DL(LD);
15094 SDValue BasePtr = LD->getBasePtr();
15095 SDValue NewLD1 =
15096 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15097 LD->getAlign(), LD->getMemOperand()->getFlags());
15098
15099 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15100 DAG.getConstant(4, DL, MVT::i32));
15101
15102 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15103 LD->getPointerInfo().getWithOffset(4),
15104 commonAlignment(LD->getAlign(), 4),
15105 LD->getMemOperand()->getFlags());
15106
15107 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15108 if (DCI.DAG.getDataLayout().isBigEndian())
15109 std::swap (NewLD1, NewLD2);
15110 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15111 return Result;
15112 }
15113
15114 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15115 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15116 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15117 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15118 SDValue BV = InDouble.getOperand(0);
15119 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15120 // change lane order under big endian.
15121 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15122 while (
15123 (BV.getOpcode() == ISD::BITCAST ||
15125 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15126 BVSwap = BV.getOpcode() == ISD::BITCAST;
15127 BV = BV.getOperand(0);
15128 }
15129 if (BV.getValueType() != MVT::v4i32)
15130 return SDValue();
15131
15132 // Handle buildvectors, pulling out the correct lane depending on
15133 // endianness.
15134 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15135 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15136 SDValue Op0 = BV.getOperand(Offset);
15137 SDValue Op1 = BV.getOperand(Offset + 1);
15138 if (!Subtarget->isLittle() && BVSwap)
15139 std::swap(Op0, Op1);
15140
15141 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15142 }
15143
15144 // A chain of insert_vectors, grabbing the correct value of the chain of
15145 // inserts.
15146 SDValue Op0, Op1;
15147 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15148 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15149 if (BV.getConstantOperandVal(2) == Offset && !Op0)
15150 Op0 = BV.getOperand(1);
15151 if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
15152 Op1 = BV.getOperand(1);
15153 }
15154 BV = BV.getOperand(0);
15155 }
15156 if (!Subtarget->isLittle() && BVSwap)
15157 std::swap(Op0, Op1);
15158 if (Op0 && Op1)
15159 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15160 }
15161
15162 return SDValue();
15163}
15164
15165/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15166/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15168 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15169 SDValue Op0 = N->getOperand(0);
15170 SDValue Op1 = N->getOperand(1);
15171 if (Op0.getOpcode() == ISD::BITCAST)
15172 Op0 = Op0.getOperand(0);
15173 if (Op1.getOpcode() == ISD::BITCAST)
15174 Op1 = Op1.getOperand(0);
15175 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15176 Op0.getNode() == Op1.getNode() &&
15177 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15178 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15179 N->getValueType(0), Op0.getOperand(0));
15180 return SDValue();
15181}
15182
15185 SDValue Op0 = N->getOperand(0);
15186
15187 // VMOVhr (VMOVrh (X)) -> X
15188 if (Op0->getOpcode() == ARMISD::VMOVrh)
15189 return Op0->getOperand(0);
15190
15191 // FullFP16: half values are passed in S-registers, and we don't
15192 // need any of the bitcast and moves:
15193 //
15194 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15195 // t5: i32 = bitcast t2
15196 // t18: f16 = ARMISD::VMOVhr t5
15197 // =>
15198 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15199 if (Op0->getOpcode() == ISD::BITCAST) {
15200 SDValue Copy = Op0->getOperand(0);
15201 if (Copy.getValueType() == MVT::f32 &&
15202 Copy->getOpcode() == ISD::CopyFromReg) {
15203 bool HasGlue = Copy->getNumOperands() == 3;
15204 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15205 HasGlue ? Copy->getOperand(2) : SDValue()};
15206 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15207 SDValue NewCopy =
15209 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15210 ArrayRef(Ops, HasGlue ? 3 : 2));
15211
15212 // Update Users, Chains, and Potential Glue.
15213 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15214 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15215 if (HasGlue)
15216 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15217 NewCopy.getValue(2));
15218
15219 return NewCopy;
15220 }
15221 }
15222
15223 // fold (VMOVhr (load x)) -> (load (f16*)x)
15224 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15225 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15226 LN0->getMemoryVT() == MVT::i16) {
15227 SDValue Load =
15228 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15229 LN0->getBasePtr(), LN0->getMemOperand());
15230 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15231 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15232 return Load;
15233 }
15234 }
15235
15236 // Only the bottom 16 bits of the source register are used.
15237 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15238 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15239 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15240 return SDValue(N, 0);
15241
15242 return SDValue();
15243}
15244
15246 SDValue N0 = N->getOperand(0);
15247 EVT VT = N->getValueType(0);
15248
15249 // fold (VMOVrh (fpconst x)) -> const x
15251 APFloat V = C->getValueAPF();
15252 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15253 }
15254
15255 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15256 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15257 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15258
15259 SDValue Load =
15260 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15261 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15262 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15263 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15264 return Load;
15265 }
15266
15267 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15268 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15270 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15271 N0->getOperand(1));
15272
15273 return SDValue();
15274}
15275
15276/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15277/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15278/// i64 vector to have f64 elements, since the value can then be loaded
15279/// directly into a VFP register.
15281 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15282 for (unsigned i = 0; i < NumElts; ++i) {
15283 SDNode *Elt = N->getOperand(i).getNode();
15284 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15285 return true;
15286 }
15287 return false;
15288}
15289
15290/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15291/// ISD::BUILD_VECTOR.
15294 const ARMSubtarget *Subtarget) {
15295 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15296 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15297 // into a pair of GPRs, which is fine when the value is used as a scalar,
15298 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15299 SelectionDAG &DAG = DCI.DAG;
15300 if (N->getNumOperands() == 2)
15301 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15302 return RV;
15303
15304 // Load i64 elements as f64 values so that type legalization does not split
15305 // them up into i32 values.
15306 EVT VT = N->getValueType(0);
15307 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15308 return SDValue();
15309 SDLoc dl(N);
15311 unsigned NumElts = VT.getVectorNumElements();
15312 for (unsigned i = 0; i < NumElts; ++i) {
15313 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15314 Ops.push_back(V);
15315 // Make the DAGCombiner fold the bitcast.
15316 DCI.AddToWorklist(V.getNode());
15317 }
15318 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15319 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15320 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15321}
15322
15323/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15324static SDValue
15326 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15327 // At that time, we may have inserted bitcasts from integer to float.
15328 // If these bitcasts have survived DAGCombine, change the lowering of this
15329 // BUILD_VECTOR into something more vector friendly, i.e., something that
15330 // does not force the use of floating point types.
15331
15332 // Make sure we can change the type of the vector.
15333 // This is possible iff:
15334 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15335 // 1.1. Vector is used only once.
15336 // 1.2. Use is a bit convert to an integer type.
15337 // 2. The size of its operands are 32-bits (64-bits are not legal).
15338 EVT VT = N->getValueType(0);
15339 EVT EltVT = VT.getVectorElementType();
15340
15341 // Check 1.1. and 2.
15342 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15343 return SDValue();
15344
15345 // By construction, the input type must be float.
15346 assert(EltVT == MVT::f32 && "Unexpected type!");
15347
15348 // Check 1.2.
15349 SDNode *Use = *N->user_begin();
15350 if (Use->getOpcode() != ISD::BITCAST ||
15351 Use->getValueType(0).isFloatingPoint())
15352 return SDValue();
15353
15354 // Check profitability.
15355 // The model is: if more than half of the relevant operands are bitcast from
15356 // i32, turn the build_vector into a sequence of insert_vector_elt.
15357 // Relevant operands are everything that is not statically
15358 // (i.e., at compile time) bitcasted.
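// For example, in a v4f32 ARMISD::BUILD_VECTOR where three operands are
// (bitcast i32 x) and the fourth is undef, there are three relevant operands
// and all of them are bitcasts, so the node is rewritten as a chain of i32
// insert_vector_elts followed by a single bitcast back to the vector type.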
15359 unsigned NumOfBitCastedElts = 0;
15360 unsigned NumElts = VT.getVectorNumElements();
15361 unsigned NumOfRelevantElts = NumElts;
15362 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15363 SDValue Elt = N->getOperand(Idx);
15364 if (Elt->getOpcode() == ISD::BITCAST) {
15365 // Assume only bit cast to i32 will go away.
15366 if (Elt->getOperand(0).getValueType() == MVT::i32)
15367 ++NumOfBitCastedElts;
15368 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15369 // Constants are statically casted, thus do not count them as
15370 // relevant operands.
15371 --NumOfRelevantElts;
15372 }
15373
15374 // Check if more than half of the elements require a non-free bitcast.
15375 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15376 return SDValue();
15377
15378 SelectionDAG &DAG = DCI.DAG;
15379 // Create the new vector type.
15380 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15381 // Check if the type is legal.
15382 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15383 if (!TLI.isTypeLegal(VecVT))
15384 return SDValue();
15385
15386 // Combine:
15387 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15388 // => BITCAST INSERT_VECTOR_ELT
15389 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15390 // (BITCAST EN), N.
15391 SDValue Vec = DAG.getUNDEF(VecVT);
15392 SDLoc dl(N);
15393 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15394 SDValue V = N->getOperand(Idx);
15395 if (V.isUndef())
15396 continue;
15397 if (V.getOpcode() == ISD::BITCAST &&
15398 V->getOperand(0).getValueType() == MVT::i32)
15399 // Fold obvious case.
15400 V = V.getOperand(0);
15401 else {
15402 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15403 // Make the DAGCombiner fold the bitcasts.
15404 DCI.AddToWorklist(V.getNode());
15405 }
15406 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15407 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15408 }
15409 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15410 // Make the DAGCombiner fold the bitcasts.
15411 DCI.AddToWorklist(Vec.getNode());
15412 return Vec;
15413}
15414
15415static SDValue
15417 EVT VT = N->getValueType(0);
15418 SDValue Op = N->getOperand(0);
15419 SDLoc dl(N);
15420
15421 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15422 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15423 // If the valuetypes are the same, we can remove the cast entirely.
15424 if (Op->getOperand(0).getValueType() == VT)
15425 return Op->getOperand(0);
15426 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15427 }
15428
15429 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15430 // more VPNOT which might get folded as else predicates.
15431 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15432 SDValue X =
15433 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15435 DCI.DAG.getConstant(65535, dl, MVT::i32));
15436 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15437 }
15438
15439 // Only the bottom 16 bits of the source register are used.
15440 if (Op.getValueType() == MVT::i32) {
15441 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15442 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15443 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15444 return SDValue(N, 0);
15445 }
15446 return SDValue();
15447}
15448
15450 const ARMSubtarget *ST) {
15451 EVT VT = N->getValueType(0);
15452 SDValue Op = N->getOperand(0);
15453 SDLoc dl(N);
15454
15455 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15456 if (ST->isLittle())
15457 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15458
15459 // VT VECTOR_REG_CAST (VT Op) -> Op
15460 if (Op.getValueType() == VT)
15461 return Op;
15462 // VECTOR_REG_CAST undef -> undef
15463 if (Op.isUndef())
15464 return DAG.getUNDEF(VT);
15465
15466 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15467 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15468 // If the valuetypes are the same, we can remove the cast entirely.
15469 if (Op->getOperand(0).getValueType() == VT)
15470 return Op->getOperand(0);
15471 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15472 }
15473
15474 return SDValue();
15475}
15476
15478 const ARMSubtarget *Subtarget) {
15479 if (!Subtarget->hasMVEIntegerOps())
15480 return SDValue();
15481
15482 EVT VT = N->getValueType(0);
15483 SDValue Op0 = N->getOperand(0);
15484 SDValue Op1 = N->getOperand(1);
15485 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15486 SDLoc dl(N);
15487
15488 // vcmp X, 0, cc -> vcmpz X, cc
15489 if (isZeroVector(Op1))
15490 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15491
15492 unsigned SwappedCond = getSwappedCondition(Cond);
15493 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15494 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15495 if (isZeroVector(Op0))
15496 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15497 DAG.getConstant(SwappedCond, dl, MVT::i32));
15498 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15499 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15500 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15501 DAG.getConstant(SwappedCond, dl, MVT::i32));
15502 }
15503
15504 return SDValue();
15505}
15506
15507/// PerformInsertEltCombine - Target-specific dag combine xforms for
15508/// ISD::INSERT_VECTOR_ELT.
15511 // Bitcast an i64 load inserted into a vector to f64.
15512 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15513 EVT VT = N->getValueType(0);
15514 SDNode *Elt = N->getOperand(1).getNode();
15515 if (VT.getVectorElementType() != MVT::i64 ||
15516 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15517 return SDValue();
15518
15519 SelectionDAG &DAG = DCI.DAG;
15520 SDLoc dl(N);
15521 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15523 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15524 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15525 // Make the DAGCombiner fold the bitcasts.
15526 DCI.AddToWorklist(Vec.getNode());
15527 DCI.AddToWorklist(V.getNode());
15528 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15529 Vec, V, N->getOperand(2));
15530 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15531}
15532
15533// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15534// directly or bitcast to an integer if the original is a float vector.
15535// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15536// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15537static SDValue
15539 EVT VT = N->getValueType(0);
15540 SDLoc dl(N);
15541
15542 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15543 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15544 return SDValue();
15545
15546 SDValue Ext = SDValue(N, 0);
15547 if (Ext.getOpcode() == ISD::BITCAST &&
15548 Ext.getOperand(0).getValueType() == MVT::f32)
15549 Ext = Ext.getOperand(0);
15550 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15551 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15552 Ext.getConstantOperandVal(1) % 2 != 0)
15553 return SDValue();
15554 if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||
15555 Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))
15556 return SDValue();
15557
15558 SDValue Op0 = Ext.getOperand(0);
15559 EVT VecVT = Op0.getValueType();
15560 unsigned ResNo = Op0.getResNo();
15561 unsigned Lane = Ext.getConstantOperandVal(1);
15562 if (VecVT.getVectorNumElements() != 4)
15563 return SDValue();
15564
15565 // Find another extract, of Lane + 1
15566 auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {
15567 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15568 isa<ConstantSDNode>(V->getOperand(1)) &&
15569 V->getConstantOperandVal(1) == Lane + 1 &&
15570 V->getOperand(0).getResNo() == ResNo;
15571 });
15572 if (OtherIt == Op0->users().end())
15573 return SDValue();
15574
15575 // For float extracts, we need to be converting to an i32 for both vector
15576 // lanes.
15577 SDValue OtherExt(*OtherIt, 0);
15578 if (OtherExt.getValueType() != MVT::i32) {
15579 if (!OtherExt->hasOneUse() ||
15580 OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||
15581 OtherExt->user_begin()->getValueType(0) != MVT::i32)
15582 return SDValue();
15583 OtherExt = SDValue(*OtherExt->user_begin(), 0);
15584 }
15585
15586 // Convert the type to an f64 and extract with a VMOVRRD.
15587 SDValue F64 = DCI.DAG.getNode(
15588 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15589 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15590 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15591 SDValue VMOVRRD =
15592 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15593
15594 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15595 return VMOVRRD;
15596}
15597
15600 const ARMSubtarget *ST) {
15601 SDValue Op0 = N->getOperand(0);
15602 EVT VT = N->getValueType(0);
15603 SDLoc dl(N);
15604
15605 // extract (vdup x) -> x
15606 if (Op0->getOpcode() == ARMISD::VDUP) {
15607 SDValue X = Op0->getOperand(0);
15608 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15609 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15610 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15611 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15612 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15613 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15614
15615 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15616 X = X->getOperand(0);
15617 if (X.getValueType() == VT)
15618 return X;
15619 }
15620
15621 // extract ARM_BUILD_VECTOR -> x
15622 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15623 isa<ConstantSDNode>(N->getOperand(1)) &&
15624 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15625 return Op0.getOperand(N->getConstantOperandVal(1));
15626 }
15627
15628 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15629 if (Op0.getValueType() == MVT::v4i32 &&
15630 isa<ConstantSDNode>(N->getOperand(1)) &&
15631 Op0.getOpcode() == ISD::BITCAST &&
15633 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15634 SDValue BV = Op0.getOperand(0);
15635 unsigned Offset = N->getConstantOperandVal(1);
15636 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15637 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15638 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15639 }
15640
15641 // extract x, n; extract x, n+1 -> VMOVRRD x
15642 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15643 return R;
15644
15645 // extract (MVETrunc(x)) -> extract x
15646 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15647 unsigned Idx = N->getConstantOperandVal(1);
15648 unsigned Vec =
15650 unsigned SubIdx =
15652 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15653 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15654 }
15655
15656 return SDValue();
15657}
15658
15660 SDValue Op = N->getOperand(0);
15661 EVT VT = N->getValueType(0);
15662
15663 // sext_inreg(VGETLANEu) -> VGETLANEs
15664 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15665 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15666 Op.getOperand(0).getValueType().getScalarType())
15667 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15668 Op.getOperand(1));
15669
15670 return SDValue();
15671}
15672
15673static SDValue
15675 SDValue Vec = N->getOperand(0);
15676 SDValue SubVec = N->getOperand(1);
15677 uint64_t IdxVal = N->getConstantOperandVal(2);
15678 EVT VecVT = Vec.getValueType();
15679 EVT SubVT = SubVec.getValueType();
15680
15681 // Only do this for legal fixed vector types.
15682 if (!VecVT.isFixedLengthVector() ||
15683 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15685 return SDValue();
15686
15687 // Ignore widening patterns.
15688 if (IdxVal == 0 && Vec.isUndef())
15689 return SDValue();
15690
15691 // Subvector must be half the width and an "aligned" insertion.
15692 unsigned NumSubElts = SubVT.getVectorNumElements();
15693 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15694 (IdxVal != 0 && IdxVal != NumSubElts))
15695 return SDValue();
15696
15697 // Fold insert_subvector -> concat_vectors
15698 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15699 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
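// For example, insert_subvector(v8i16 V, v4i16 S, 4) becomes
// concat_vectors(extract_subvector(V, 0), S).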
15700 SDLoc DL(N);
15701 SDValue Lo, Hi;
15702 if (IdxVal == 0) {
15703 Lo = SubVec;
15704 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15705 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15706 } else {
15707 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15708 DCI.DAG.getVectorIdxConstant(0, DL));
15709 Hi = SubVec;
15710 }
15711 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15712}
15713
15714// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15716 SelectionDAG &DAG) {
15717 SDValue Trunc = N->getOperand(0);
15718 EVT VT = Trunc.getValueType();
15719 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15720 return SDValue();
15721
15722 SDLoc DL(Trunc);
15723 if (isVMOVNTruncMask(N->getMask(), VT, false))
15724 return DAG.getNode(
15725 ARMISD::VMOVN, DL, VT,
15726 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15727 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15728 DAG.getConstant(1, DL, MVT::i32));
15729 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15730 return DAG.getNode(
15731 ARMISD::VMOVN, DL, VT,
15732 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15733 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15734 DAG.getConstant(1, DL, MVT::i32));
15735 return SDValue();
15736}
15737
15738/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15739/// ISD::VECTOR_SHUFFLE.
15742 return R;
15743
15744 // The LLVM shufflevector instruction does not require the shuffle mask
15745 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15746 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15747 // operands do not match the mask length, they are extended by concatenating
15748 // them with undef vectors. That is probably the right thing for other
15749 // targets, but for NEON it is better to concatenate two double-register
15750 // size vector operands into a single quad-register size vector. Do that
15751 // transformation here:
15752 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15753 // shuffle(concat(v1, v2), undef)
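// For example (v8i16): shuffle(concat(v1, undef), concat(v2, undef),
//                               <0,1,8,9,2,3,10,11>)
// becomes shuffle(concat(v1, v2), undef, <0,1,4,5,2,3,6,7>).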
15754 SDValue Op0 = N->getOperand(0);
15755 SDValue Op1 = N->getOperand(1);
15756 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15757 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15758 Op0.getNumOperands() != 2 ||
15759 Op1.getNumOperands() != 2)
15760 return SDValue();
15761 SDValue Concat0Op1 = Op0.getOperand(1);
15762 SDValue Concat1Op1 = Op1.getOperand(1);
15763 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15764 return SDValue();
15765 // Skip the transformation if any of the types are illegal.
15766 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15767 EVT VT = N->getValueType(0);
15768 if (!TLI.isTypeLegal(VT) ||
15769 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15770 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15771 return SDValue();
15772
15773 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15774 Op0.getOperand(0), Op1.getOperand(0));
15775 // Translate the shuffle mask.
15776 SmallVector<int, 16> NewMask;
15777 unsigned NumElts = VT.getVectorNumElements();
15778 unsigned HalfElts = NumElts/2;
15780 for (unsigned n = 0; n < NumElts; ++n) {
15781 int MaskElt = SVN->getMaskElt(n);
15782 int NewElt = -1;
15783 if (MaskElt < (int)HalfElts)
15784 NewElt = MaskElt;
15785 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15786 NewElt = HalfElts + MaskElt - NumElts;
15787 NewMask.push_back(NewElt);
15788 }
15789 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15790 DAG.getUNDEF(VT), NewMask);
15791}
15792
15793/// Load/store instruction that can be merged with a base address
15794/// update
15799 unsigned AddrOpIdx;
15800};
15801
15803 /// Instruction that updates a pointer
15805 /// Pointer increment operand
15807 /// Pointer increment value if it is a constant, or 0 otherwise
15808 unsigned ConstInc;
15809};
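// Together, a BaseUpdateTarget and a BaseUpdateUser describe folds such as
// (vld1 addr) + (add addr, #16) -> vld1_upd, where the pointer increment is
// merged into a post-indexed load or store.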
15810
15812 // Check that the add is independent of the load/store.
15813 // Otherwise, folding it would create a cycle. Search through Addr
15814 // as well, since the User may not be a direct user of Addr and
15815 // only share a base pointer.
15818 Worklist.push_back(N);
15819 Worklist.push_back(User);
15820 const unsigned MaxSteps = 1024;
15821 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15822 SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15823 return false;
15824 return true;
15825}
15826
15828 struct BaseUpdateUser &User,
15829 bool SimpleConstIncOnly,
15831 SelectionDAG &DAG = DCI.DAG;
15832 SDNode *N = Target.N;
15833 MemSDNode *MemN = cast<MemSDNode>(N);
15834 SDLoc dl(N);
15835
15836 // Find the new opcode for the updating load/store.
15837 bool isLoadOp = true;
15838 bool isLaneOp = false;
15839 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15840 // as an operand.
15841 bool hasAlignment = true;
15842 unsigned NewOpc = 0;
15843 unsigned NumVecs = 0;
15844 if (Target.isIntrinsic) {
15845 unsigned IntNo = N->getConstantOperandVal(1);
15846 switch (IntNo) {
15847 default:
15848 llvm_unreachable("unexpected intrinsic for Neon base update");
15849 case Intrinsic::arm_neon_vld1:
15850 NewOpc = ARMISD::VLD1_UPD;
15851 NumVecs = 1;
15852 break;
15853 case Intrinsic::arm_neon_vld2:
15854 NewOpc = ARMISD::VLD2_UPD;
15855 NumVecs = 2;
15856 break;
15857 case Intrinsic::arm_neon_vld3:
15858 NewOpc = ARMISD::VLD3_UPD;
15859 NumVecs = 3;
15860 break;
15861 case Intrinsic::arm_neon_vld4:
15862 NewOpc = ARMISD::VLD4_UPD;
15863 NumVecs = 4;
15864 break;
15865 case Intrinsic::arm_neon_vld1x2:
15866 NewOpc = ARMISD::VLD1x2_UPD;
15867 NumVecs = 2;
15868 hasAlignment = false;
15869 break;
15870 case Intrinsic::arm_neon_vld1x3:
15871 NewOpc = ARMISD::VLD1x3_UPD;
15872 NumVecs = 3;
15873 hasAlignment = false;
15874 break;
15875 case Intrinsic::arm_neon_vld1x4:
15876 NewOpc = ARMISD::VLD1x4_UPD;
15877 NumVecs = 4;
15878 hasAlignment = false;
15879 break;
15880 case Intrinsic::arm_neon_vld2dup:
15881 NewOpc = ARMISD::VLD2DUP_UPD;
15882 NumVecs = 2;
15883 break;
15884 case Intrinsic::arm_neon_vld3dup:
15885 NewOpc = ARMISD::VLD3DUP_UPD;
15886 NumVecs = 3;
15887 break;
15888 case Intrinsic::arm_neon_vld4dup:
15889 NewOpc = ARMISD::VLD4DUP_UPD;
15890 NumVecs = 4;
15891 break;
15892 case Intrinsic::arm_neon_vld2lane:
15893 NewOpc = ARMISD::VLD2LN_UPD;
15894 NumVecs = 2;
15895 isLaneOp = true;
15896 break;
15897 case Intrinsic::arm_neon_vld3lane:
15898 NewOpc = ARMISD::VLD3LN_UPD;
15899 NumVecs = 3;
15900 isLaneOp = true;
15901 break;
15902 case Intrinsic::arm_neon_vld4lane:
15903 NewOpc = ARMISD::VLD4LN_UPD;
15904 NumVecs = 4;
15905 isLaneOp = true;
15906 break;
15907 case Intrinsic::arm_neon_vst1:
15908 NewOpc = ARMISD::VST1_UPD;
15909 NumVecs = 1;
15910 isLoadOp = false;
15911 break;
15912 case Intrinsic::arm_neon_vst2:
15913 NewOpc = ARMISD::VST2_UPD;
15914 NumVecs = 2;
15915 isLoadOp = false;
15916 break;
15917 case Intrinsic::arm_neon_vst3:
15918 NewOpc = ARMISD::VST3_UPD;
15919 NumVecs = 3;
15920 isLoadOp = false;
15921 break;
15922 case Intrinsic::arm_neon_vst4:
15923 NewOpc = ARMISD::VST4_UPD;
15924 NumVecs = 4;
15925 isLoadOp = false;
15926 break;
15927 case Intrinsic::arm_neon_vst2lane:
15928 NewOpc = ARMISD::VST2LN_UPD;
15929 NumVecs = 2;
15930 isLoadOp = false;
15931 isLaneOp = true;
15932 break;
15933 case Intrinsic::arm_neon_vst3lane:
15934 NewOpc = ARMISD::VST3LN_UPD;
15935 NumVecs = 3;
15936 isLoadOp = false;
15937 isLaneOp = true;
15938 break;
15939 case Intrinsic::arm_neon_vst4lane:
15940 NewOpc = ARMISD::VST4LN_UPD;
15941 NumVecs = 4;
15942 isLoadOp = false;
15943 isLaneOp = true;
15944 break;
15945 case Intrinsic::arm_neon_vst1x2:
15946 NewOpc = ARMISD::VST1x2_UPD;
15947 NumVecs = 2;
15948 isLoadOp = false;
15949 hasAlignment = false;
15950 break;
15951 case Intrinsic::arm_neon_vst1x3:
15952 NewOpc = ARMISD::VST1x3_UPD;
15953 NumVecs = 3;
15954 isLoadOp = false;
15955 hasAlignment = false;
15956 break;
15957 case Intrinsic::arm_neon_vst1x4:
15958 NewOpc = ARMISD::VST1x4_UPD;
15959 NumVecs = 4;
15960 isLoadOp = false;
15961 hasAlignment = false;
15962 break;
15963 }
15964 } else {
15965 isLaneOp = true;
15966 switch (N->getOpcode()) {
15967 default:
15968 llvm_unreachable("unexpected opcode for Neon base update");
15969 case ARMISD::VLD1DUP:
15970 NewOpc = ARMISD::VLD1DUP_UPD;
15971 NumVecs = 1;
15972 break;
15973 case ARMISD::VLD2DUP:
15974 NewOpc = ARMISD::VLD2DUP_UPD;
15975 NumVecs = 2;
15976 break;
15977 case ARMISD::VLD3DUP:
15978 NewOpc = ARMISD::VLD3DUP_UPD;
15979 NumVecs = 3;
15980 break;
15981 case ARMISD::VLD4DUP:
15982 NewOpc = ARMISD::VLD4DUP_UPD;
15983 NumVecs = 4;
15984 break;
15985 case ISD::LOAD:
15986 NewOpc = ARMISD::VLD1_UPD;
15987 NumVecs = 1;
15988 isLaneOp = false;
15989 break;
15990 case ISD::STORE:
15991 NewOpc = ARMISD::VST1_UPD;
15992 NumVecs = 1;
15993 isLaneOp = false;
15994 isLoadOp = false;
15995 break;
15996 }
15997 }
15998
15999 // Find the size of memory referenced by the load/store.
16000 EVT VecTy;
16001 if (isLoadOp) {
16002 VecTy = N->getValueType(0);
16003 } else if (Target.isIntrinsic) {
16004 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
16005 } else {
16006 assert(Target.isStore &&
16007 "Node has to be a load, a store, or an intrinsic!");
16008 VecTy = N->getOperand(1).getValueType();
16009 }
16010
16011 bool isVLDDUPOp =
16012 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
16013 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
16014
16015 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16016 if (isLaneOp || isVLDDUPOp)
16017 NumBytes /= VecTy.getVectorNumElements();
16018
16019 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
16020 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
16021 // separate instructions that make it harder to use a non-constant update.
16022 return false;
16023 }
16024
16025 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
16026 return false;
16027
16028 if (!isValidBaseUpdate(N, User.N))
16029 return false;
16030
16031 // OK, we found an ADD we can fold into the base update.
16032 // Now, create a _UPD node, taking care of not breaking alignment.
16033
16034 EVT AlignedVecTy = VecTy;
16035 Align Alignment = MemN->getAlign();
16036
16037 // If this is a less-than-standard-aligned load/store, change the type to
16038 // match the standard alignment.
16039 // The alignment is overlooked when selecting _UPD variants; and it's
16040 // easier to introduce bitcasts here than fix that.
16041 // There are 3 ways to get to this base-update combine:
16042 // - intrinsics: they are assumed to be properly aligned (to the standard
16043 // alignment of the memory type), so we don't need to do anything.
16044 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
16045 // intrinsics, so, likewise, there's nothing to do.
16046 // - generic load/store instructions: the alignment is specified as an
16047 // explicit operand, rather than implicitly as the standard alignment
16048  //   of the memory type (like the intrinsics). We need to change the
16049 // memory type to match the explicit alignment. That way, we don't
16050 // generate non-standard-aligned ARMISD::VLDx nodes.
16051 if (isa<LSBaseSDNode>(N)) {
16052 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
16053 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
16054 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
16055 assert(!isLaneOp && "Unexpected generic load/store lane.");
16056 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
16057 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
16058 }
16059 // Don't set an explicit alignment on regular load/stores that we want
16060 // to transform to VLD/VST 1_UPD nodes.
16061 // This matches the behavior of regular load/stores, which only get an
16062 // explicit alignment if the MMO alignment is larger than the standard
16063 // alignment of the memory type.
16064 // Intrinsics, however, always get an explicit alignment, set to the
16065 // alignment of the MMO.
16066 Alignment = Align(1);
16067 }
16068
16069 // Create the new updating load/store node.
16070 // First, create an SDVTList for the new updating node's results.
16071 EVT Tys[6];
16072 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16073 unsigned n;
16074 for (n = 0; n < NumResultVecs; ++n)
16075 Tys[n] = AlignedVecTy;
16076 Tys[n++] = MVT::i32;
16077 Tys[n] = MVT::Other;
16078 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16079
16080 // Then, gather the new node's operands.
16081  SmallVector<SDValue, 8> Ops;
16082  Ops.push_back(N->getOperand(0)); // incoming chain
16083 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16084 Ops.push_back(User.Inc);
16085
16086 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16087 // Try to match the intrinsic's signature
16088 Ops.push_back(StN->getValue());
16089 } else {
16090 // Loads (and of course intrinsics) match the intrinsics' signature,
16091 // so just add all but the alignment operand.
16092 unsigned LastOperand =
16093 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16094 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16095 Ops.push_back(N->getOperand(i));
16096 }
16097
16098 // For all node types, the alignment operand is always the last one.
16099 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16100
16101 // If this is a non-standard-aligned STORE, the penultimate operand is the
16102 // stored value. Bitcast it to the aligned type.
16103 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16104 SDValue &StVal = Ops[Ops.size() - 2];
16105 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16106 }
16107
16108 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16109 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16110 MemN->getMemOperand());
16111
16112 // Update the uses.
16113 SmallVector<SDValue, 5> NewResults;
16114 for (unsigned i = 0; i < NumResultVecs; ++i)
16115 NewResults.push_back(SDValue(UpdN.getNode(), i));
16116
16117  // If this is a non-standard-aligned LOAD, the first result is the loaded
16118 // value. Bitcast it to the expected result type.
16119 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16120 SDValue &LdVal = NewResults[0];
16121 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16122 }
16123
16124 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16125 DCI.CombineTo(N, NewResults);
16126 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16127
16128 return true;
16129}
16130
16131// If (opcode ptr inc) is an ADD-like instruction, return the
16132// increment value. Otherwise return 0.
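// For example (illustrative): when the low bits of %ptr are known to be zero,
// (or %ptr, 8) adds 8 exactly like (add %ptr, 8) does, which is what the
// DAG.haveNoCommonBitsSet(Ptr, Inc) check below relies on; both cases return 8.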
16133static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16134 SDValue Inc, const SelectionDAG &DAG) {
16135  ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16136  if (!CInc)
16137 return 0;
16138
16139 switch (Opcode) {
16140 case ARMISD::VLD1_UPD:
16141 case ISD::ADD:
16142 return CInc->getZExtValue();
16143 case ISD::OR: {
16144 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16145 // (OR ptr inc) is the same as (ADD ptr inc)
16146 return CInc->getZExtValue();
16147 }
16148 return 0;
16149 }
16150 default:
16151 return 0;
16152 }
16153}
16154
16155static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16156  switch (N->getOpcode()) {
16157 case ISD::ADD:
16158 case ISD::OR: {
16159 if (isa<ConstantSDNode>(N->getOperand(1))) {
16160 *Ptr = N->getOperand(0);
16161 *CInc = N->getOperand(1);
16162 return true;
16163 }
16164 return false;
16165 }
16166 case ARMISD::VLD1_UPD: {
16167 if (isa<ConstantSDNode>(N->getOperand(2))) {
16168 *Ptr = N->getOperand(1);
16169 *CInc = N->getOperand(2);
16170 return true;
16171 }
16172 return false;
16173 }
16174 default:
16175 return false;
16176 }
16177}
16178
16179/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16180/// NEON load/store intrinsics, and generic vector load/stores, to merge
16181/// base address updates.
16182/// For generic load/stores, the memory type is assumed to be a vector.
16183/// The caller is assumed to have checked legality.
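/// For example (illustrative), a NEON load followed by a matching pointer
/// increment
///   vld1.32 {d16, d17}, [r0]
///   add     r0, r0, #16
/// can be rewritten to the post-indexed (writeback) form
///   vld1.32 {d16, d17}, [r0]!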
16184static SDValue CombineBaseUpdate(SDNode *N,
16185                                 TargetLowering::DAGCombinerInfo &DCI) {
16186  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16187 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16188 const bool isStore = N->getOpcode() == ISD::STORE;
16189 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16190 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16191
16192 // Limit the number of possible base-updates we look at to prevent degenerate
16193 // cases.
16194 unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16195
16196 SDValue Addr = N->getOperand(AddrOpIdx);
16197
16198  SmallVector<BaseUpdateUser, 8> BaseUpdates;
16199
16200 // Search for a use of the address operand that is an increment.
16201 for (SDUse &Use : Addr->uses()) {
16202 SDNode *User = Use.getUser();
16203 if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)
16204 continue;
16205
16206 SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);
16207 unsigned ConstInc =
16208 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16209
16210 if (ConstInc || User->getOpcode() == ISD::ADD) {
16211 BaseUpdates.push_back({User, Inc, ConstInc});
16212 if (BaseUpdates.size() >= MaxBaseUpdates)
16213 break;
16214 }
16215 }
16216
16217 // If the address is a constant pointer increment itself, find
16218 // another constant increment that has the same base operand
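  // For example (illustrative): if Addr is (add %base, 4) and another user of
  // %base is (add %base, 20), that user is equivalent to incrementing Addr by
  // 20 - 4 = 16, so it is recorded below as a candidate update of 16.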
16219 SDValue Base;
16220 SDValue CInc;
16221 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16222 unsigned Offset =
16223 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16224 for (SDUse &Use : Base->uses()) {
16225
16226 SDNode *User = Use.getUser();
16227 if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||
16228 User->getNumOperands() != 2)
16229 continue;
16230
16231 SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);
16232 unsigned UserOffset =
16233 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16234
16235 if (!UserOffset || UserOffset <= Offset)
16236 continue;
16237
16238 unsigned NewConstInc = UserOffset - Offset;
16239 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16240 BaseUpdates.push_back({User, NewInc, NewConstInc});
16241 if (BaseUpdates.size() >= MaxBaseUpdates)
16242 break;
16243 }
16244 }
16245
16246 // Try to fold the load/store with an update that matches memory
16247 // access size. This should work well for sequential loads.
16248 unsigned NumValidUpd = BaseUpdates.size();
16249 for (unsigned I = 0; I < NumValidUpd; I++) {
16250 BaseUpdateUser &User = BaseUpdates[I];
16251 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16252 return SDValue();
16253 }
16254
16255 // Try to fold with other users. Non-constant updates are considered
16256 // first, and constant updates are sorted to not break a sequence of
16257 // strided accesses (if there is any).
16258 llvm::stable_sort(BaseUpdates,
16259 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16260 return LHS.ConstInc < RHS.ConstInc;
16261 });
16262 for (BaseUpdateUser &User : BaseUpdates) {
16263 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16264 return SDValue();
16265 }
16266 return SDValue();
16267}
16268
16269static SDValue PerformVLDCombine(SDNode *N,
16270                                 TargetLowering::DAGCombinerInfo &DCI) {
16271  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16272 return SDValue();
16273
16274 return CombineBaseUpdate(N, DCI);
16275}
16276
16277static SDValue PerformMVEVLDCombine(SDNode *N,
16278                                    TargetLowering::DAGCombinerInfo &DCI) {
16279  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16280 return SDValue();
16281
16282 SelectionDAG &DAG = DCI.DAG;
16283 SDValue Addr = N->getOperand(2);
16284 MemSDNode *MemN = cast<MemSDNode>(N);
16285 SDLoc dl(N);
16286
16287 // For the stores, where there are multiple intrinsics we only actually want
16288  // to post-inc the last of them.
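  // (For example, an MVE vst4q is lowered to four arm_mve_vst4q intrinsic
  // calls whose last operand selects stage 0..3; the checks below attach the
  // base update only to the final stage.)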
16289 unsigned IntNo = N->getConstantOperandVal(1);
16290 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16291 return SDValue();
16292 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16293 return SDValue();
16294
16295 // Search for a use of the address operand that is an increment.
16296 for (SDUse &Use : Addr->uses()) {
16297 SDNode *User = Use.getUser();
16298 if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
16299 continue;
16300
16301 // Check that the add is independent of the load/store. Otherwise, folding
16302 // it would create a cycle. We can avoid searching through Addr as it's a
16303 // predecessor to both.
16304    SmallPtrSet<const SDNode *, 32> Visited;
16305    SmallVector<const SDNode *, 16> Worklist;
16306    Visited.insert(Addr.getNode());
16307 Worklist.push_back(N);
16308 Worklist.push_back(User);
16309 const unsigned MaxSteps = 1024;
16310 if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16311 SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
16312 continue;
16313
16314 // Find the new opcode for the updating load/store.
16315 bool isLoadOp = true;
16316 unsigned NewOpc = 0;
16317 unsigned NumVecs = 0;
16318 switch (IntNo) {
16319 default:
16320 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16321 case Intrinsic::arm_mve_vld2q:
16322 NewOpc = ARMISD::VLD2_UPD;
16323 NumVecs = 2;
16324 break;
16325 case Intrinsic::arm_mve_vld4q:
16326 NewOpc = ARMISD::VLD4_UPD;
16327 NumVecs = 4;
16328 break;
16329 case Intrinsic::arm_mve_vst2q:
16330 NewOpc = ARMISD::VST2_UPD;
16331 NumVecs = 2;
16332 isLoadOp = false;
16333 break;
16334 case Intrinsic::arm_mve_vst4q:
16335 NewOpc = ARMISD::VST4_UPD;
16336 NumVecs = 4;
16337 isLoadOp = false;
16338 break;
16339 }
16340
16341 // Find the size of memory referenced by the load/store.
16342 EVT VecTy;
16343 if (isLoadOp) {
16344 VecTy = N->getValueType(0);
16345 } else {
16346 VecTy = N->getOperand(3).getValueType();
16347 }
16348
16349 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16350
16351 // If the increment is a constant, it must match the memory ref size.
16352 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16353    ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc);
16354    if (!CInc || CInc->getZExtValue() != NumBytes)
16355 continue;
16356
16357 // Create the new updating load/store node.
16358 // First, create an SDVTList for the new updating node's results.
16359 EVT Tys[6];
16360 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16361 unsigned n;
16362 for (n = 0; n < NumResultVecs; ++n)
16363 Tys[n] = VecTy;
16364 Tys[n++] = MVT::i32;
16365 Tys[n] = MVT::Other;
16366 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16367
16368 // Then, gather the new node's operands.
16369    SmallVector<SDValue, 8> Ops;
16370    Ops.push_back(N->getOperand(0)); // incoming chain
16371 Ops.push_back(N->getOperand(2)); // ptr
16372 Ops.push_back(Inc);
16373
16374 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16375 Ops.push_back(N->getOperand(i));
16376
16377 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16378 MemN->getMemOperand());
16379
16380 // Update the uses.
16381 SmallVector<SDValue, 5> NewResults;
16382 for (unsigned i = 0; i < NumResultVecs; ++i)
16383 NewResults.push_back(SDValue(UpdN.getNode(), i));
16384
16385 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16386 DCI.CombineTo(N, NewResults);
16387 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16388
16389 break;
16390 }
16391
16392 return SDValue();
16393}
16394
16395/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16396/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16397/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16398/// return true.
16399static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16400  SelectionDAG &DAG = DCI.DAG;
16401 EVT VT = N->getValueType(0);
16402 // vldN-dup instructions only support 64-bit vectors for N > 1.
16403 if (!VT.is64BitVector())
16404 return false;
16405
16406 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16407 SDNode *VLD = N->getOperand(0).getNode();
16408 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16409 return false;
16410 unsigned NumVecs = 0;
16411 unsigned NewOpc = 0;
16412 unsigned IntNo = VLD->getConstantOperandVal(1);
16413 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16414 NumVecs = 2;
16415 NewOpc = ARMISD::VLD2DUP;
16416 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16417 NumVecs = 3;
16418 NewOpc = ARMISD::VLD3DUP;
16419 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16420 NumVecs = 4;
16421 NewOpc = ARMISD::VLD4DUP;
16422 } else {
16423 return false;
16424 }
16425
16426 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16427 // numbers match the load.
16428 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16429 for (SDUse &Use : VLD->uses()) {
16430 // Ignore uses of the chain result.
16431 if (Use.getResNo() == NumVecs)
16432 continue;
16433 SDNode *User = Use.getUser();
16434 if (User->getOpcode() != ARMISD::VDUPLANE ||
16435 VLDLaneNo != User->getConstantOperandVal(1))
16436 return false;
16437 }
16438
16439 // Create the vldN-dup node.
16440 EVT Tys[5];
16441 unsigned n;
16442 for (n = 0; n < NumVecs; ++n)
16443 Tys[n] = VT;
16444 Tys[n] = MVT::Other;
16445 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16446 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16447  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16448  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16449 Ops, VLDMemInt->getMemoryVT(),
16450 VLDMemInt->getMemOperand());
16451
16452 // Update the uses.
16453 for (SDUse &Use : VLD->uses()) {
16454 unsigned ResNo = Use.getResNo();
16455 // Ignore uses of the chain result.
16456 if (ResNo == NumVecs)
16457 continue;
16458 DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));
16459 }
16460
16461 // Now the vldN-lane intrinsic is dead except for its chain result.
16462 // Update uses of the chain.
16463 std::vector<SDValue> VLDDupResults;
16464 for (unsigned n = 0; n < NumVecs; ++n)
16465 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16466 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16467 DCI.CombineTo(VLD, VLDDupResults);
16468
16469 return true;
16470}
16471
16472/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16473/// ARMISD::VDUPLANE.
16474static SDValue PerformVDUPLANECombine(SDNode *N,
16475                                      TargetLowering::DAGCombinerInfo &DCI,
16476                                      const ARMSubtarget *Subtarget) {
16477 SDValue Op = N->getOperand(0);
16478 EVT VT = N->getValueType(0);
16479
16480 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16481 if (Subtarget->hasMVEIntegerOps()) {
16482 EVT ExtractVT = VT.getVectorElementType();
16483 // We need to ensure we are creating a legal type.
16484 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16485 ExtractVT = MVT::i32;
16486 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16487 N->getOperand(0), N->getOperand(1));
16488 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16489 }
16490
16491 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16492 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16493 if (CombineVLDDUP(N, DCI))
16494 return SDValue(N, 0);
16495
16496 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16497 // redundant. Ignore bit_converts for now; element sizes are checked below.
16498 while (Op.getOpcode() == ISD::BITCAST)
16499 Op = Op.getOperand(0);
16500 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16501 return SDValue();
16502
16503 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16504 unsigned EltSize = Op.getScalarValueSizeInBits();
16505 // The canonical VMOV for a zero vector uses a 32-bit element size.
16506 unsigned Imm = Op.getConstantOperandVal(0);
16507 unsigned EltBits;
16508 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16509 EltSize = 8;
16510 if (EltSize > VT.getScalarSizeInBits())
16511 return SDValue();
16512
16513 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16514}
16515
16516/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16517static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16518                                  const ARMSubtarget *Subtarget) {
16519 SDValue Op = N->getOperand(0);
16520 SDLoc dl(N);
16521
16522 if (Subtarget->hasMVEIntegerOps()) {
16523 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16524 // need to come from a GPR.
16525 if (Op.getValueType() == MVT::f32)
16526 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16527 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16528 else if (Op.getValueType() == MVT::f16)
16529 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16530 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16531 }
16532
16533 if (!Subtarget->hasNEON())
16534 return SDValue();
16535
16536 // Match VDUP(LOAD) -> VLD1DUP.
16537 // We match this pattern here rather than waiting for isel because the
16538 // transform is only legal for unindexed loads.
16539 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16540 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16541 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16542 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16543 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16544 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16545 SDValue VLDDup =
16546        DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16547                                LD->getMemoryVT(), LD->getMemOperand());
16548 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16549 return VLDDup;
16550 }
16551
16552 return SDValue();
16553}
16554
16555static SDValue PerformLOADCombine(SDNode *N,
16556                                  TargetLowering::DAGCombinerInfo &DCI,
16557                                  const ARMSubtarget *Subtarget) {
16558 EVT VT = N->getValueType(0);
16559
16560 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16561 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16562      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16563    return CombineBaseUpdate(N, DCI);
16564
16565 return SDValue();
16566}
16567
16568// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16569// pack all of the elements in one place. Next, store to memory in fewer
16570// chunks.
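// For example (illustrative), a truncating store of <4 x i32> to <4 x i8> is
// rewritten as a shuffle that packs the four narrowed bytes into the low lanes
// of the wide vector, followed by a single i32 store of those packed lanes.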
16571static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16572                                             SelectionDAG &DAG) {
16573 SDValue StVal = St->getValue();
16574 EVT VT = StVal.getValueType();
16575 if (!St->isTruncatingStore() || !VT.isVector())
16576 return SDValue();
16577 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16578 EVT StVT = St->getMemoryVT();
16579 unsigned NumElems = VT.getVectorNumElements();
16580 assert(StVT != VT && "Cannot truncate to the same type");
16581 unsigned FromEltSz = VT.getScalarSizeInBits();
16582 unsigned ToEltSz = StVT.getScalarSizeInBits();
16583
16584 // From, To sizes and ElemCount must be pow of two
16585 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16586 return SDValue();
16587
16588 // We are going to use the original vector elt for storing.
16589 // Accumulated smaller vector elements must be a multiple of the store size.
16590 if (0 != (NumElems * FromEltSz) % ToEltSz)
16591 return SDValue();
16592
16593 unsigned SizeRatio = FromEltSz / ToEltSz;
16594 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16595
16596 // Create a type on which we perform the shuffle.
16597 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16598 NumElems * SizeRatio);
16599 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16600
16601 SDLoc DL(St);
16602 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16603 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16604 for (unsigned i = 0; i < NumElems; ++i)
16605 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16606 : i * SizeRatio;
16607
16608 // Can't shuffle using an illegal type.
16609 if (!TLI.isTypeLegal(WideVecVT))
16610 return SDValue();
16611
16612 SDValue Shuff = DAG.getVectorShuffle(
16613 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16614 // At this point all of the data is stored at the bottom of the
16615 // register. We now need to save it to mem.
16616
16617 // Find the largest store unit
16618 MVT StoreType = MVT::i8;
16619 for (MVT Tp : MVT::integer_valuetypes()) {
16620 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16621 StoreType = Tp;
16622 }
16623 // Didn't find a legal store type.
16624 if (!TLI.isTypeLegal(StoreType))
16625 return SDValue();
16626
16627 // Bitcast the original vector into a vector of store-size units
16628 EVT StoreVecVT =
16629 EVT::getVectorVT(*DAG.getContext(), StoreType,
16630 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16631 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16632 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16633  SmallVector<SDValue, 8> Chains;
16634  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16635 TLI.getPointerTy(DAG.getDataLayout()));
16636 SDValue BasePtr = St->getBasePtr();
16637
16638 // Perform one or more big stores into memory.
16639 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16640 for (unsigned I = 0; I < E; I++) {
16641 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16642 ShuffWide, DAG.getIntPtrConstant(I, DL));
16643 SDValue Ch =
16644 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16645 St->getAlign(), St->getMemOperand()->getFlags());
16646 BasePtr =
16647 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16648 Chains.push_back(Ch);
16649 }
16650 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16651}
16652
16653// Try taking a single vector store from an fpround (which would otherwise turn
16654// into an expensive buildvector) and splitting it into a series of narrowing
16655// stores.
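// For example (illustrative), a store of (fpround <8 x f32> to <8 x f16>) can
// be emitted as two VCVTN conversions of four lanes each, stored with two
// narrowing integer stores at byte offsets 0 and 8.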
16656static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16657                                                 SelectionDAG &DAG) {
16658 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16659 return SDValue();
16660 SDValue Trunc = St->getValue();
16661 if (Trunc->getOpcode() != ISD::FP_ROUND)
16662 return SDValue();
16663 EVT FromVT = Trunc->getOperand(0).getValueType();
16664 EVT ToVT = Trunc.getValueType();
16665 if (!ToVT.isVector())
16666 return SDValue();
16668 EVT ToEltVT = ToVT.getVectorElementType();
16669 EVT FromEltVT = FromVT.getVectorElementType();
16670
16671 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16672 return SDValue();
16673
16674 unsigned NumElements = 4;
16675 if (FromVT.getVectorNumElements() % NumElements != 0)
16676 return SDValue();
16677
16678 // Test if the Trunc will be convertable to a VMOVN with a shuffle, and if so
16679 // use the VMOVN over splitting the store. We are looking for patterns of:
16680 // !rev: 0 N 1 N+1 2 N+2 ...
16681 // rev: N 0 N+1 1 N+2 2 ...
16682 // The shuffle may either be a single source (in which case N = NumElts/2) or
16683 // two inputs extended with concat to the same size (in which case N =
16684 // NumElts).
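  // For example (illustrative), with 8 output lanes and two concatenated
  // inputs, the !rev pattern corresponds to the mask <0, 8, 1, 9, 2, 10, 3, 11>
  // and the rev pattern to <8, 0, 9, 1, 10, 2, 11, 3>.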
16685 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16686 ArrayRef<int> M = SVN->getMask();
16687 unsigned NumElts = ToVT.getVectorNumElements();
16688 if (SVN->getOperand(1).isUndef())
16689 NumElts /= 2;
16690
16691 unsigned Off0 = Rev ? NumElts : 0;
16692 unsigned Off1 = Rev ? 0 : NumElts;
16693
16694 for (unsigned I = 0; I < NumElts; I += 2) {
16695 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16696 return false;
16697 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16698 return false;
16699 }
16700
16701 return true;
16702 };
16703
16704 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16705 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16706 return SDValue();
16707
16708 LLVMContext &C = *DAG.getContext();
16709 SDLoc DL(St);
16710 // Details about the old store
16711 SDValue Ch = St->getChain();
16712 SDValue BasePtr = St->getBasePtr();
16713 Align Alignment = St->getBaseAlign();
16714  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16715  AAMDNodes AAInfo = St->getAAInfo();
16716
16717 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16718 // and then stored as truncating integer stores.
16719 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16720 EVT NewToVT = EVT::getVectorVT(
16721 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16722
16723  SmallVector<SDValue, 4> Stores;
16724  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16725 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16726 SDValue NewPtr =
16727 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16728
16729 SDValue Extract =
16730 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16731 DAG.getConstant(i * NumElements, DL, MVT::i32));
16732
16733 SDValue FPTrunc =
16734 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16735 Extract, DAG.getConstant(0, DL, MVT::i32));
16736 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16737
16738 SDValue Store = DAG.getTruncStore(
16739 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16740 NewToVT, Alignment, MMOFlags, AAInfo);
16741 Stores.push_back(Store);
16742 }
16743 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16744}
16745
16746// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16747// into an expensive buildvector) and splitting it into a series of narrowing
16748// stores.
16749static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16750                                                         SelectionDAG &DAG) {
16751 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16752 return SDValue();
16753 SDValue Trunc = St->getValue();
16754 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16755 return SDValue();
16756 EVT FromVT = Trunc->getOperand(0).getValueType();
16757 EVT ToVT = Trunc.getValueType();
16758
16759 LLVMContext &C = *DAG.getContext();
16760 SDLoc DL(St);
16761 // Details about the old store
16762 SDValue Ch = St->getChain();
16763 SDValue BasePtr = St->getBasePtr();
16764 Align Alignment = St->getBaseAlign();
16765  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16766  AAMDNodes AAInfo = St->getAAInfo();
16767
16768 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16769 FromVT.getVectorNumElements());
16770
16771  SmallVector<SDValue, 4> Stores;
16772  for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16773 unsigned NewOffset =
16774 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16775 SDValue NewPtr =
16776 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16777
16778 SDValue Extract = Trunc.getOperand(i);
16779 SDValue Store = DAG.getTruncStore(
16780 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16781 NewToVT, Alignment, MMOFlags, AAInfo);
16782 Stores.push_back(Store);
16783 }
16784 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16785}
16786
16787// Given a floating point store from an extracted vector, with an integer
16788// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16789// help reduce fp register pressure, doesn't require the fp extract and allows
16790// use of more integer post-inc stores not available with vstr.
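// For example (illustrative), if the stored f16 was extracted from a vector
// that is already read elsewhere with an integer VGETLANEu of the same lane,
// the store is rewritten as a truncating i16 store of that i32 lane value,
// avoiding the floating-point extract entirely.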
16791static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16792  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16793 return SDValue();
16794 SDValue Extract = St->getValue();
16795 EVT VT = Extract.getValueType();
16796  // For now this only handles f16. It may be useful for f32 too, but that will
16797 // be bitcast(extract), not the VGETLANEu we currently check here.
16798 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16799 return SDValue();
16800
16801 SDNode *GetLane =
16802 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16803 {Extract.getOperand(0), Extract.getOperand(1)});
16804 if (!GetLane)
16805 return SDValue();
16806
16807 LLVMContext &C = *DAG.getContext();
16808 SDLoc DL(St);
16809 // Create a new integer store to replace the existing floating point version.
16810 SDValue Ch = St->getChain();
16811 SDValue BasePtr = St->getBasePtr();
16812 Align Alignment = St->getBaseAlign();
16813  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16814  AAMDNodes AAInfo = St->getAAInfo();
16815 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16816 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16817 St->getPointerInfo(), NewToVT, Alignment,
16818 MMOFlags, AAInfo);
16819
16820 return Store;
16821}
16822
16823/// PerformSTORECombine - Target-specific dag combine xforms for
16824/// ISD::STORE.
16825static SDValue PerformSTORECombine(SDNode *N,
16826                                   TargetLowering::DAGCombinerInfo &DCI,
16827                                   const ARMSubtarget *Subtarget) {
16828  StoreSDNode *St = cast<StoreSDNode>(N);
16829  if (St->isVolatile())
16830 return SDValue();
16831 SDValue StVal = St->getValue();
16832 EVT VT = StVal.getValueType();
16833
16834 if (Subtarget->hasNEON())
16835 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16836 return Store;
16837
16838 if (Subtarget->hasMVEFloatOps())
16839 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16840 return NewToken;
16841
16842 if (Subtarget->hasMVEIntegerOps()) {
16843 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16844 return NewChain;
16845 if (SDValue NewToken =
16846            PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16847      return NewToken;
16848 }
16849
16850 if (!ISD::isNormalStore(St))
16851 return SDValue();
16852
16853 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16854 // ARM stores of arguments in the same cache line.
16855 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16856 StVal.getNode()->hasOneUse()) {
16857 SelectionDAG &DAG = DCI.DAG;
16858 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16859 SDLoc DL(St);
16860 SDValue BasePtr = St->getBasePtr();
16861 SDValue NewST1 = DAG.getStore(
16862 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16863 BasePtr, St->getPointerInfo(), St->getBaseAlign(),
16864 St->getMemOperand()->getFlags());
16865
16866 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16867 DAG.getConstant(4, DL, MVT::i32));
16868 return DAG.getStore(NewST1.getValue(0), DL,
16869 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16870 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16871 St->getBaseAlign(), St->getMemOperand()->getFlags());
16872 }
16873
16874 if (StVal.getValueType() == MVT::i64 &&
16875      StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16876
16877 // Bitcast an i64 store extracted from a vector to f64.
16878 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16879 SelectionDAG &DAG = DCI.DAG;
16880 SDLoc dl(StVal);
16881 SDValue IntVec = StVal.getOperand(0);
16882 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16883                                   IntVec.getValueType().getVectorNumElements()/2);
16884    SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16885 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16886 Vec, StVal.getOperand(1));
16887 dl = SDLoc(N);
16888 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16889 // Make the DAGCombiner fold the bitcasts.
16890 DCI.AddToWorklist(Vec.getNode());
16891 DCI.AddToWorklist(ExtElt.getNode());
16892 DCI.AddToWorklist(V.getNode());
16893 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16894 St->getPointerInfo(), St->getAlign(),
16895 St->getMemOperand()->getFlags(), St->getAAInfo());
16896 }
16897
16898 // If this is a legal vector store, try to combine it into a VST1_UPD.
16899 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16900      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16901    return CombineBaseUpdate(N, DCI);
16902
16903 return SDValue();
16904}
16905
16906/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16907/// can replace combinations of VMUL and VCVT (floating-point to integer)
16908/// when the VMUL has a constant operand that is a power of 2.
16909///
16910/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16911/// vmul.f32 d16, d17, d16
16912/// vcvt.s32.f32 d16, d16
16913/// becomes:
16914/// vcvt.s32.f32 d16, d16, #3
16915static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16916                                  const ARMSubtarget *Subtarget) {
16917 if (!Subtarget->hasNEON())
16918 return SDValue();
16919
16920 SDValue Op = N->getOperand(0);
16921 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16922 Op.getOpcode() != ISD::FMUL)
16923 return SDValue();
16924
16925 SDValue ConstVec = Op->getOperand(1);
16926 if (!isa<BuildVectorSDNode>(ConstVec))
16927 return SDValue();
16928
16929 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16930 uint32_t FloatBits = FloatTy.getSizeInBits();
16931 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16932 uint32_t IntBits = IntTy.getSizeInBits();
16933 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16934 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16935 // These instructions only exist converting from f32 to i32. We can handle
16936 // smaller integers by generating an extra truncate, but larger ones would
16937 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16938    // these instructions only support v2i32/v4i32 types.
16939 return SDValue();
16940 }
16941
16942 BitVector UndefElements;
16943  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16944  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16945 if (C == -1 || C == 0 || C > 32)
16946 return SDValue();
16947
16948 SDLoc dl(N);
16949 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16950 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16951 Intrinsic::arm_neon_vcvtfp2fxu;
16952 SDValue FixConv = DAG.getNode(
16953 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16954 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16955 DAG.getConstant(C, dl, MVT::i32));
16956
16957 if (IntBits < FloatBits)
16958 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16959
16960 return FixConv;
16961}
16962
16963static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16964                                         const ARMSubtarget *Subtarget) {
16965 if (!Subtarget->hasMVEFloatOps())
16966 return SDValue();
16967
16968 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16969 // The second form can be more easily turned into a predicated vadd, and
16970 // possibly combined into a fma to become a predicated vfma.
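  // This is sound because -0.0 is the additive identity: x + (-0.0) == x for
  // every x, including +0.0, whereas +0.0 only acts as an identity when signed
  // zeros can be ignored. That is exactly what isIdentitySplat below checks,
  // using the fadd's no-signed-zeros flag for the +0.0 case.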
16971 SDValue Op0 = N->getOperand(0);
16972 SDValue Op1 = N->getOperand(1);
16973 EVT VT = N->getValueType(0);
16974 SDLoc DL(N);
16975
16976 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16977 // which these VMOV's represent.
16978 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16979 if (Op.getOpcode() != ISD::BITCAST ||
16980 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16981 return false;
16982 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16983 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16984 return true;
16985 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16986 return true;
16987 return false;
16988 };
16989
16990 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16991 std::swap(Op0, Op1);
16992
16993 if (Op1.getOpcode() != ISD::VSELECT)
16994 return SDValue();
16995
16996 SDNodeFlags FaddFlags = N->getFlags();
16997 bool NSZ = FaddFlags.hasNoSignedZeros();
16998 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16999 return SDValue();
17000
17001 SDValue FAdd =
17002 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
17003 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
17004}
17005
17006static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
17007  SDValue LHS = N->getOperand(0);
17008 SDValue RHS = N->getOperand(1);
17009 EVT VT = N->getValueType(0);
17010 SDLoc DL(N);
17011
17012 if (!N->getFlags().hasAllowReassociation())
17013 return SDValue();
17014
17015  // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
17016 auto ReassocComplex = [&](SDValue A, SDValue B) {
17017 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
17018 return SDValue();
17019 unsigned Opc = A.getConstantOperandVal(0);
17020 if (Opc != Intrinsic::arm_mve_vcmlaq)
17021 return SDValue();
17022 SDValue VCMLA = DAG.getNode(
17023 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
17024 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
17025 A.getOperand(3), A.getOperand(4));
17026 VCMLA->setFlags(A->getFlags());
17027 return VCMLA;
17028 };
17029 if (SDValue R = ReassocComplex(LHS, RHS))
17030 return R;
17031 if (SDValue R = ReassocComplex(RHS, LHS))
17032 return R;
17033
17034 return SDValue();
17035}
17036
17037static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17038                                  const ARMSubtarget *Subtarget) {
17039 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17040 return S;
17041 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17042 return S;
17043 return SDValue();
17044}
17045
17046/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17047/// can replace combinations of VCVT (integer to floating-point) and VMUL
17048/// when the VMUL has a constant operand that is a power of 2.
17049///
17050/// Example (assume d17 = <float 0.125, float 0.125>):
17051/// vcvt.f32.s32 d16, d16
17052/// vmul.f32 d16, d16, d17
17053/// becomes:
17054/// vcvt.f32.s32 d16, d16, #3
17055static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,
17056                                      const ARMSubtarget *Subtarget) {
17057 if (!Subtarget->hasNEON())
17058 return SDValue();
17059
17060 SDValue Op = N->getOperand(0);
17061 unsigned OpOpcode = Op.getNode()->getOpcode();
17062 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17063 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17064 return SDValue();
17065
17066 SDValue ConstVec = N->getOperand(1);
17067 if (!isa<BuildVectorSDNode>(ConstVec))
17068 return SDValue();
17069
17070 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17071 uint32_t FloatBits = FloatTy.getSizeInBits();
17072 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17073 uint32_t IntBits = IntTy.getSizeInBits();
17074 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17075 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17076 // These instructions only exist converting from i32 to f32. We can handle
17077 // smaller integers by generating an extra extend, but larger ones would
17078 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17079    // these instructions only support v2i32/v4i32 types.
17080 return SDValue();
17081 }
17082
17083 ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);
17084 APFloat Recip(0.0f);
17085 if (!CN || !CN->getValueAPF().getExactInverse(&Recip))
17086 return SDValue();
17087
17088 bool IsExact;
17089 APSInt IntVal(33);
17090 if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
17091 APFloat::opOK ||
17092 !IsExact)
17093 return SDValue();
17094
17095 int32_t C = IntVal.exactLogBase2();
17096 if (C == -1 || C == 0 || C > 32)
17097 return SDValue();
17098
17099 SDLoc DL(N);
17100 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17101 SDValue ConvInput = Op.getOperand(0);
17102 if (IntBits < FloatBits)
17103    ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
17104                            NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);
17105
17106 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
17107 : Intrinsic::arm_neon_vcvtfxu2fp;
17108 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
17109 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
17110 DAG.getConstant(C, DL, MVT::i32));
17111}
17112
17113static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17114                                           const ARMSubtarget *ST) {
17115 if (!ST->hasMVEIntegerOps())
17116 return SDValue();
17117
17118 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17119 EVT ResVT = N->getValueType(0);
17120 SDValue N0 = N->getOperand(0);
17121 SDLoc dl(N);
17122
17123 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17124 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17125 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17126 N0.getValueType() == MVT::v16i8)) {
17127 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17128 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17129 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17130 }
17131
17132 // We are looking for something that will have illegal types if left alone,
17133 // but that we can convert to a single instruction under MVE. For example
17134 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17135 // or
17136 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17137
17138 // The legal cases are:
17139 // VADDV u/s 8/16/32
17140 // VMLAV u/s 8/16/32
17141 // VADDLV u/s 32
17142 // VMLALV u/s 16/32
17143
17144 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17145 // extend it and use v4i32 instead.
17146 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17147 EVT AVT = A.getValueType();
17148 return any_of(ExtTypes, [&](MVT Ty) {
17149 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17150 AVT.bitsLE(Ty);
17151 });
17152 };
17153 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17154 EVT AVT = A.getValueType();
17155 if (!AVT.is128BitVector())
17156 A = DAG.getNode(ExtendCode, dl,
17157                      AVT.changeVectorElementType(MVT::getIntegerVT(
17158                          128 / AVT.getVectorMinNumElements())),
17159 A);
17160 return A;
17161 };
17162 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17163 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17164 return SDValue();
17165 SDValue A = N0->getOperand(0);
17166 if (ExtTypeMatches(A, ExtTypes))
17167 return ExtendIfNeeded(A, ExtendCode);
17168 return SDValue();
17169 };
17170 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17171 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17172 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17173        !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17174      return SDValue();
17175 Mask = N0->getOperand(0);
17176 SDValue Ext = N0->getOperand(1);
17177 if (Ext->getOpcode() != ExtendCode)
17178 return SDValue();
17179 SDValue A = Ext->getOperand(0);
17180 if (ExtTypeMatches(A, ExtTypes))
17181 return ExtendIfNeeded(A, ExtendCode);
17182 return SDValue();
17183 };
17184 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17185 SDValue &A, SDValue &B) {
17186 // For a vmla we are trying to match a larger pattern:
17187 // ExtA = sext/zext A
17188 // ExtB = sext/zext B
17189 // Mul = mul ExtA, ExtB
17190 // vecreduce.add Mul
17191    // There might also be an extra extend between the mul and the addreduce, so
17192 // long as the bitwidth is high enough to make them equivalent (for example
17193 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17194 if (ResVT != RetTy)
17195 return false;
17196 SDValue Mul = N0;
17197 if (Mul->getOpcode() == ExtendCode &&
17198 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17199 ResVT.getScalarSizeInBits())
17200 Mul = Mul->getOperand(0);
17201 if (Mul->getOpcode() != ISD::MUL)
17202 return false;
17203 SDValue ExtA = Mul->getOperand(0);
17204 SDValue ExtB = Mul->getOperand(1);
17205 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17206 return false;
17207 A = ExtA->getOperand(0);
17208 B = ExtB->getOperand(0);
17209 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17210 A = ExtendIfNeeded(A, ExtendCode);
17211 B = ExtendIfNeeded(B, ExtendCode);
17212 return true;
17213 }
17214 return false;
17215 };
17216 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17217 SDValue &A, SDValue &B, SDValue &Mask) {
17218 // Same as the pattern above with a select for the zero predicated lanes
17219 // ExtA = sext/zext A
17220 // ExtB = sext/zext B
17221 // Mul = mul ExtA, ExtB
17222 // N0 = select Mask, Mul, 0
17223 // vecreduce.add N0
17224 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17225        !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17226      return false;
17227 Mask = N0->getOperand(0);
17228 SDValue Mul = N0->getOperand(1);
17229 if (Mul->getOpcode() == ExtendCode &&
17230 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17231 ResVT.getScalarSizeInBits())
17232 Mul = Mul->getOperand(0);
17233 if (Mul->getOpcode() != ISD::MUL)
17234 return false;
17235 SDValue ExtA = Mul->getOperand(0);
17236 SDValue ExtB = Mul->getOperand(1);
17237 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17238 return false;
17239 A = ExtA->getOperand(0);
17240 B = ExtB->getOperand(0);
17241 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17242 A = ExtendIfNeeded(A, ExtendCode);
17243 B = ExtendIfNeeded(B, ExtendCode);
17244 return true;
17245 }
17246 return false;
17247 };
17248 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17249 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17250 // reductions. The operands are extended with MVEEXT, but as they are
17251 // reductions the lane orders do not matter. MVEEXT may be combined with
17252 // loads to produce two extending loads, or else they will be expanded to
17253 // VREV/VMOVL.
17254 EVT VT = Ops[0].getValueType();
17255 if (VT == MVT::v16i8) {
17256 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17257 "Unexpected illegal long reduction opcode");
17258 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17259
17260 SDValue Ext0 =
17261 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17262 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17263 SDValue Ext1 =
17264 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17265 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17266
17267 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17268 Ext0, Ext1);
17269 SDValue MLA1 =
17270 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17271 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17272 Ext0.getValue(1), Ext1.getValue(1));
17273 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17274 }
17275 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17276 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17277 SDValue(Node.getNode(), 1));
17278 };
17279
17280 SDValue A, B;
17281 SDValue Mask;
17282 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17283 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17284 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17285 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17286 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17287 A, B))
17288 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17289 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17290 A, B))
17291 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17292 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17293 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17294 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17295 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17296 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17297 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17298
17299 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17300 Mask))
17301 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17302 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17303 Mask))
17304 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17305 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17306 Mask))
17307 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17308 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17309 Mask))
17310 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17311 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17312 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17313 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17314 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17315 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17316 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17317
17318 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17319 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17320 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17321 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17322 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17323 return Create64bitNode(ARMISD::VADDLVs, {A});
17324 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17325 return Create64bitNode(ARMISD::VADDLVu, {A});
17326 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17327 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17328 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17329 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17330 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17331 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17332
17333 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17334 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17335 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17336 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17337 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17338 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17339 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17340 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17341 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17342 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17343 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17344 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17345 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17346 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17347
17348 // Some complications. We can get a case where the two inputs of the mul are
17349 // the same, then the output sext will have been helpfully converted to a
17350 // zext. Turn it back.
17351 SDValue Op = N0;
17352 if (Op->getOpcode() == ISD::VSELECT)
17353 Op = Op->getOperand(1);
17354 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17355 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17356 SDValue Mul = Op->getOperand(0);
17357 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17358 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17359 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17360 if (Op != N0)
17361 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17362 N0->getOperand(0), Ext, N0->getOperand(2));
17363 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17364 }
17365 }
17366
17367 return SDValue();
17368}
17369
17370// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17371// the lanes are used. Due to the reduction being commutative the shuffle can be
17372// removed.
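// For example (illustrative), vaddv(shuffle(x, undef, <3, 2, 1, 0>)) adds up
// exactly the same lanes as vaddv(x), so the shuffle can be dropped provided
// its mask uses every input lane exactly once.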
17373static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17374  unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17375 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17376 if (!Shuf || !Shuf->getOperand(1).isUndef())
17377 return SDValue();
17378
17379 // Check all elements are used once in the mask.
17380 ArrayRef<int> Mask = Shuf->getMask();
17381 APInt SetElts(Mask.size(), 0);
17382 for (int E : Mask) {
17383 if (E < 0 || E >= (int)Mask.size())
17384 return SDValue();
17385 SetElts.setBit(E);
17386 }
17387 if (!SetElts.isAllOnes())
17388 return SDValue();
17389
17390 if (N->getNumOperands() != VecOp + 1) {
17391 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17392 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17393 return SDValue();
17394 }
17395
17396 SmallVector<SDValue> Ops;
17397 for (SDValue Op : N->ops()) {
17398 if (Op.getValueType().isVector())
17399 Ops.push_back(Op.getOperand(0));
17400 else
17401 Ops.push_back(Op);
17402 }
17403 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17404}
17405
17406static SDValue PerformVMOVNCombine(SDNode *N,
17407 TargetLowering::DAGCombinerInfo &DCI) {
17408 SDValue Op0 = N->getOperand(0);
17409 SDValue Op1 = N->getOperand(1);
17410 unsigned IsTop = N->getConstantOperandVal(2);
17411
17412 // VMOVNT a undef -> a
17413 // VMOVNB a undef -> a
17414 // VMOVNB undef a -> a
17415 if (Op1->isUndef())
17416 return Op0;
17417 if (Op0->isUndef() && !IsTop)
17418 return Op1;
17419
17420 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17421 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17422 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17423 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17424 Op1->getConstantOperandVal(2) == 0)
17425 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17426 Op0, Op1->getOperand(1), N->getOperand(2));
17427
17428 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17429 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17430 // into the top or bottom lanes.
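  // Worked example (annotation, not part of the original source): for a v8i16
  // VMOVNT only lanes 0,2,4,6 of Op0 (which are kept) and lanes 0,2,4,6 of Op1
  // (which supply the inserted values) are demanded.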
17431 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17432 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17433 APInt Op0DemandedElts =
17434 IsTop ? Op1DemandedElts
17435 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17436
17437 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17438 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17439 return SDValue(N, 0);
17440 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17441 return SDValue(N, 0);
17442
17443 return SDValue();
17444}
17445
17446static SDValue PerformVQMOVNCombine(SDNode *N,
17447 TargetLowering::DAGCombinerInfo &DCI) {
17448 SDValue Op0 = N->getOperand(0);
17449 unsigned IsTop = N->getConstantOperandVal(2);
17450
17451 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17452 APInt Op0DemandedElts =
17453 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17454 : APInt::getHighBitsSet(2, 1));
17455
17456 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17457 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17458 return SDValue(N, 0);
17459 return SDValue();
17460}
17461
17462static SDValue PerformVQDMULHCombine(SDNode *N,
17463 TargetLowering::DAGCombinerInfo &DCI) {
17464 EVT VT = N->getValueType(0);
17465 SDValue LHS = N->getOperand(0);
17466 SDValue RHS = N->getOperand(1);
17467
17468 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17469 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17470 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17471 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17472 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17473 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17474 SDLoc DL(N);
17475 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17476 LHS.getOperand(0), RHS.getOperand(0));
17477 SDValue UndefV = LHS.getOperand(1);
17478 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17479 }
17480 return SDValue();
17481}
17482
17483static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17484 SDLoc DL(N);
17485 SDValue Op0 = N->getOperand(0);
17486 SDValue Op1 = N->getOperand(1);
17487
17488 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17489 // uses of the intrinsics.
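  // For example (annotation, not part of the original source):
  //   LSLL(lo, hi, -3) is rewritten as LSRL(lo, hi, 3), and a shift by 0
  //   simply forwards both halves unchanged.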
17490 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17491 int ShiftAmt = C->getSExtValue();
17492 if (ShiftAmt == 0) {
17493 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17494 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17495 return SDValue();
17496 }
17497
17498 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17499 unsigned NewOpcode =
17500 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17501 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17502 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17503 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17504 return NewShift;
17505 }
17506 }
17507
17508 return SDValue();
17509}
17510
17511/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17512SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17513 DAGCombinerInfo &DCI) const {
17514 SelectionDAG &DAG = DCI.DAG;
17515 unsigned IntNo = N->getConstantOperandVal(0);
17516 switch (IntNo) {
17517 default:
17518 // Don't do anything for most intrinsics.
17519 break;
17520
17521 // Vector shifts: check for immediate versions and lower them.
17522 // Note: This is done during DAG combining instead of DAG legalizing because
17523 // the build_vectors for 64-bit vector element shift counts are generally
17524 // not legal, and it is hard to see their values after they get legalized to
17525 // loads from a constant pool.
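  // For example (annotation, not part of the original source): a vshifts call
  // whose shift operand is a build_vector splat of 3 becomes VSHLIMM(x, 3),
  // while a splat of -3 becomes VSHRsIMM(x, 3).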
17526 case Intrinsic::arm_neon_vshifts:
17527 case Intrinsic::arm_neon_vshiftu:
17528 case Intrinsic::arm_neon_vrshifts:
17529 case Intrinsic::arm_neon_vrshiftu:
17530 case Intrinsic::arm_neon_vrshiftn:
17531 case Intrinsic::arm_neon_vqshifts:
17532 case Intrinsic::arm_neon_vqshiftu:
17533 case Intrinsic::arm_neon_vqshiftsu:
17534 case Intrinsic::arm_neon_vqshiftns:
17535 case Intrinsic::arm_neon_vqshiftnu:
17536 case Intrinsic::arm_neon_vqshiftnsu:
17537 case Intrinsic::arm_neon_vqrshiftns:
17538 case Intrinsic::arm_neon_vqrshiftnu:
17539 case Intrinsic::arm_neon_vqrshiftnsu: {
17540 EVT VT = N->getOperand(1).getValueType();
17541 int64_t Cnt;
17542 unsigned VShiftOpc = 0;
17543
17544 switch (IntNo) {
17545 case Intrinsic::arm_neon_vshifts:
17546 case Intrinsic::arm_neon_vshiftu:
17547 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17548 VShiftOpc = ARMISD::VSHLIMM;
17549 break;
17550 }
17551 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17552 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17553 : ARMISD::VSHRuIMM);
17554 break;
17555 }
17556 return SDValue();
17557
17558 case Intrinsic::arm_neon_vrshifts:
17559 case Intrinsic::arm_neon_vrshiftu:
17560 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17561 break;
17562 return SDValue();
17563
17564 case Intrinsic::arm_neon_vqshifts:
17565 case Intrinsic::arm_neon_vqshiftu:
17566 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17567 break;
17568 return SDValue();
17569
17570 case Intrinsic::arm_neon_vqshiftsu:
17571 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17572 break;
17573 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17574
17575 case Intrinsic::arm_neon_vrshiftn:
17576 case Intrinsic::arm_neon_vqshiftns:
17577 case Intrinsic::arm_neon_vqshiftnu:
17578 case Intrinsic::arm_neon_vqshiftnsu:
17579 case Intrinsic::arm_neon_vqrshiftns:
17580 case Intrinsic::arm_neon_vqrshiftnu:
17581 case Intrinsic::arm_neon_vqrshiftnsu:
17582 // Narrowing shifts require an immediate right shift.
17583 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17584 break;
17585 llvm_unreachable("invalid shift count for narrowing vector shift "
17586 "intrinsic");
17587
17588 default:
17589 llvm_unreachable("unhandled vector shift");
17590 }
17591
17592 switch (IntNo) {
17593 case Intrinsic::arm_neon_vshifts:
17594 case Intrinsic::arm_neon_vshiftu:
17595 // Opcode already set above.
17596 break;
17597 case Intrinsic::arm_neon_vrshifts:
17598 VShiftOpc = ARMISD::VRSHRsIMM;
17599 break;
17600 case Intrinsic::arm_neon_vrshiftu:
17601 VShiftOpc = ARMISD::VRSHRuIMM;
17602 break;
17603 case Intrinsic::arm_neon_vrshiftn:
17604 VShiftOpc = ARMISD::VRSHRNIMM;
17605 break;
17606 case Intrinsic::arm_neon_vqshifts:
17607 VShiftOpc = ARMISD::VQSHLsIMM;
17608 break;
17609 case Intrinsic::arm_neon_vqshiftu:
17610 VShiftOpc = ARMISD::VQSHLuIMM;
17611 break;
17612 case Intrinsic::arm_neon_vqshiftsu:
17613 VShiftOpc = ARMISD::VQSHLsuIMM;
17614 break;
17615 case Intrinsic::arm_neon_vqshiftns:
17616 VShiftOpc = ARMISD::VQSHRNsIMM;
17617 break;
17618 case Intrinsic::arm_neon_vqshiftnu:
17619 VShiftOpc = ARMISD::VQSHRNuIMM;
17620 break;
17621 case Intrinsic::arm_neon_vqshiftnsu:
17622 VShiftOpc = ARMISD::VQSHRNsuIMM;
17623 break;
17624 case Intrinsic::arm_neon_vqrshiftns:
17625 VShiftOpc = ARMISD::VQRSHRNsIMM;
17626 break;
17627 case Intrinsic::arm_neon_vqrshiftnu:
17628 VShiftOpc = ARMISD::VQRSHRNuIMM;
17629 break;
17630 case Intrinsic::arm_neon_vqrshiftnsu:
17631 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17632 break;
17633 }
17634
17635 SDLoc dl(N);
17636 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17637 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17638 }
17639
17640 case Intrinsic::arm_neon_vshiftins: {
17641 EVT VT = N->getOperand(1).getValueType();
17642 int64_t Cnt;
17643 unsigned VShiftOpc = 0;
17644
17645 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17646 VShiftOpc = ARMISD::VSLIIMM;
17647 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17648 VShiftOpc = ARMISD::VSRIIMM;
17649 else {
17650 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17651 }
17652
17653 SDLoc dl(N);
17654 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17655 N->getOperand(1), N->getOperand(2),
17656 DAG.getConstant(Cnt, dl, MVT::i32));
17657 }
17658
17659 case Intrinsic::arm_neon_vqrshifts:
17660 case Intrinsic::arm_neon_vqrshiftu:
17661 // No immediate versions of these to check for.
17662 break;
17663
17664 case Intrinsic::arm_neon_vbsl: {
17665 SDLoc dl(N);
17666 return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17667 N->getOperand(2), N->getOperand(3));
17668 }
17669 case Intrinsic::arm_mve_vqdmlah:
17670 case Intrinsic::arm_mve_vqdmlash:
17671 case Intrinsic::arm_mve_vqrdmlah:
17672 case Intrinsic::arm_mve_vqrdmlash:
17673 case Intrinsic::arm_mve_vmla_n_predicated:
17674 case Intrinsic::arm_mve_vmlas_n_predicated:
17675 case Intrinsic::arm_mve_vqdmlah_predicated:
17676 case Intrinsic::arm_mve_vqdmlash_predicated:
17677 case Intrinsic::arm_mve_vqrdmlah_predicated:
17678 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17679 // These intrinsics all take an i32 scalar operand which is narrowed to the
17680 // size of a single lane of the vector type they return. So we don't need
17681 // any bits of that operand above that point, which allows us to eliminate
17682 // uxth/sxth.
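  // For example (annotation, not part of the original source): for a v8i16
  // vqdmlah only the low 16 bits of the i32 scalar are demanded, so a
  // surrounding sxth/uxth of that scalar can be removed.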
17683 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17684 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17685 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17686 return SDValue();
17687 break;
17688 }
17689
17690 case Intrinsic::arm_mve_minv:
17691 case Intrinsic::arm_mve_maxv:
17692 case Intrinsic::arm_mve_minav:
17693 case Intrinsic::arm_mve_maxav:
17694 case Intrinsic::arm_mve_minv_predicated:
17695 case Intrinsic::arm_mve_maxv_predicated:
17696 case Intrinsic::arm_mve_minav_predicated:
17697 case Intrinsic::arm_mve_maxav_predicated: {
17698 // These intrinsics all take an i32 scalar operand which is narrowed to the
17699 // size of a single lane of the vector type they take as the other input.
17700 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17701 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17702 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17703 return SDValue();
17704 break;
17705 }
17706
17707 case Intrinsic::arm_mve_addv: {
17708 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17709 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17710 bool Unsigned = N->getConstantOperandVal(2);
17711 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17712 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17713 }
17714
17715 case Intrinsic::arm_mve_addlv:
17716 case Intrinsic::arm_mve_addlv_predicated: {
17717 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17718 // which recombines the two outputs into an i64
17719 bool Unsigned = N->getConstantOperandVal(2);
17720 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17721 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17722 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17723
17724 SmallVector<SDValue, 4> Ops;
17725 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17726 if (i != 2) // skip the unsigned flag
17727 Ops.push_back(N->getOperand(i));
17728
17729 SDLoc dl(N);
17730 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17731 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17732 val.getValue(1));
17733 }
17734 }
17735
17736 return SDValue();
17737}
17738
17739/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17740/// lowers them. As with the vector shift intrinsics, this is done during DAG
17741/// combining instead of DAG legalizing because the build_vectors for 64-bit
17742/// vector element shift counts are generally not legal, and it is hard to see
17743/// their values after they get legalized to loads from a constant pool.
17744static SDValue PerformShiftCombine(SDNode *N,
17745 TargetLowering::DAGCombinerInfo &DCI,
17746 const ARMSubtarget *ST) {
17747 SelectionDAG &DAG = DCI.DAG;
17748 EVT VT = N->getValueType(0);
17749
17750 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17751 N->getOperand(0)->getOpcode() == ISD::AND &&
17752 N->getOperand(0)->hasOneUse()) {
17753 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17754 return SDValue();
17755 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17756 // usually show up because instcombine prefers to canonicalize it to
17757 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17758 // out of GEP lowering in some cases.
17759 SDValue N0 = N->getOperand(0);
17760 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17761 if (!ShiftAmtNode)
17762 return SDValue();
17763 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17764 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17765 if (!AndMaskNode)
17766 return SDValue();
17767 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17768 // Don't transform uxtb/uxth.
17769 if (AndMask == 255 || AndMask == 65535)
17770 return SDValue();
17771 if (isMask_32(AndMask)) {
17772 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17773 if (MaskedBits > ShiftAmt) {
17774 SDLoc DL(N);
17775 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17776 DAG.getConstant(MaskedBits, DL, MVT::i32));
17777 return DAG.getNode(
17778 ISD::SRL, DL, MVT::i32, SHL,
17779 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17780 }
17781 }
17782 }
17783
17784 // Nothing to be done for scalar shifts.
17785 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17786 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17787 return SDValue();
17788 if (ST->hasMVEIntegerOps())
17789 return SDValue();
17790
17791 int64_t Cnt;
17792
17793 switch (N->getOpcode()) {
17794 default: llvm_unreachable("unexpected shift opcode");
17795
17796 case ISD::SHL:
17797 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17798 SDLoc dl(N);
17799 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17800 DAG.getConstant(Cnt, dl, MVT::i32));
17801 }
17802 break;
17803
17804 case ISD::SRA:
17805 case ISD::SRL:
17806 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17807 unsigned VShiftOpc =
17808 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17809 SDLoc dl(N);
17810 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17811 DAG.getConstant(Cnt, dl, MVT::i32));
17812 }
17813 }
17814 return SDValue();
17815}
17816
17817// Look for a sign/zero/fpextend extend of a larger than legal load. This can be
17818// split into multiple extending loads, which are simpler to deal with than an
17819// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17820// to convert the type to an f32.
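// For example (annotation, not part of the original source): a zext of a
// v8i8 load to v8i32 is split into two v4i8->v4i32 zero-extending loads at
// byte offsets 0 and 4 whose results are concatenated, and an fpext of a
// v8f16 load to v8f32 is split similarly with VCVTL converting each half.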
17821static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17822 SDValue N0 = N->getOperand(0);
17823 if (N0.getOpcode() != ISD::LOAD)
17824 return SDValue();
17825 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17826 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17827 LD->getExtensionType() != ISD::NON_EXTLOAD)
17828 return SDValue();
17829 EVT FromVT = LD->getValueType(0);
17830 EVT ToVT = N->getValueType(0);
17831 if (!ToVT.isVector())
17832 return SDValue();
17833 assert(FromVT.isVector());
17834 EVT ToEltVT = ToVT.getVectorElementType();
17835 EVT FromEltVT = FromVT.getVectorElementType();
17836
17837 unsigned NumElements = 0;
17838 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17839 NumElements = 4;
17840 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17841 NumElements = 4;
17842 if (NumElements == 0 ||
17843 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17844 FromVT.getVectorNumElements() % NumElements != 0 ||
17845 !isPowerOf2_32(NumElements))
17846 return SDValue();
17847
17848 LLVMContext &C = *DAG.getContext();
17849 SDLoc DL(LD);
17850 // Details about the old load
17851 SDValue Ch = LD->getChain();
17852 SDValue BasePtr = LD->getBasePtr();
17853 Align Alignment = LD->getBaseAlign();
17854 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17855 AAMDNodes AAInfo = LD->getAAInfo();
17856
17857 ISD::LoadExtType NewExtType =
17858 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17859 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17860 EVT NewFromVT = EVT::getVectorVT(
17861 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17862 EVT NewToVT = EVT::getVectorVT(
17863 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17864
17865 SmallVector<SDValue, 4> Loads;
17866 SmallVector<SDValue, 4> Chains;
17867 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17868 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17869 SDValue NewPtr =
17870 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17871
17872 SDValue NewLoad =
17873 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17874 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17875 Alignment, MMOFlags, AAInfo);
17876 Loads.push_back(NewLoad);
17877 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17878 }
17879
17880 // Float truncs need to be extended with VCVTB's into their floating point types.
17881 if (FromEltVT == MVT::f16) {
17882 SmallVector<SDValue, 4> Extends;
17883
17884 for (unsigned i = 0; i < Loads.size(); i++) {
17885 SDValue LoadBC =
17886 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17887 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17888 DAG.getConstant(0, DL, MVT::i32));
17889 Extends.push_back(FPExt);
17890 }
17891
17892 Loads = Extends;
17893 }
17894
17895 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17896 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17897 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17898}
17899
17900/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17901/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17902static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17903 const ARMSubtarget *ST) {
17904 SDValue N0 = N->getOperand(0);
17905
17906 // Check for sign- and zero-extensions of vector extract operations of 8- and
17907 // 16-bit vector elements. NEON and MVE support these directly. They are
17908 // handled during DAG combining because type legalization will promote them
17909 // to 32-bit types and it is messy to recognize the operations after that.
17910 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17911 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17912 SDValue Vec = N0.getOperand(0);
17913 SDValue Lane = N0.getOperand(1);
17914 EVT VT = N->getValueType(0);
17915 EVT EltVT = N0.getValueType();
17916 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17917
17918 if (VT == MVT::i32 &&
17919 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17920 TLI.isTypeLegal(Vec.getValueType()) &&
17921 isa<ConstantSDNode>(Lane)) {
17922
17923 unsigned Opc = 0;
17924 switch (N->getOpcode()) {
17925 default: llvm_unreachable("unexpected opcode");
17926 case ISD::SIGN_EXTEND:
17927 Opc = ARMISD::VGETLANEs;
17928 break;
17929 case ISD::ZERO_EXTEND:
17930 case ISD::ANY_EXTEND:
17931 Opc = ARMISD::VGETLANEu;
17932 break;
17933 }
17934 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17935 }
17936 }
17937
17938 if (ST->hasMVEIntegerOps())
17939 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17940 return NewLoad;
17941
17942 return SDValue();
17943}
17944
17945static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17946 const ARMSubtarget *ST) {
17947 if (ST->hasMVEFloatOps())
17948 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17949 return NewLoad;
17950
17951 return SDValue();
17952}
17953
17954// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17955// constant bounds.
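// For example (annotation, not part of the original source):
//   smin(smax(x, -128), 127) clamps x to the signed 8-bit range and becomes an
//   SSAT, while smin(smax(x, 0), 255) becomes a USAT of the [0, 255] range.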
17956static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17957 const ARMSubtarget *Subtarget) {
17958 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17959 !Subtarget->isThumb2())
17960 return SDValue();
17961
17962 EVT VT = Op.getValueType();
17963 SDValue Op0 = Op.getOperand(0);
17964
17965 if (VT != MVT::i32 ||
17966 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17967 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17968 !isa<ConstantSDNode>(Op0.getOperand(1)))
17969 return SDValue();
17970
17971 SDValue Min = Op;
17972 SDValue Max = Op0;
17973 SDValue Input = Op0.getOperand(0);
17974 if (Min.getOpcode() == ISD::SMAX)
17975 std::swap(Min, Max);
17976
17977 APInt MinC = Min.getConstantOperandAPInt(1);
17978 APInt MaxC = Max.getConstantOperandAPInt(1);
17979
17980 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17981 !(MinC + 1).isPowerOf2())
17982 return SDValue();
17983
17984 SDLoc DL(Op);
17985 if (MinC == ~MaxC)
17986 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17987 DAG.getConstant(MinC.countr_one(), DL, VT));
17988 if (MaxC == 0)
17989 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17990 DAG.getConstant(MinC.countr_one(), DL, VT));
17991
17992 return SDValue();
17993}
17994
17995/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17996/// saturates.
17997static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17998 const ARMSubtarget *ST) {
17999 EVT VT = N->getValueType(0);
18000 SDValue N0 = N->getOperand(0);
18001
18002 if (VT == MVT::i32)
18003 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
18004
18005 if (!ST->hasMVEIntegerOps())
18006 return SDValue();
18007
18008 if (SDValue V = PerformVQDMULHCombine(N, DAG))
18009 return V;
18010
18011 if (VT != MVT::v4i32 && VT != MVT::v8i16)
18012 return SDValue();
18013
18014 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
18015 // Check one is a smin and the other is a smax
18016 if (Min->getOpcode() != ISD::SMIN)
18017 std::swap(Min, Max);
18018 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
18019 return false;
18020
18021 APInt SaturateC;
18022 if (VT == MVT::v4i32)
18023 SaturateC = APInt(32, (1 << 15) - 1, true);
18024 else //if (VT == MVT::v8i16)
18025 SaturateC = APInt(16, (1 << 7) - 1, true);
18026
18027 APInt MinC, MaxC;
18028 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18029 MinC != SaturateC)
18030 return false;
18031 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
18032 MaxC != ~SaturateC)
18033 return false;
18034 return true;
18035 };
18036
18037 if (IsSignedSaturate(N, N0.getNode())) {
18038 SDLoc DL(N);
18039 MVT ExtVT, HalfVT;
18040 if (VT == MVT::v4i32) {
18041 HalfVT = MVT::v8i16;
18042 ExtVT = MVT::v4i16;
18043 } else { // if (VT == MVT::v8i16)
18044 HalfVT = MVT::v16i8;
18045 ExtVT = MVT::v8i8;
18046 }
18047
18048 // Create a VQMOVNB with undef top lanes, then sign extended into the top
18049 // half. That extend will hopefully be removed if only the bottom bits are
18050 // demanded (through a truncating store, for example).
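  // For example (annotation, not part of the original source): a v4i32
  // smin(smax(x, -32768), 32767) becomes a VQMOVNs writing the bottom i16
  // lanes of a v8i16 register, reinterpreted back to v4i32 and sign-extended
  // in-reg from 16 bits.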
18051 SDValue VQMOVN =
18052 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18053 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18054 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18055 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18056 DAG.getValueType(ExtVT));
18057 }
18058
18059 auto IsUnsignedSaturate = [&](SDNode *Min) {
18060 // For unsigned, we just need to check for <= 0xffff
18061 if (Min->getOpcode() != ISD::UMIN)
18062 return false;
18063
18064 APInt SaturateC;
18065 if (VT == MVT::v4i32)
18066 SaturateC = APInt(32, (1 << 16) - 1, true);
18067 else //if (VT == MVT::v8i16)
18068 SaturateC = APInt(16, (1 << 8) - 1, true);
18069
18070 APInt MinC;
18071 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18072 MinC != SaturateC)
18073 return false;
18074 return true;
18075 };
18076
18077 if (IsUnsignedSaturate(N)) {
18078 SDLoc DL(N);
18079 MVT HalfVT;
18080 unsigned ExtConst;
18081 if (VT == MVT::v4i32) {
18082 HalfVT = MVT::v8i16;
18083 ExtConst = 0x0000FFFF;
18084 } else { //if (VT == MVT::v8i16)
18085 HalfVT = MVT::v16i8;
18086 ExtConst = 0x00FF;
18087 }
18088
18089 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18090 // an AND. That extend will hopefully be removed if only the bottom bits are
18091 // demanded (through a truncating store, for example).
18092 SDValue VQMOVN =
18093 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18094 DAG.getConstant(0, DL, MVT::i32));
18095 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18096 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18097 DAG.getConstant(ExtConst, DL, VT));
18098 }
18099
18100 return SDValue();
18101}
18102
18103static const APInt *isPowerOf2Constant(SDValue V) {
18104 const auto *C = dyn_cast<ConstantSDNode>(V);
18105 if (!C)
18106 return nullptr;
18107 const APInt *CV = &C->getAPIntValue();
18108 return CV->isPowerOf2() ? CV : nullptr;
18109}
18110
18111SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18112 // If we have a CMOV, OR and AND combination such as:
18113 // if (x & CN)
18114 // y |= CM;
18115 //
18116 // And:
18117 // * CN is a single bit;
18118 // * All bits covered by CM are known zero in y
18119 //
18120 // Then we can convert this into a sequence of BFI instructions. This will
18121 // always be a win if CM is a single bit, will always be no worse than the
18122 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18123 // three bits (due to the extra IT instruction).
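  // Worked example (annotation, not part of the original source): for
  //   if (x & 0x4) y |= 0x2;
  // with bit 1 of y known to be zero, this becomes roughly
  //   lsr rX, x, #2 ; bfi y, rX, #1, #1
  // avoiding the TST plus conditional ORR sequence.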
18124
18125 SDValue Op0 = CMOV->getOperand(0);
18126 SDValue Op1 = CMOV->getOperand(1);
18127 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18128 SDValue CmpZ = CMOV->getOperand(3);
18129
18130 // The compare must be against zero.
18131 if (!isNullConstant(CmpZ->getOperand(1)))
18132 return SDValue();
18133
18134 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18135 SDValue And = CmpZ->getOperand(0);
18136 if (And->getOpcode() != ISD::AND)
18137 return SDValue();
18138 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18139 if (!AndC)
18140 return SDValue();
18141 SDValue X = And->getOperand(0);
18142
18143 if (CC == ARMCC::EQ) {
18144 // We're performing an "equal to zero" compare. Swap the operands so we
18145 // canonicalize on a "not equal to zero" compare.
18146 std::swap(Op0, Op1);
18147 } else {
18148 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18149 }
18150
18151 if (Op1->getOpcode() != ISD::OR)
18152 return SDValue();
18153
18154 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18155 if (!OrC)
18156 return SDValue();
18157 SDValue Y = Op1->getOperand(0);
18158
18159 if (Op0 != Y)
18160 return SDValue();
18161
18162 // Now, is it profitable to continue?
18163 APInt OrCI = OrC->getAPIntValue();
18164 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18165 if (OrCI.popcount() > Heuristic)
18166 return SDValue();
18167
18168 // Lastly, can we determine that the bits defined by OrCI
18169 // are zero in Y?
18170 KnownBits Known = DAG.computeKnownBits(Y);
18171 if ((OrCI & Known.Zero) != OrCI)
18172 return SDValue();
18173
18174 // OK, we can do the combine.
18175 SDValue V = Y;
18176 SDLoc dl(X);
18177 EVT VT = X.getValueType();
18178 unsigned BitInX = AndC->logBase2();
18179
18180 if (BitInX != 0) {
18181 // We must shift X first.
18182 X = DAG.getNode(ISD::SRL, dl, VT, X,
18183 DAG.getConstant(BitInX, dl, VT));
18184 }
18185
18186 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18187 BitInY < NumActiveBits; ++BitInY) {
18188 if (OrCI[BitInY] == 0)
18189 continue;
18190 APInt Mask(VT.getSizeInBits(), 0);
18191 Mask.setBit(BitInY);
18192 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18193 // Confusingly, the operand is an *inverted* mask.
18194 DAG.getConstant(~Mask, dl, VT));
18195 }
18196
18197 return V;
18198}
18199
18200// Given N, the value controlling the conditional branch, search for the loop
18201// intrinsic, returning it, along with how the value is used. We need to handle
18202// patterns such as the following:
18203// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18204// (brcond (setcc (loop.decrement), 0, eq), exit)
18205// (brcond (setcc (loop.decrement), 0, ne), header)
18206static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18207 bool &Negate) {
18208 switch (N->getOpcode()) {
18209 default:
18210 break;
18211 case ISD::XOR: {
18212 if (!isa<ConstantSDNode>(N.getOperand(1)))
18213 return SDValue();
18214 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18215 return SDValue();
18216 Negate = !Negate;
18217 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18218 }
18219 case ISD::SETCC: {
18220 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18221 if (!Const)
18222 return SDValue();
18223 if (Const->isZero())
18224 Imm = 0;
18225 else if (Const->isOne())
18226 Imm = 1;
18227 else
18228 return SDValue();
18229 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18230 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18231 }
18232 case ISD::INTRINSIC_W_CHAIN: {
18233 unsigned IntOp = N.getConstantOperandVal(1);
18234 if (IntOp != Intrinsic::test_start_loop_iterations &&
18235 IntOp != Intrinsic::loop_decrement_reg)
18236 return SDValue();
18237 return N;
18238 }
18239 }
18240 return SDValue();
18241}
18242
18243static SDValue PerformHWLoopCombine(SDNode *N,
18244 TargetLowering::DAGCombinerInfo &DCI,
18245 const ARMSubtarget *ST) {
18246
18247 // The hwloop intrinsics that we're interested are used for control-flow,
18248 // either for entering or exiting the loop:
18249 // - test.start.loop.iterations will test whether its operand is zero. If it
18250 // is zero, the following branch should not enter the loop.
18251 // - loop.decrement.reg also tests whether its operand is zero. If it is
18252 // zero, the following branch should not branch back to the beginning of
18253 // the loop.
18254 // So here, we need to check how the brcond is using the result of each
18255 // of the intrinsics to ensure that we're branching to the right place at the
18256 // right time.
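  // For example (annotation, not part of the original source):
  //   (brcond (setcc (test.start.loop.iterations n), 0, eq), exit)
  // becomes a WLSSETUP/WLS pair that branches to 'exit' when the tested
  // iteration count is zero.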
18257
18258 ISD::CondCode CC;
18259 SDValue Cond;
18260 int Imm = 1;
18261 bool Negate = false;
18262 SDValue Chain = N->getOperand(0);
18263 SDValue Dest;
18264
18265 if (N->getOpcode() == ISD::BRCOND) {
18266 CC = ISD::SETEQ;
18267 Cond = N->getOperand(1);
18268 Dest = N->getOperand(2);
18269 } else {
18270 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18271 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18272 Cond = N->getOperand(2);
18273 Dest = N->getOperand(4);
18274 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18275 if (!Const->isOne() && !Const->isZero())
18276 return SDValue();
18277 Imm = Const->getZExtValue();
18278 } else
18279 return SDValue();
18280 }
18281
18282 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18283 if (!Int)
18284 return SDValue();
18285
18286 if (Negate)
18287 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18288
18289 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18290 return (CC == ISD::SETEQ && Imm == 0) ||
18291 (CC == ISD::SETNE && Imm == 1) ||
18292 (CC == ISD::SETLT && Imm == 1) ||
18293 (CC == ISD::SETULT && Imm == 1);
18294 };
18295
18296 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18297 return (CC == ISD::SETEQ && Imm == 1) ||
18298 (CC == ISD::SETNE && Imm == 0) ||
18299 (CC == ISD::SETGT && Imm == 0) ||
18300 (CC == ISD::SETUGT && Imm == 0) ||
18301 (CC == ISD::SETGE && Imm == 1) ||
18302 (CC == ISD::SETUGE && Imm == 1);
18303 };
18304
18305 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18306 "unsupported condition");
18307
18308 SDLoc dl(Int);
18309 SelectionDAG &DAG = DCI.DAG;
18310 SDValue Elements = Int.getOperand(2);
18311 unsigned IntOp = Int->getConstantOperandVal(1);
18312 assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
18313 "expected single br user");
18314 SDNode *Br = *N->user_begin();
18315 SDValue OtherTarget = Br->getOperand(1);
18316
18317 // Update the unconditional branch to branch to the given Dest.
18318 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18319 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18320 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18321 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18322 };
18323
18324 if (IntOp == Intrinsic::test_start_loop_iterations) {
18325 SDValue Res;
18326 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18327 // We expect this 'instruction' to branch when the counter is zero.
18328 if (IsTrueIfZero(CC, Imm)) {
18329 SDValue Ops[] = {Chain, Setup, Dest};
18330 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18331 } else {
18332 // The logic is the reverse of what we need for WLS, so find the other
18333 // basic block target: the target of the following br.
18334 UpdateUncondBr(Br, Dest, DAG);
18335
18336 SDValue Ops[] = {Chain, Setup, OtherTarget};
18337 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18338 }
18339 // Update LR count to the new value
18340 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18341 // Update chain
18342 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18343 return Res;
18344 } else {
18345 SDValue Size =
18346 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18347 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18348 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18349 DAG.getVTList(MVT::i32, MVT::Other), Args);
18350 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18351
18352 // We expect this instruction to branch when the count is not zero.
18353 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18354
18355 // Update the unconditional branch to target the loop preheader if we've
18356 // found the condition has been reversed.
18357 if (Target == OtherTarget)
18358 UpdateUncondBr(Br, Dest, DAG);
18359
18360 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18361 SDValue(LoopDec.getNode(), 1), Chain);
18362
18363 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18364 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18365 }
18366 return SDValue();
18367}
18368
18369/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18370SDValue
18371ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18372 SDValue Cmp = N->getOperand(3);
18373 if (Cmp.getOpcode() != ARMISD::CMPZ)
18374 // Only looking at NE cases.
18375 return SDValue();
18376
18377 SDLoc dl(N);
18378 SDValue LHS = Cmp.getOperand(0);
18379 SDValue RHS = Cmp.getOperand(1);
18380 SDValue Chain = N->getOperand(0);
18381 SDValue BB = N->getOperand(1);
18382 SDValue ARMcc = N->getOperand(2);
18383 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18384
18385 // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
18386 // -> (brcond Chain BB CC Flags)
18387 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18388 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18389 LHS->getOperand(0)->hasOneUse() &&
18390 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18391 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18392 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18393 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
18394 LHS->getOperand(0)->getOperand(2),
18395 LHS->getOperand(0)->getOperand(3));
18396 }
18397
18398 return SDValue();
18399}
18400
18401/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18402SDValue
18403ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18404 SDValue Cmp = N->getOperand(3);
18405 if (Cmp.getOpcode() != ARMISD::CMPZ)
18406 // Only looking at EQ and NE cases.
18407 return SDValue();
18408
18409 EVT VT = N->getValueType(0);
18410 SDLoc dl(N);
18411 SDValue LHS = Cmp.getOperand(0);
18412 SDValue RHS = Cmp.getOperand(1);
18413 SDValue FalseVal = N->getOperand(0);
18414 SDValue TrueVal = N->getOperand(1);
18415 SDValue ARMcc = N->getOperand(2);
18416 ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
18417
18418 // BFI is only available on V6T2+.
18419 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18420 SDValue R = PerformCMOVToBFICombine(N, DAG);
18421 if (R)
18422 return R;
18423 }
18424
18425 // Simplify
18426 // mov r1, r0
18427 // cmp r1, x
18428 // mov r0, y
18429 // moveq r0, x
18430 // to
18431 // cmp r0, x
18432 // movne r0, y
18433 //
18434 // mov r1, r0
18435 // cmp r1, x
18436 // mov r0, x
18437 // movne r0, y
18438 // to
18439 // cmp r0, x
18440 // movne r0, y
18441 /// FIXME: Turn this into a target neutral optimization?
18442 SDValue Res;
18443 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18444 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
18445 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18446 SDValue ARMcc;
18447 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18448 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
18449 }
18450
18451 // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
18452 // -> (cmov F T CC Flags)
18453 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18454 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18455 isNullConstant(RHS)) {
18456 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18457 LHS->getOperand(2), LHS->getOperand(3));
18458 }
18459
18460 if (!VT.isInteger())
18461 return SDValue();
18462
18463 // Fold away an unnecessary CMPZ/CMOV
18464 // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18465 // if C1==EQ -> CMOV A, B, C2, D
18466 // if C1==NE -> CMOV A, B, NOT(C2), D
18467 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18468 N->getConstantOperandVal(2) == ARMCC::NE) {
18469 ARMCC::CondCodes Cond;
18470 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
18471 if (N->getConstantOperandVal(2) == ARMCC::NE)
18472 Cond = ARMCC::getOppositeCondition(Cond);
18473 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18474 N->getOperand(1),
18475 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
18476 }
18477 }
18478
18479 // Materialize a boolean comparison for integers so we can avoid branching.
18480 if (isNullConstant(FalseVal)) {
18481 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18482 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18483 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18484 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18485 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
18486 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18487 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18488 DAG.getConstant(5, dl, MVT::i32));
18489 } else {
18490 // CMOV 0, 1, ==, (CMPZ x, y) ->
18491 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18492 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18493 //
18494 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18495 // x != y. In other words, a carry C == 1 when x == y, C == 0
18496 // otherwise.
18497 // The final UADDO_CARRY computes
18498 // x - y + (0 - (x - y)) + C == C
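  // Numeric check (annotation, not part of the original source): if x == y the
  // SUB is 0, the USUBO borrows nothing, so C == 1 and the result is 1; if
  // x != y the SUB is nonzero, the USUBO borrows, C == 0 and the additions
  // cancel to give 0.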
18499 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18500 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18501 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18502 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18503 // actually.
18504 SDValue Carry =
18505 DAG.getNode(ISD::SUB, dl, MVT::i32,
18506 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18507 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18508 }
18509 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18510 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18511 // This seems pointless but will allow us to combine it further below.
18512 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18513 SDValue Sub =
18514 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18515 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18516 Sub.getValue(1));
18517 FalseVal = Sub;
18518 }
18519 } else if (isNullConstant(TrueVal)) {
18520 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18521 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18522 // This seems pointless but will allow us to combine it further below
18523 // Note that we change == for != as this is the dual for the case above.
18524 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
18525 SDValue Sub =
18526 DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18527 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18528 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18529 Sub.getValue(1));
18530 FalseVal = Sub;
18531 }
18532 }
18533
18534 // On Thumb1, the DAG above may be further combined if z is a power of 2
18535 // (z == 2 ^ K).
18536 // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->
18537 // t1 = (USUBO (SUB x, y), 1)
18538 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18539 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18540 //
18541 // This also handles the special case of comparing against zero; it's
18542 // essentially, the same pattern, except there's no SUBC:
18543 // CMOV x, z, !=, (CMPZ x, 0) ->
18544 // t1 = (USUBO x, 1)
18545 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18546 // Result = if K != 0 then (SHL t2:0, K) else t2:0
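  // Worked example (annotation, not part of the original source): with z == 4
  // (K == 2) and x != y, t1 = Sub - 1 produces no borrow, t2 = Sub - t1 - 0
  // == 1 and the SHL by 2 yields 4; when x == y, t1 borrows and t2 wraps to 0.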
18547 const APInt *TrueConst;
18548 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18549 ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&
18550 FalseVal.getOperand(1) == RHS) ||
18551 (FalseVal == LHS && isNullConstant(RHS))) &&
18552 (TrueConst = isPowerOf2Constant(TrueVal))) {
18553 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18554 unsigned ShiftAmount = TrueConst->logBase2();
18555 if (ShiftAmount)
18556 TrueVal = DAG.getConstant(1, dl, VT);
18557 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18558 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18559 Subc.getValue(1));
18560
18561 if (ShiftAmount)
18562 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18563 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18564 }
18565
18566 if (Res.getNode()) {
18567 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18568 // Capture demanded bits information that would be otherwise lost.
18569 if (Known.Zero == 0xfffffffe)
18570 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18571 DAG.getValueType(MVT::i1));
18572 else if (Known.Zero == 0xffffff00)
18573 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18574 DAG.getValueType(MVT::i8));
18575 else if (Known.Zero == 0xffff0000)
18576 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18577 DAG.getValueType(MVT::i16));
18578 }
18579
18580 return Res;
18581}
18582
18583static SDValue PerformBITCASTCombine(SDNode *N,
18584 TargetLowering::DAGCombinerInfo &DCI,
18585 const ARMSubtarget *ST) {
18586 SelectionDAG &DAG = DCI.DAG;
18587 SDValue Src = N->getOperand(0);
18588 EVT DstVT = N->getValueType(0);
18589
18590 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18591 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18592 EVT SrcVT = Src.getValueType();
18593 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18594 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18595 }
18596
18597 // We may have a bitcast of something that has already had this bitcast
18598 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18599 if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
18600 Src.getOperand(0).getValueType().getScalarSizeInBits() <=
18601 Src.getValueType().getScalarSizeInBits())
18602 Src = Src.getOperand(0);
18603
18604 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18605 // would be generated is at least the width of the element type.
18606 EVT SrcVT = Src.getValueType();
18607 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18608 Src.getOpcode() == ARMISD::VMVNIMM ||
18609 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18610 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18611 DAG.getDataLayout().isBigEndian())
18612 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18613
18614 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18615 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18616 return R;
18617
18618 return SDValue();
18619}
18620
18621// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18622// node into stack operations after legalizeOps.
18623SDValue ARMTargetLowering::PerformMVETruncCombine(
18624 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18625 SelectionDAG &DAG = DCI.DAG;
18626 EVT VT = N->getValueType(0);
18627 SDLoc DL(N);
18628
18629 // MVETrunc(Undef, Undef) -> Undef
18630 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18631 return DAG.getUNDEF(VT);
18632
18633 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18634 if (N->getNumOperands() == 2 &&
18635 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18636 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18637 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18638 N->getOperand(0).getOperand(1),
18639 N->getOperand(1).getOperand(0),
18640 N->getOperand(1).getOperand(1));
18641
18642 // MVETrunc(shuffle, shuffle) -> VMOVN
18643 if (N->getNumOperands() == 2 &&
18644 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18645 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18646 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18647 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18648
18649 if (S0->getOperand(0) == S1->getOperand(0) &&
18650 S0->getOperand(1) == S1->getOperand(1)) {
18651 // Construct complete shuffle mask
18652 SmallVector<int, 8> Mask(S0->getMask());
18653 Mask.append(S1->getMask().begin(), S1->getMask().end());
18654
18655 if (isVMOVNTruncMask(Mask, VT, false))
18656 return DAG.getNode(
18657 ARMISD::VMOVN, DL, VT,
18658 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18659 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18660 DAG.getConstant(1, DL, MVT::i32));
18661 if (isVMOVNTruncMask(Mask, VT, true))
18662 return DAG.getNode(
18663 ARMISD::VMOVN, DL, VT,
18664 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18665 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18666 DAG.getConstant(1, DL, MVT::i32));
18667 }
18668 }
18669
18670 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18671 // truncate to a buildvector to allow the generic optimisations to kick in.
18672 if (all_of(N->ops(), [](SDValue Op) {
18673 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18674 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18675 (Op.getOpcode() == ISD::BITCAST &&
18676 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18677 })) {
18678 SmallVector<SDValue, 8> Extracts;
18679 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18680 SDValue O = N->getOperand(Op);
18681 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18682 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18683 DAG.getConstant(i, DL, MVT::i32));
18684 Extracts.push_back(Ext);
18685 }
18686 }
18687 return DAG.getBuildVector(VT, DL, Extracts);
18688 }
18689
18690 // If we are late in the legalization process and nothing has optimised
18691 // the trunc to anything better, lower it to a stack store and reload,
18692 // performing the truncation whilst keeping the lanes in the correct order:
18693 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
18694 if (!DCI.isAfterLegalizeDAG())
18695 return SDValue();
18696
18697 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18698 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18699 int NumIns = N->getNumOperands();
18700 assert((NumIns == 2 || NumIns == 4) &&
18701 "Expected 2 or 4 inputs to an MVETrunc");
18702 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18703 if (N->getNumOperands() == 4)
18704 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18705
18706 SmallVector<SDValue> Chains;
18707 for (int I = 0; I < NumIns; I++) {
18708 SDValue Ptr = DAG.getNode(
18709 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18710 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18711 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18712 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18713 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18714 Ptr, MPI, StoreVT, Align(4));
18715 Chains.push_back(Ch);
18716 }
18717
18718 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18719 MachinePointerInfo MPI =
18720 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18721 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18722}
18723
18724// Take a MVEEXT(load x) and split that into (extload x, extload x+8)
18725static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18726 SelectionDAG &DAG) {
18727 SDValue N0 = N->getOperand(0);
18728 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18729 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18730 return SDValue();
18731
18732 EVT FromVT = LD->getMemoryVT();
18733 EVT ToVT = N->getValueType(0);
18734 if (!ToVT.isVector())
18735 return SDValue();
18736 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18737 EVT ToEltVT = ToVT.getVectorElementType();
18738 EVT FromEltVT = FromVT.getVectorElementType();
18739
18740 unsigned NumElements = 0;
18741 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18742 NumElements = 4;
18743 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18744 NumElements = 8;
18745 assert(NumElements != 0);
18746
18747 ISD::LoadExtType NewExtType =
18748 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18749 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18750 LD->getExtensionType() != ISD::EXTLOAD &&
18751 LD->getExtensionType() != NewExtType)
18752 return SDValue();
18753
18754 LLVMContext &C = *DAG.getContext();
18755 SDLoc DL(LD);
18756 // Details about the old load
18757 SDValue Ch = LD->getChain();
18758 SDValue BasePtr = LD->getBasePtr();
18759 Align Alignment = LD->getBaseAlign();
18760 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18761 AAMDNodes AAInfo = LD->getAAInfo();
18762
18763 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18764 EVT NewFromVT = EVT::getVectorVT(
18765 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18766 EVT NewToVT = EVT::getVectorVT(
18767 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18768
18769 SmallVector<SDValue, 4> Loads;
18770 SmallVector<SDValue, 4> Chains;
18771 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18772 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18773 SDValue NewPtr =
18774 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18775
18776 SDValue NewLoad =
18777 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18778 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18779 Alignment, MMOFlags, AAInfo);
18780 Loads.push_back(NewLoad);
18781 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18782 }
18783
18784 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18785 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18786 return DAG.getMergeValues(Loads, DL);
18787}
18788
18789// Perform combines for MVEEXT. If it has not been optimized to anything better
18790// before lowering, it gets converted to stack store and extloads performing the
18791// extend whilst still keeping the same lane ordering.
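// For example (annotation, not part of the original source): an MVESEXT of a
// v8i16 VDUP produces the same sign_extend_inreg'd VDUP for both v4i32
// results, and an MVESEXT of a wide load is split into two sign-extending
// loads instead.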
18792SDValue ARMTargetLowering::PerformMVEExtCombine(
18793 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18794 SelectionDAG &DAG = DCI.DAG;
18795 EVT VT = N->getValueType(0);
18796 SDLoc DL(N);
18797 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18798 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18799
18800 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18801 *DAG.getContext());
18802 auto Extend = [&](SDValue V) {
18803 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18804 return N->getOpcode() == ARMISD::MVESEXT
18805 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18806 DAG.getValueType(ExtVT))
18807 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18808 };
18809
18810 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18811 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18812 SDValue Ext = Extend(N->getOperand(0));
18813 return DAG.getMergeValues({Ext, Ext}, DL);
18814 }
18815
18816 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18817 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18818 ArrayRef<int> Mask = SVN->getMask();
18819 assert(Mask.size() == 2 * VT.getVectorNumElements());
18820 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18821 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18822 SDValue Op0 = SVN->getOperand(0);
18823 SDValue Op1 = SVN->getOperand(1);
18824
18825 auto CheckInregMask = [&](int Start, int Offset) {
18826 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18827 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18828 return false;
18829 return true;
18830 };
18831 SDValue V0 = SDValue(N, 0);
18832 SDValue V1 = SDValue(N, 1);
18833 if (CheckInregMask(0, 0))
18834 V0 = Extend(Op0);
18835 else if (CheckInregMask(0, 1))
18836 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18837 else if (CheckInregMask(0, Mask.size()))
18838 V0 = Extend(Op1);
18839 else if (CheckInregMask(0, Mask.size() + 1))
18840 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18841
18842 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18843 V1 = Extend(Op1);
18844 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18845 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18846 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18847 V1 = Extend(Op0);
18848 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18849 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18850
18851 if (V0.getNode() != N || V1.getNode() != N)
18852 return DAG.getMergeValues({V0, V1}, DL);
18853 }
18854
18855 // MVEEXT(load) -> extload, extload
18856 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18857 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18858 return L;
18859
18860 if (!DCI.isAfterLegalizeDAG())
18861 return SDValue();
18862
18863 // Lower to a stack store and reload:
18864 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18865 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18866 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18867 int NumOuts = N->getNumValues();
18868 assert((NumOuts == 2 || NumOuts == 4) &&
18869 "Expected 2 or 4 outputs to an MVEEXT");
18870 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18871 *DAG.getContext());
18872 if (N->getNumOperands() == 4)
18873 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18874
18875 MachinePointerInfo MPI =
18876 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18877 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18878 StackPtr, MPI, Align(4));
18879
18880 SmallVector<SDValue> Loads;
18881 for (int I = 0; I < NumOuts; I++) {
18882 SDValue Ptr = DAG.getNode(
18883 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18884 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18885 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18886 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18887 SDValue Load = DAG.getExtLoad(
18888 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18889 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18890 Loads.push_back(Load);
18891 }
18892
18893 return DAG.getMergeValues(Loads, DL);
18894}
18895
18896SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18897 DAGCombinerInfo &DCI) const {
18898 switch (N->getOpcode()) {
18899 default: break;
18900 case ISD::SELECT_CC:
18901 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18902 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18903 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18904 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18905 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18906 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18907 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18908 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18909 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18910 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18911 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18912 case ISD::BRCOND:
18913 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18914 case ARMISD::ADDC:
18915 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18916 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18917 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18918 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18919 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18920 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18921 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18922 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18923 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18924 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18925 case ISD::EXTRACT_VECTOR_ELT:
18926 return PerformExtractEltCombine(N, DCI, Subtarget);
18927 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18928 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18929 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18930 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18931 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18932 case ISD::FP_TO_SINT:
18933 case ISD::FP_TO_UINT:
18934 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18935 case ISD::FADD:
18936 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18937 case ISD::FMUL:
18938 return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);
18939 case ISD::INTRINSIC_WO_CHAIN:
18940 return PerformIntrinsicCombine(N, DCI);
18941 case ISD::SHL:
18942 case ISD::SRA:
18943 case ISD::SRL:
18944 return PerformShiftCombine(N, DCI, Subtarget);
18945 case ISD::SIGN_EXTEND:
18946 case ISD::ZERO_EXTEND:
18947 case ISD::ANY_EXTEND:
18948 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18949 case ISD::FP_EXTEND:
18950 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18951 case ISD::SMIN:
18952 case ISD::UMIN:
18953 case ISD::SMAX:
18954 case ISD::UMAX:
18955 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18956 case ARMISD::CMOV:
18957 return PerformCMOVCombine(N, DCI.DAG);
18958 case ARMISD::BRCOND:
18959 return PerformBRCONDCombine(N, DCI.DAG);
18960 case ARMISD::CMPZ:
18961 return PerformCMPZCombine(N, DCI.DAG);
18962 case ARMISD::CSINC:
18963 case ARMISD::CSINV:
18964 case ARMISD::CSNEG:
18965 return PerformCSETCombine(N, DCI.DAG);
18966 case ISD::LOAD:
18967 return PerformLOADCombine(N, DCI, Subtarget);
18968 case ARMISD::VLD1DUP:
18969 case ARMISD::VLD2DUP:
18970 case ARMISD::VLD3DUP:
18971 case ARMISD::VLD4DUP:
18972 return PerformVLDCombine(N, DCI);
18973 case ARMISD::BUILD_VECTOR:
18974 return PerformARMBUILD_VECTORCombine(N, DCI);
18975 case ISD::BITCAST:
18976 return PerformBITCASTCombine(N, DCI, Subtarget);
18977 case ARMISD::PREDICATE_CAST:
18978 return PerformPREDICATE_CASTCombine(N, DCI);
18979 case ARMISD::VECTOR_REG_CAST:
18980 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18981 case ARMISD::MVETRUNC:
18982 return PerformMVETruncCombine(N, DCI);
18983 case ARMISD::MVESEXT:
18984 case ARMISD::MVEZEXT:
18985 return PerformMVEExtCombine(N, DCI);
18986 case ARMISD::VCMP:
18987 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18988 case ISD::VECREDUCE_ADD:
18989 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18990 case ARMISD::VADDVs:
18991 case ARMISD::VADDVu:
18992 case ARMISD::VADDLVs:
18993 case ARMISD::VADDLVu:
18994 case ARMISD::VADDLVAs:
18995 case ARMISD::VADDLVAu:
18996 case ARMISD::VMLAVs:
18997 case ARMISD::VMLAVu:
18998 case ARMISD::VMLALVs:
18999 case ARMISD::VMLALVu:
19000 case ARMISD::VMLALVAs:
19001 case ARMISD::VMLALVAu:
19002 return PerformReduceShuffleCombine(N, DCI.DAG);
19003 case ARMISD::VMOVN:
19004 return PerformVMOVNCombine(N, DCI);
19005 case ARMISD::VQMOVNs:
19006 case ARMISD::VQMOVNu:
19007 return PerformVQMOVNCombine(N, DCI);
19008 case ARMISD::VQDMULH:
19009 return PerformVQDMULHCombine(N, DCI);
19010 case ARMISD::ASRL:
19011 case ARMISD::LSRL:
19012 case ARMISD::LSLL:
19013 return PerformLongShiftCombine(N, DCI.DAG);
19014 case ARMISD::SMULWB: {
19015 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19016 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19017 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19018 return SDValue();
19019 break;
19020 }
19021 case ARMISD::SMULWT: {
19022 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19023 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19024 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
19025 return SDValue();
19026 break;
19027 }
19028 case ARMISD::SMLALBB:
19029 case ARMISD::QADD16b:
19030 case ARMISD::QSUB16b:
19031 case ARMISD::UQADD16b:
19032 case ARMISD::UQSUB16b: {
19033 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19034 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
19035 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19036 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19037 return SDValue();
19038 break;
19039 }
19040 case ARMISD::SMLALBT: {
19041 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19042 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19043 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19044 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19045 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19046 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19047 return SDValue();
19048 break;
19049 }
19050 case ARMISD::SMLALTB: {
19051 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19052 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19053 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19054 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19055 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19056 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19057 return SDValue();
19058 break;
19059 }
19060 case ARMISD::SMLALTT: {
19061 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19062 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19063 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19064 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19065 return SDValue();
19066 break;
19067 }
19068 case ARMISD::QADD8b:
19069 case ARMISD::QSUB8b:
19070 case ARMISD::UQADD8b:
19071 case ARMISD::UQSUB8b: {
19072 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19073 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19074 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19075 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19076 return SDValue();
19077 break;
19078 }
19079 case ARMISD::VBSP:
19080 if (N->getOperand(1) == N->getOperand(2))
19081 return N->getOperand(1);
19082 return SDValue();
19083 case ISD::INTRINSIC_VOID:
19084 case ISD::INTRINSIC_W_CHAIN:
19085 switch (N->getConstantOperandVal(1)) {
19086 case Intrinsic::arm_neon_vld1:
19087 case Intrinsic::arm_neon_vld1x2:
19088 case Intrinsic::arm_neon_vld1x3:
19089 case Intrinsic::arm_neon_vld1x4:
19090 case Intrinsic::arm_neon_vld2:
19091 case Intrinsic::arm_neon_vld3:
19092 case Intrinsic::arm_neon_vld4:
19093 case Intrinsic::arm_neon_vld2lane:
19094 case Intrinsic::arm_neon_vld3lane:
19095 case Intrinsic::arm_neon_vld4lane:
19096 case Intrinsic::arm_neon_vld2dup:
19097 case Intrinsic::arm_neon_vld3dup:
19098 case Intrinsic::arm_neon_vld4dup:
19099 case Intrinsic::arm_neon_vst1:
19100 case Intrinsic::arm_neon_vst1x2:
19101 case Intrinsic::arm_neon_vst1x3:
19102 case Intrinsic::arm_neon_vst1x4:
19103 case Intrinsic::arm_neon_vst2:
19104 case Intrinsic::arm_neon_vst3:
19105 case Intrinsic::arm_neon_vst4:
19106 case Intrinsic::arm_neon_vst2lane:
19107 case Intrinsic::arm_neon_vst3lane:
19108 case Intrinsic::arm_neon_vst4lane:
19109 return PerformVLDCombine(N, DCI);
19110 case Intrinsic::arm_mve_vld2q:
19111 case Intrinsic::arm_mve_vld4q:
19112 case Intrinsic::arm_mve_vst2q:
19113 case Intrinsic::arm_mve_vst4q:
19114 return PerformMVEVLDCombine(N, DCI);
19115 default: break;
19116 }
19117 break;
19118 }
19119 return SDValue();
19120}
19121
19122 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19123 EVT VT) const {
19124 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19125}
19126
19127 bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19128 Align Alignment,
19129 MachineMemOperand::Flags,
19130 unsigned *Fast) const {
19131 // Depends what it gets converted into if the type is weird.
19132 if (!VT.isSimple())
19133 return false;
19134
19135 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19136 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19137 auto Ty = VT.getSimpleVT().SimpleTy;
19138
19139 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19140 // Unaligned access can use (for example) LDRB, LDRH, LDR
19141 if (AllowsUnaligned) {
19142 if (Fast)
19143 *Fast = Subtarget->hasV7Ops();
19144 return true;
19145 }
19146 }
19147
19148 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19149 // For any little-endian targets with neon, we can support unaligned ld/st
19150 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19151 // A big-endian target may also explicitly support unaligned accesses
19152 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19153 if (Fast)
19154 *Fast = 1;
19155 return true;
19156 }
19157 }
19158
19159 if (!Subtarget->hasMVEIntegerOps())
19160 return false;
19161
19162 // These are for predicates
19163 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19164 Ty == MVT::v2i1)) {
19165 if (Fast)
19166 *Fast = 1;
19167 return true;
19168 }
19169
19170 // These are for truncated stores/narrowing loads. They are fine so long as
19171 // the alignment is at least the size of the item being loaded
19172 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19173 Alignment >= VT.getScalarSizeInBits() / 8) {
19174 if (Fast)
19175 *Fast = true;
19176 return true;
19177 }
19178
19179 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19180 // VSTRW.U32 all store the vector register in exactly the same format, and
19181 // differ only in the range of their immediate offset field and the required
19182 // alignment. So there is always a store that can be used, regardless of
19183 // actual type.
19184 //
19185 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19186 // VREV64.8) pair and get the same effect. This will likely be better than
19187 // aligning the vector through the stack.
19188 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19189 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19190 Ty == MVT::v2f64) {
19191 if (Fast)
19192 *Fast = 1;
19193 return true;
19194 }
19195
19196 return false;
19197}
19198
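// getOptimalMemOpType - prefer wide FP/NEON types (v2f64 or f64) for memcpy
// and zero-memset when the function allows implicit FP use and the access is
// either sufficiently aligned or misaligned accesses are fast.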
19199 EVT ARMTargetLowering::getOptimalMemOpType(
19200 LLVMContext &Context, const MemOp &Op,
19201 const AttributeList &FuncAttributes) const {
19202 // See if we can use NEON instructions for this...
19203 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19204 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19205 unsigned Fast;
19206 if (Op.size() >= 16 &&
19207 (Op.isAligned(Align(16)) ||
19208 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19209 MachineMemOperand::MONone, &Fast) &&
19210 Fast))) {
19211 return MVT::v2f64;
19212 } else if (Op.size() >= 8 &&
19213 (Op.isAligned(Align(8)) ||
19214 (allowsMisalignedMemoryAccesses(
19215 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19216 Fast))) {
19217 return MVT::f64;
19218 }
19219 }
19220
19221 // Let the target-independent logic figure it out.
19222 return MVT::Other;
19223}
19224
19225// 64-bit integers are split into their high and low parts and held in two
19226// different registers, so the trunc is free since the low register can just
19227// be used.
19228bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19229 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19230 return false;
19231 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19232 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19233 return (SrcBits == 64 && DestBits == 32);
19234}
19235
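// As above, but for the DAG's value types: an i64-to-i32 truncation is free.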
19236 bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19237 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19238 !DstVT.isInteger())
19239 return false;
19240 unsigned SrcBits = SrcVT.getSizeInBits();
19241 unsigned DestBits = DstVT.getSizeInBits();
19242 return (SrcBits == 64 && DestBits == 32);
19243}
19244
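// A zext of a small scalar load is free on ARM: LDRB/LDRH already
// zero-extend the loaded value to 32 bits.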
19245 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19246 if (Val.getOpcode() != ISD::LOAD)
19247 return false;
19248
19249 EVT VT1 = Val.getValueType();
19250 if (!VT1.isSimple() || !VT1.isInteger() ||
19251 !VT2.isSimple() || !VT2.isInteger())
19252 return false;
19253
19254 switch (VT1.getSimpleVT().SimpleTy) {
19255 default: break;
19256 case MVT::i1:
19257 case MVT::i8:
19258 case MVT::i16:
19259 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19260 return true;
19261 }
19262
19263 return false;
19264}
19265
19266 bool ARMTargetLowering::isFNegFree(EVT VT) const {
19267 if (!VT.isSimple())
19268 return false;
19269
19270 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19271 // negate values directly (fneg is free). So, we don't want to let the DAG
19272 // combiner rewrite fneg into xors and some other instructions. For f16 and
19273 // FullFP16 argument passing, some bitcast nodes may be introduced,
19274 // triggering this DAG combine rewrite, so we are avoiding that with this.
19275 switch (VT.getSimpleVT().SimpleTy) {
19276 default: break;
19277 case MVT::f16:
19278 return Subtarget->hasFullFP16();
19279 }
19280
19281 return false;
19282}
19283
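// For MVE, a floating-point splat is better materialised as an integer splat
// of the same width (a VDUP from a GPR), so report the equivalent integer
// scalar type here.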
19284 Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19285 if (!Subtarget->hasMVEIntegerOps())
19286 return nullptr;
19287 Type *SVIType = SVI->getType();
19288 Type *ScalarType = SVIType->getScalarType();
19289
19290 if (ScalarType->isFloatTy())
19291 return Type::getInt32Ty(SVIType->getContext());
19292 if (ScalarType->isHalfTy())
19293 return Type::getInt16Ty(SVIType->getContext());
19294 return nullptr;
19295}
19296
19297 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19298 EVT VT = ExtVal.getValueType();
19299
19300 if (!isTypeLegal(VT))
19301 return false;
19302
19303 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19304 if (Ld->isExpandingLoad())
19305 return false;
19306 }
19307
19308 if (Subtarget->hasMVEIntegerOps())
19309 return true;
19310
19311 // Don't create a loadext if we can fold the extension into a wide/long
19312 // instruction.
19313 // If there's more than one user instruction, the loadext is desirable no
19314 // matter what. There can be two uses by the same instruction.
19315 if (ExtVal->use_empty() ||
19316 !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))
19317 return true;
19318
19319 SDNode *U = *ExtVal->user_begin();
19320 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19321 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19322 return false;
19323
19324 return true;
19325}
19326
19327 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19328 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19329 return false;
19330
19331 if (!isTypeLegal(EVT::getEVT(Ty1)))
19332 return false;
19333
19334 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19335
19336 // Assuming the caller doesn't have a zeroext or signext return parameter,
19337 // truncation all the way down to i1 is valid.
19338 return true;
19339}
19340
19341/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19342/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19343/// expanded to FMAs when this method returns true, otherwise fmuladd is
19344/// expanded to fmul + fadd.
19345///
19346/// ARM supports both fused and unfused multiply-add operations; we already
19347/// lower a pair of fmul and fadd to the latter so it's not clear that there
19348/// would be a gain or that the gain would be worthwhile enough to risk
19349/// correctness bugs.
19350///
19351/// For MVE, we set this to true as it helps simplify the need for some
19352/// patterns (and we don't have the non-fused floating point instruction).
19353bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19354 EVT VT) const {
19355 if (Subtarget->useSoftFloat())
19356 return false;
19357
19358 if (!VT.isSimple())
19359 return false;
19360
19361 switch (VT.getSimpleVT().SimpleTy) {
19362 case MVT::v4f32:
19363 case MVT::v8f16:
19364 return Subtarget->hasMVEFloatOps();
19365 case MVT::f16:
19366 return Subtarget->useFPVFMx16();
19367 case MVT::f32:
19368 return Subtarget->useFPVFMx();
19369 case MVT::f64:
19370 return Subtarget->useFPVFMx64();
19371 default:
19372 break;
19373 }
19374
19375 return false;
19376}
19377
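// Thumb1 load/store offsets are unsigned 5-bit immediates scaled by the
// access size (1 for byte, 2 for halfword, 4 for word), which is exactly
// what the checks below encode.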
19378static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19379 if (V < 0)
19380 return false;
19381
19382 unsigned Scale = 1;
19383 switch (VT.getSimpleVT().SimpleTy) {
19384 case MVT::i1:
19385 case MVT::i8:
19386 // Scale == 1;
19387 break;
19388 case MVT::i16:
19389 // Scale == 2;
19390 Scale = 2;
19391 break;
19392 default:
19393 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19394 // Scale == 4;
19395 Scale = 4;
19396 break;
19397 }
19398
19399 if ((V & (Scale - 1)) != 0)
19400 return false;
19401 return isUInt<5>(V / Scale);
19402}
19403
19404static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19405 const ARMSubtarget *Subtarget) {
19406 if (!VT.isInteger() && !VT.isFloatingPoint())
19407 return false;
19408 if (VT.isVector() && Subtarget->hasNEON())
19409 return false;
19410 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19411 !Subtarget->hasMVEFloatOps())
19412 return false;
19413
19414 bool IsNeg = false;
19415 if (V < 0) {
19416 IsNeg = true;
19417 V = -V;
19418 }
19419
19420 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19421
19422 // MVE: size * imm7
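// e.g. a 32-bit MVE element allows offsets that are multiples of 4 in the
// range [-508, 508] (4 * imm7).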
19423 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19424 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19425 case MVT::i32:
19426 case MVT::f32:
19427 return isShiftedUInt<7,2>(V);
19428 case MVT::i16:
19429 case MVT::f16:
19430 return isShiftedUInt<7,1>(V);
19431 case MVT::i8:
19432 return isUInt<7>(V);
19433 default:
19434 return false;
19435 }
19436 }
19437
19438 // half VLDR: 2 * imm8
19439 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19440 return isShiftedUInt<8, 1>(V);
19441 // VLDR and LDRD: 4 * imm8
19442 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19443 return isShiftedUInt<8, 2>(V);
19444
19445 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19446 // + imm12 or - imm8
19447 if (IsNeg)
19448 return isUInt<8>(V);
19449 return isUInt<12>(V);
19450 }
19451
19452 return false;
19453}
19454
19455/// isLegalAddressImmediate - Return true if the integer value can be used
19456/// as the offset of the target addressing mode for load / store of the
19457/// given type.
19458static bool isLegalAddressImmediate(int64_t V, EVT VT,
19459 const ARMSubtarget *Subtarget) {
19460 if (V == 0)
19461 return true;
19462
19463 if (!VT.isSimple())
19464 return false;
19465
19466 if (Subtarget->isThumb1Only())
19467 return isLegalT1AddressImmediate(V, VT);
19468 else if (Subtarget->isThumb2())
19469 return isLegalT2AddressImmediate(V, VT, Subtarget);
19470
19471 // ARM mode.
19472 if (V < 0)
19473 V = - V;
19474 switch (VT.getSimpleVT().SimpleTy) {
19475 default: return false;
19476 case MVT::i1:
19477 case MVT::i8:
19478 case MVT::i32:
19479 // +- imm12
19480 return isUInt<12>(V);
19481 case MVT::i16:
19482 // +- imm8
19483 return isUInt<8>(V);
19484 case MVT::f32:
19485 case MVT::f64:
19486 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19487 return false;
19488 return isShiftedUInt<8, 2>(V);
19489 }
19490}
19491
19492 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19493 EVT VT) const {
19494 int Scale = AM.Scale;
19495 if (Scale < 0)
19496 return false;
19497
19498 switch (VT.getSimpleVT().SimpleTy) {
19499 default: return false;
19500 case MVT::i1:
19501 case MVT::i8:
19502 case MVT::i16:
19503 case MVT::i32:
19504 if (Scale == 1)
19505 return true;
19506 // r + r << imm
19507 Scale = Scale & ~1;
19508 return Scale == 2 || Scale == 4 || Scale == 8;
19509 case MVT::i64:
19510 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19511 // version in Thumb mode.
19512 // r + r
19513 if (Scale == 1)
19514 return true;
19515 // r * 2 (this can be lowered to r + r).
19516 if (!AM.HasBaseReg && Scale == 2)
19517 return true;
19518 return false;
19519 case MVT::isVoid:
19520 // Note, we allow "void" uses (basically, uses that aren't loads or
19521 // stores), because arm allows folding a scale into many arithmetic
19522 // operations. This should be made more precise and revisited later.
19523
19524 // Allow r << imm, but the imm has to be a multiple of two.
19525 if (Scale & 1) return false;
19526 return isPowerOf2_32(Scale);
19527 }
19528}
19529
19530 bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19531 EVT VT) const {
19532 const int Scale = AM.Scale;
19533
19534 // Negative scales are not supported in Thumb1.
19535 if (Scale < 0)
19536 return false;
19537
19538 // Thumb1 addressing modes do not support register scaling excepting the
19539 // following cases:
19540 // 1. Scale == 1 means no scaling.
19541 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19542 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19543}
19544
19545/// isLegalAddressingMode - Return true if the addressing mode represented
19546/// by AM is legal for this target, for a load/store of the specified type.
19547 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19548 const AddrMode &AM, Type *Ty,
19549 unsigned AS, Instruction *I) const {
19550 EVT VT = getValueType(DL, Ty, true);
19551 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19552 return false;
19553
19554 // Can never fold addr of global into load/store.
19555 if (AM.BaseGV)
19556 return false;
19557
19558 switch (AM.Scale) {
19559 case 0: // no scale reg, must be "r+i" or "r", or "i".
19560 break;
19561 default:
19562 // ARM doesn't support any R+R*scale+imm addr modes.
19563 if (AM.BaseOffs)
19564 return false;
19565
19566 if (!VT.isSimple())
19567 return false;
19568
19569 if (Subtarget->isThumb1Only())
19570 return isLegalT1ScaledAddressingMode(AM, VT);
19571
19572 if (Subtarget->isThumb2())
19573 return isLegalT2ScaledAddressingMode(AM, VT);
19574
19575 int Scale = AM.Scale;
19576 switch (VT.getSimpleVT().SimpleTy) {
19577 default: return false;
19578 case MVT::i1:
19579 case MVT::i8:
19580 case MVT::i32:
19581 if (Scale < 0) Scale = -Scale;
19582 if (Scale == 1)
19583 return true;
19584 // r + r << imm
19585 return isPowerOf2_32(Scale & ~1);
19586 case MVT::i16:
19587 case MVT::i64:
19588 // r +/- r
19589 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19590 return true;
19591 // r * 2 (this can be lowered to r + r).
19592 if (!AM.HasBaseReg && Scale == 2)
19593 return true;
19594 return false;
19595
19596 case MVT::isVoid:
19597 // Note, we allow "void" uses (basically, uses that aren't loads or
19598 // stores), because arm allows folding a scale into many arithmetic
19599 // operations. This should be made more precise and revisited later.
19600
19601 // Allow r << imm, but the imm has to be a multiple of two.
19602 if (Scale & 1) return false;
19603 return isPowerOf2_32(Scale);
19604 }
19605 }
19606 return true;
19607}
19608
19609/// isLegalICmpImmediate - Return true if the specified immediate is legal
19610/// icmp immediate, that is the target has icmp instructions which can compare
19611/// a register against the immediate without having to materialize the
19612/// immediate into a register.
19613 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19614 // Thumb2 and ARM modes can use cmn for negative immediates.
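// e.g. a compare against -1 can be selected as CMN with #1, so both the
// immediate and its negation are checked below.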
19615 if (!Subtarget->isThumb())
19616 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19617 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19618 if (Subtarget->isThumb2())
19619 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19620 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19621 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19622 return Imm >= 0 && Imm <= 255;
19623}
19624
19625/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19626/// *or sub* immediate, that is the target has add or sub instructions which can
19627/// add a register with the immediate without having to materialize the
19628/// immediate into a register.
19629 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19630 // Same encoding for add/sub, just flip the sign.
19631 uint64_t AbsImm = AbsoluteValue(Imm);
19632 if (!Subtarget->isThumb())
19633 return ARM_AM::getSOImmVal(AbsImm) != -1;
19634 if (Subtarget->isThumb2())
19635 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19636 // Thumb1 only has 8-bit unsigned immediate.
19637 return AbsImm <= 255;
19638}
19639
19640// Return false to prevent folding
19641// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19642// if the folding leads to worse code.
19643 bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19644 SDValue ConstNode) const {
19645 // Let the DAGCombiner decide for vector types and large types.
19646 const EVT VT = AddNode.getValueType();
19647 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19648 return true;
19649
19650 // It is worse if c0 is legal add immediate, while c1*c0 is not
19651 // and has to be composed by at least two instructions.
19652 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19653 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19654 const int64_t C0 = C0Node->getSExtValue();
19655 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19656 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19657 return true;
19658 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19659 return false;
19660
19661 // Default to true and let the DAGCombiner decide.
19662 return true;
19663}
19664
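// getARMIndexedAddressParts - match (add/sub ptr, offset) against the
// ARM-mode pre/post-indexed forms: addrmode3 (+/- imm8 or register) for
// halfwords and sign-extended bytes, addrmode2 (+/- imm12 or shifted
// register) for words and other byte accesses.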
19665 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19666 bool isSEXTLoad, SDValue &Base,
19667 SDValue &Offset, bool &isInc,
19668 SelectionDAG &DAG) {
19669 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19670 return false;
19671
19672 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19673 // AddressingMode 3
19674 Base = Ptr->getOperand(0);
19675 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19676 int RHSC = (int)RHS->getZExtValue();
19677 if (RHSC < 0 && RHSC > -256) {
19678 assert(Ptr->getOpcode() == ISD::ADD);
19679 isInc = false;
19680 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19681 return true;
19682 }
19683 }
19684 isInc = (Ptr->getOpcode() == ISD::ADD);
19685 Offset = Ptr->getOperand(1);
19686 return true;
19687 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19688 // AddressingMode 2
19689 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19690 int RHSC = (int)RHS->getZExtValue();
19691 if (RHSC < 0 && RHSC > -0x1000) {
19692 assert(Ptr->getOpcode() == ISD::ADD);
19693 isInc = false;
19694 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19695 Base = Ptr->getOperand(0);
19696 return true;
19697 }
19698 }
19699
19700 if (Ptr->getOpcode() == ISD::ADD) {
19701 isInc = true;
19702 ARM_AM::ShiftOpc ShOpcVal=
19703 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19704 if (ShOpcVal != ARM_AM::no_shift) {
19705 Base = Ptr->getOperand(1);
19706 Offset = Ptr->getOperand(0);
19707 } else {
19708 Base = Ptr->getOperand(0);
19709 Offset = Ptr->getOperand(1);
19710 }
19711 return true;
19712 }
19713
19714 isInc = (Ptr->getOpcode() == ISD::ADD);
19715 Base = Ptr->getOperand(0);
19716 Offset = Ptr->getOperand(1);
19717 return true;
19718 }
19719
19720 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19721 return false;
19722}
19723
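// getT2IndexedAddressParts - Thumb2 pre/post-indexed loads and stores only
// take a +/- 8-bit immediate offset, which is what is matched here.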
19724 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19725 bool isSEXTLoad, SDValue &Base,
19726 SDValue &Offset, bool &isInc,
19727 SelectionDAG &DAG) {
19728 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19729 return false;
19730
19731 Base = Ptr->getOperand(0);
19732 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19733 int RHSC = (int)RHS->getZExtValue();
19734 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19735 assert(Ptr->getOpcode() == ISD::ADD);
19736 isInc = false;
19737 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19738 return true;
19739 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19740 isInc = Ptr->getOpcode() == ISD::ADD;
19741 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19742 return true;
19743 }
19744 }
19745
19746 return false;
19747}
19748
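// getMVEIndexedAddressParts - MVE VLDR/VSTR pre/post-increment forms take a
// 7-bit immediate scaled by the element size; the helper below checks the
// offset against that range for each candidate element width.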
19749static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19750 bool isSEXTLoad, bool IsMasked, bool isLE,
19751 SDValue &Base, SDValue &Offset,
19752 bool &isInc, SelectionDAG &DAG) {
19753 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19754 return false;
19755 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19756 return false;
19757
19758 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19759 // as opposed to a vldrw.32). This can allow extra addressing modes or
19760 // alignments for what is otherwise an equivalent instruction.
19761 bool CanChangeType = isLE && !IsMasked;
19762
19763 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19764 int RHSC = (int)RHS->getZExtValue();
19765
19766 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19767 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19768 assert(Ptr->getOpcode() == ISD::ADD);
19769 isInc = false;
19770 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19771 return true;
19772 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19773 isInc = Ptr->getOpcode() == ISD::ADD;
19774 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19775 return true;
19776 }
19777 return false;
19778 };
19779
19780 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19781 // (in BE/masked) type.
19782 Base = Ptr->getOperand(0);
19783 if (VT == MVT::v4i16) {
19784 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19785 return true;
19786 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19787 if (IsInRange(RHSC, 0x80, 1))
19788 return true;
19789 } else if (Alignment >= 4 &&
19790 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19791 IsInRange(RHSC, 0x80, 4))
19792 return true;
19793 else if (Alignment >= 2 &&
19794 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19795 IsInRange(RHSC, 0x80, 2))
19796 return true;
19797 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19798 return true;
19799 return false;
19800}
19801
19802/// getPreIndexedAddressParts - returns true by value, base pointer and
19803/// offset pointer and addressing mode by reference if the node's address
19804/// can be legally represented as pre-indexed load / store address.
19805bool
19806 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19807 SDValue &Offset,
19808 ISD::MemIndexedMode &AM,
19809 SelectionDAG &DAG) const {
19810 if (Subtarget->isThumb1Only())
19811 return false;
19812
19813 EVT VT;
19814 SDValue Ptr;
19815 Align Alignment;
19816 bool isSEXTLoad = false;
19817 bool IsMasked = false;
19818 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19819 Ptr = LD->getBasePtr();
19820 VT = LD->getMemoryVT();
19821 Alignment = LD->getAlign();
19822 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19823 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19824 Ptr = ST->getBasePtr();
19825 VT = ST->getMemoryVT();
19826 Alignment = ST->getAlign();
19827 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19828 Ptr = LD->getBasePtr();
19829 VT = LD->getMemoryVT();
19830 Alignment = LD->getAlign();
19831 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19832 IsMasked = true;
19833 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19834 Ptr = ST->getBasePtr();
19835 VT = ST->getMemoryVT();
19836 Alignment = ST->getAlign();
19837 IsMasked = true;
19838 } else
19839 return false;
19840
19841 bool isInc;
19842 bool isLegal = false;
19843 if (VT.isVector())
19844 isLegal = Subtarget->hasMVEIntegerOps() &&
19845 getMVEIndexedAddressParts(
19846 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19847 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19848 else {
19849 if (Subtarget->isThumb2())
19850 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19851 Offset, isInc, DAG);
19852 else
19853 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19854 Offset, isInc, DAG);
19855 }
19856 if (!isLegal)
19857 return false;
19858
19859 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19860 return true;
19861}
19862
19863/// getPostIndexedAddressParts - returns true by value, base pointer and
19864/// offset pointer and addressing mode by reference if this node can be
19865/// combined with a load / store to form a post-indexed load / store.
19866 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19867 SDValue &Base,
19868 SDValue &Offset,
19869 ISD::MemIndexedMode &AM,
19870 SelectionDAG &DAG) const {
19871 EVT VT;
19872 SDValue Ptr;
19873 Align Alignment;
19874 bool isSEXTLoad = false, isNonExt;
19875 bool IsMasked = false;
19876 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19877 VT = LD->getMemoryVT();
19878 Ptr = LD->getBasePtr();
19879 Alignment = LD->getAlign();
19880 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19881 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19882 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19883 VT = ST->getMemoryVT();
19884 Ptr = ST->getBasePtr();
19885 Alignment = ST->getAlign();
19886 isNonExt = !ST->isTruncatingStore();
19887 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19888 VT = LD->getMemoryVT();
19889 Ptr = LD->getBasePtr();
19890 Alignment = LD->getAlign();
19891 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19892 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19893 IsMasked = true;
19894 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19895 VT = ST->getMemoryVT();
19896 Ptr = ST->getBasePtr();
19897 Alignment = ST->getAlign();
19898 isNonExt = !ST->isTruncatingStore();
19899 IsMasked = true;
19900 } else
19901 return false;
19902
19903 if (Subtarget->isThumb1Only()) {
19904 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19905 // must be non-extending/truncating, i32, with an offset of 4.
19906 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19907 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19908 return false;
19909 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19910 if (!RHS || RHS->getZExtValue() != 4)
19911 return false;
19912 if (Alignment < Align(4))
19913 return false;
19914
19915 Offset = Op->getOperand(1);
19916 Base = Op->getOperand(0);
19917 AM = ISD::POST_INC;
19918 return true;
19919 }
19920
19921 bool isInc;
19922 bool isLegal = false;
19923 if (VT.isVector())
19924 isLegal = Subtarget->hasMVEIntegerOps() &&
19925 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19926 Subtarget->isLittle(), Base, Offset,
19927 isInc, DAG);
19928 else {
19929 if (Subtarget->isThumb2())
19930 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19931 isInc, DAG);
19932 else
19933 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19934 isInc, DAG);
19935 }
19936 if (!isLegal)
19937 return false;
19938
19939 if (Ptr != Base) {
19940 // Swap base ptr and offset to catch more post-index load / store when
19941 // it's legal. In Thumb2 mode, offset must be an immediate.
19942 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19943 !Subtarget->isThumb2())
19944 std::swap(Base, Offset);
19945
19946 // Post-indexed load / store update the base pointer.
19947 if (Ptr != Base)
19948 return false;
19949 }
19950
19951 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19952 return true;
19953}
19954
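// computeKnownBitsForTargetNode - compute known zero/one bits for
// ARM-specific nodes: carry-producing arithmetic (ADDC/ADDE/SUBC/SUBE),
// CMOV, BFI, lane extracts, conditional selects, and the vector ORR/BIC
// immediate forms.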
19955 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
19956 KnownBits &Known,
19957 const APInt &DemandedElts,
19958 const SelectionDAG &DAG,
19959 unsigned Depth) const {
19960 unsigned BitWidth = Known.getBitWidth();
19961 Known.resetAll();
19962 switch (Op.getOpcode()) {
19963 default: break;
19964 case ARMISD::ADDC:
19965 case ARMISD::ADDE:
19966 case ARMISD::SUBC:
19967 case ARMISD::SUBE:
19968 // Special cases when we convert a carry to a boolean.
19969 if (Op.getResNo() == 0) {
19970 SDValue LHS = Op.getOperand(0);
19971 SDValue RHS = Op.getOperand(1);
19972 // (ADDE 0, 0, C) will give us a single bit.
19973 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
19974 isNullConstant(RHS)) {
19975 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
19976 return;
19977 }
19978 }
19979 break;
19980 case ARMISD::CMOV: {
19981 // Bits are known zero/one if known on the LHS and RHS.
19982 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
19983 if (Known.isUnknown())
19984 return;
19985
19986 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
19987 Known = Known.intersectWith(KnownRHS);
19988 return;
19989 }
19990 case ISD::INTRINSIC_W_CHAIN: {
19991 Intrinsic::ID IntID =
19992 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
19993 switch (IntID) {
19994 default: return;
19995 case Intrinsic::arm_ldaex:
19996 case Intrinsic::arm_ldrex: {
19997 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
19998 unsigned MemBits = VT.getScalarSizeInBits();
19999 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20000 return;
20001 }
20002 }
20003 }
20004 case ARMISD::BFI: {
20005 // Conservatively, we can recurse down the first operand
20006 // and just mask out all affected bits.
20007 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20008
20009 // The operand to BFI is already a mask suitable for removing the bits it
20010 // sets.
20011 const APInt &Mask = Op.getConstantOperandAPInt(2);
20012 Known.Zero &= Mask;
20013 Known.One &= Mask;
20014 return;
20015 }
20016 case ARMISD::VGETLANEs:
20017 case ARMISD::VGETLANEu: {
20018 const SDValue &SrcSV = Op.getOperand(0);
20019 EVT VecVT = SrcSV.getValueType();
20020 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20021 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20022 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20023 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20024 "VGETLANE index out of bounds");
20025 unsigned Idx = Pos->getZExtValue();
20026 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20027 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20028
20029 EVT VT = Op.getValueType();
20030 const unsigned DstSz = VT.getScalarSizeInBits();
20031 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20032 (void)SrcSz;
20033 assert(SrcSz == Known.getBitWidth());
20034 assert(DstSz > SrcSz);
20035 if (Op.getOpcode() == ARMISD::VGETLANEs)
20036 Known = Known.sext(DstSz);
20037 else {
20038 Known = Known.zext(DstSz);
20039 }
20040 assert(DstSz == Known.getBitWidth());
20041 break;
20042 }
20043 case ARMISD::VMOVrh: {
20044 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20045 assert(KnownOp.getBitWidth() == 16);
20046 Known = KnownOp.zext(32);
20047 break;
20048 }
20049 case ARMISD::CSINC:
20050 case ARMISD::CSINV:
20051 case ARMISD::CSNEG: {
20052 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20053 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20054
20055 // The result is either:
20056 // CSINC: KnownOp0 or KnownOp1 + 1
20057 // CSINV: KnownOp0 or ~KnownOp1
20058 // CSNEG: KnownOp0 or KnownOp1 * -1
20059 if (Op.getOpcode() == ARMISD::CSINC)
20060 KnownOp1 =
20061 KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
20062 else if (Op.getOpcode() == ARMISD::CSINV)
20063 std::swap(KnownOp1.Zero, KnownOp1.One);
20064 else if (Op.getOpcode() == ARMISD::CSNEG)
20065 KnownOp1 = KnownBits::mul(KnownOp1,
20066 KnownBits::makeConstant(APInt::getAllOnes(32)));
20067
20068 Known = KnownOp0.intersectWith(KnownOp1);
20069 break;
20070 }
20071 case ARMISD::VORRIMM:
20072 case ARMISD::VBICIMM: {
20073 unsigned Encoded = Op.getConstantOperandVal(1);
20074 unsigned DecEltBits = 0;
20075 uint64_t DecodedVal = ARM_AM::decodeVMOVModImm(Encoded, DecEltBits);
20076
20077 unsigned EltBits = Op.getScalarValueSizeInBits();
20078 if (EltBits != DecEltBits) {
20079 // Be conservative: only update Known when EltBits == DecEltBits.
20080 // This is believed to always be true for VORRIMM/VBICIMM today, but if
20081 // that changes in the future, doing nothing here is safer than risking
20082 // subtle bugs.
20083 break;
20084 }
20085
20086 KnownBits KnownLHS = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20087 bool IsVORR = Op.getOpcode() == ARMISD::VORRIMM;
20088 APInt Imm(DecEltBits, DecodedVal);
20089
20090 Known.One = IsVORR ? (KnownLHS.One | Imm) : (KnownLHS.One & ~Imm);
20091 Known.Zero = IsVORR ? (KnownLHS.Zero & ~Imm) : (KnownLHS.Zero | Imm);
20092 break;
20093 }
20094 }
20095}
20096
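// targetShrinkDemandedConstant - try to replace the AND mask with one that
// is cheaper to materialise (0xFF for UXTB, 0xFFFF for UXTH, a value below
// 256, or an inverted mask in [-256, -2]) while still covering every
// demanded bit.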
20097 bool ARMTargetLowering::targetShrinkDemandedConstant(
20098 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20099 TargetLoweringOpt &TLO) const {
20100 // Delay optimization, so we don't have to deal with illegal types, or block
20101 // optimizations.
20102 if (!TLO.LegalOps)
20103 return false;
20104
20105 // Only optimize AND for now.
20106 if (Op.getOpcode() != ISD::AND)
20107 return false;
20108
20109 EVT VT = Op.getValueType();
20110
20111 // Ignore vectors.
20112 if (VT.isVector())
20113 return false;
20114
20115 assert(VT == MVT::i32 && "Unexpected integer type");
20116
20117 // Make sure the RHS really is a constant.
20118 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20119 if (!C)
20120 return false;
20121
20122 unsigned Mask = C->getZExtValue();
20123
20124 unsigned Demanded = DemandedBits.getZExtValue();
20125 unsigned ShrunkMask = Mask & Demanded;
20126 unsigned ExpandedMask = Mask | ~Demanded;
20127
20128 // If the mask is all zeros, let the target-independent code replace the
20129 // result with zero.
20130 if (ShrunkMask == 0)
20131 return false;
20132
20133 // If the mask is all ones, erase the AND. (Currently, the target-independent
20134 // code won't do this, so we have to do it explicitly to avoid an infinite
20135 // loop in obscure cases.)
20136 if (ExpandedMask == ~0U)
20137 return TLO.CombineTo(Op, Op.getOperand(0));
20138
20139 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20140 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20141 };
20142 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20143 if (NewMask == Mask)
20144 return true;
20145 SDLoc DL(Op);
20146 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20147 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20148 return TLO.CombineTo(Op, NewOp);
20149 };
20150
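// For example, (and X, 0x1FF) where only the low 8 bits are demanded can be
// replaced by (and X, 0xFF), which matches the UXTB pattern.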
20151 // Prefer uxtb mask.
20152 if (IsLegalMask(0xFF))
20153 return UseMask(0xFF);
20154
20155 // Prefer uxth mask.
20156 if (IsLegalMask(0xFFFF))
20157 return UseMask(0xFFFF);
20158
20159 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20160 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20161 if (ShrunkMask < 256)
20162 return UseMask(ShrunkMask);
20163
20164 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20165 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20166 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20167 return UseMask(ExpandedMask);
20168
20169 // Potential improvements:
20170 //
20171 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20172 // We could try to prefer Thumb1 immediates which can be lowered to a
20173 // two-instruction sequence.
20174 // We could try to recognize more legal ARM/Thumb2 immediates here.
20175
20176 return false;
20177}
20178
20179 bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20180 SDValue Op, const APInt &OriginalDemandedBits,
20181 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20182 unsigned Depth) const {
20183 unsigned Opc = Op.getOpcode();
20184
20185 switch (Opc) {
20186 case ARMISD::ASRL:
20187 case ARMISD::LSRL: {
20188 // If this is result 0 and the other result is unused, see if the demand
20189 // bits allow us to shrink this long shift into a standard small shift in
20190 // the opposite direction.
20191 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20192 isa<ConstantSDNode>(Op->getOperand(2))) {
20193 unsigned ShAmt = Op->getConstantOperandVal(2);
20194 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20195 << (32 - ShAmt)))
20196 return TLO.CombineTo(
20197 Op, TLO.DAG.getNode(
20198 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20199 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20200 }
20201 break;
20202 }
20203 case ARMISD::VBICIMM: {
20204 SDValue Op0 = Op.getOperand(0);
20205 unsigned ModImm = Op.getConstantOperandVal(1);
20206 unsigned EltBits = 0;
20207 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20208 if ((OriginalDemandedBits & Mask) == 0)
20209 return TLO.CombineTo(Op, Op0);
20210 }
20211 }
20212
20213 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20214 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20215}
20216
20217//===----------------------------------------------------------------------===//
20218// ARM Inline Assembly Support
20219//===----------------------------------------------------------------------===//
20220
20221const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20222 // At this point, we have to lower this constraint to something else, so we
20223 // lower it to an "r" or "w". However, by doing this we will force the result
20224 // to be in register, while the X constraint is much more permissive.
20225 //
20226 // Although we are correct (we are free to emit anything, without
20227 // constraints), we might break use cases that would expect us to be more
20228 // efficient and emit something else.
20229 if (!Subtarget->hasVFP2Base())
20230 return "r";
20231 if (ConstraintVT.isFloatingPoint())
20232 return "w";
20233 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20234 (ConstraintVT.getSizeInBits() == 64 ||
20235 ConstraintVT.getSizeInBits() == 128))
20236 return "w";
20237
20238 return "r";
20239}
20240
20241/// getConstraintType - Given a constraint letter, return the type of
20242/// constraint it is for this target.
20243 ARMTargetLowering::ConstraintType
20244 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20245 unsigned S = Constraint.size();
20246 if (S == 1) {
20247 switch (Constraint[0]) {
20248 default: break;
20249 case 'l': return C_RegisterClass;
20250 case 'w': return C_RegisterClass;
20251 case 'h': return C_RegisterClass;
20252 case 'x': return C_RegisterClass;
20253 case 't': return C_RegisterClass;
20254 case 'j': return C_Immediate; // Constant for movw.
20255 // An address with a single base register. Due to the way we
20256 // currently handle addresses it is the same as an 'r' memory constraint.
20257 case 'Q': return C_Memory;
20258 }
20259 } else if (S == 2) {
20260 switch (Constraint[0]) {
20261 default: break;
20262 case 'T': return C_RegisterClass;
20263 // All 'U+' constraints are addresses.
20264 case 'U': return C_Memory;
20265 }
20266 }
20267 return TargetLowering::getConstraintType(Constraint);
20268}
20269
20270/// Examine constraint type and operand type and determine a weight value.
20271/// This object must already have been set up with the operand type
20272/// and the current alternative constraint selected.
20273 TargetLowering::ConstraintWeight
20274 ARMTargetLowering::getSingleConstraintMatchWeight(
20275 AsmOperandInfo &info, const char *constraint) const {
20276 ConstraintWeight weight = CW_Invalid;
20277 Value *CallOperandVal = info.CallOperandVal;
20278 // If we don't have a value, we can't do a match,
20279 // but allow it at the lowest weight.
20280 if (!CallOperandVal)
20281 return CW_Default;
20282 Type *type = CallOperandVal->getType();
20283 // Look at the constraint type.
20284 switch (*constraint) {
20285 default:
20286 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20287 break;
20288 case 'l':
20289 if (type->isIntegerTy()) {
20290 if (Subtarget->isThumb())
20291 weight = CW_SpecificReg;
20292 else
20293 weight = CW_Register;
20294 }
20295 break;
20296 case 'w':
20297 if (type->isFloatingPointTy())
20298 weight = CW_Register;
20299 break;
20300 }
20301 return weight;
20302}
20303
20304static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT) {
20305 if (PR == 0 || VT == MVT::Other)
20306 return false;
20307 return (ARM::SPRRegClass.contains(PR) && VT != MVT::f32 && VT != MVT::i32) ||
20308 (ARM::DPRRegClass.contains(PR) && VT != MVT::f64 &&
20309 !VT.is64BitVector());
20310}
20311
20312using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20313
20314 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20315 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20316 switch (Constraint.size()) {
20317 case 1:
20318 // GCC ARM Constraint Letters
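// 'l': low GPRs (r0-r7 in Thumb), 'h': high GPRs, 'w': VFP/NEON registers,
// 'x': the restricted subset s0-s15/d0-d7/q0-q3, 't': VFP2-addressable
// S/D/Q registers.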
20319 switch (Constraint[0]) {
20320 case 'l': // Low regs or general regs.
20321 if (Subtarget->isThumb())
20322 return RCPair(0U, &ARM::tGPRRegClass);
20323 return RCPair(0U, &ARM::GPRRegClass);
20324 case 'h': // High regs or no regs.
20325 if (Subtarget->isThumb())
20326 return RCPair(0U, &ARM::hGPRRegClass);
20327 break;
20328 case 'r':
20329 if (Subtarget->isThumb1Only())
20330 return RCPair(0U, &ARM::tGPRRegClass);
20331 return RCPair(0U, &ARM::GPRRegClass);
20332 case 'w':
20333 if (VT == MVT::Other)
20334 break;
20335 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20336 return RCPair(0U, &ARM::SPRRegClass);
20337 if (VT.getSizeInBits() == 64)
20338 return RCPair(0U, &ARM::DPRRegClass);
20339 if (VT.getSizeInBits() == 128)
20340 return RCPair(0U, &ARM::QPRRegClass);
20341 break;
20342 case 'x':
20343 if (VT == MVT::Other)
20344 break;
20345 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20346 return RCPair(0U, &ARM::SPR_8RegClass);
20347 if (VT.getSizeInBits() == 64)
20348 return RCPair(0U, &ARM::DPR_8RegClass);
20349 if (VT.getSizeInBits() == 128)
20350 return RCPair(0U, &ARM::QPR_8RegClass);
20351 break;
20352 case 't':
20353 if (VT == MVT::Other)
20354 break;
20355 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20356 return RCPair(0U, &ARM::SPRRegClass);
20357 if (VT.getSizeInBits() == 64)
20358 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20359 if (VT.getSizeInBits() == 128)
20360 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20361 break;
20362 }
20363 break;
20364
20365 case 2:
20366 if (Constraint[0] == 'T') {
20367 switch (Constraint[1]) {
20368 default:
20369 break;
20370 case 'e':
20371 return RCPair(0U, &ARM::tGPREvenRegClass);
20372 case 'o':
20373 return RCPair(0U, &ARM::tGPROddRegClass);
20374 }
20375 }
20376 break;
20377
20378 default:
20379 break;
20380 }
20381
20382 if (StringRef("{cc}").equals_insensitive(Constraint))
20383 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20384
20385 auto RCP = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20386 if (isIncompatibleReg(RCP.first, VT))
20387 return {0, nullptr};
20388 return RCP;
20389}
20390
20391/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20392/// vector. If it is invalid, don't add anything to Ops.
20393 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20394 StringRef Constraint,
20395 std::vector<SDValue> &Ops,
20396 SelectionDAG &DAG) const {
20397 SDValue Result;
20398
20399 // Currently only support length 1 constraints.
20400 if (Constraint.size() != 1)
20401 return;
20402
20403 char ConstraintLetter = Constraint[0];
20404 switch (ConstraintLetter) {
20405 default: break;
20406 case 'j':
20407 case 'I': case 'J': case 'K': case 'L':
20408 case 'M': case 'N': case 'O':
20409 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20410 if (!C)
20411 return;
20412
20413 int64_t CVal64 = C->getSExtValue();
20414 int CVal = (int) CVal64;
20415 // None of these constraints allow values larger than 32 bits. Check
20416 // that the value fits in an int.
20417 if (CVal != CVal64)
20418 return;
20419
20420 switch (ConstraintLetter) {
20421 case 'j':
20422 // Constant suitable for movw, must be between 0 and
20423 // 65535.
20424 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20425 if (CVal >= 0 && CVal <= 65535)
20426 break;
20427 return;
20428 case 'I':
20429 if (Subtarget->isThumb1Only()) {
20430 // This must be a constant between 0 and 255, for ADD
20431 // immediates.
20432 if (CVal >= 0 && CVal <= 255)
20433 break;
20434 } else if (Subtarget->isThumb2()) {
20435 // A constant that can be used as an immediate value in a
20436 // data-processing instruction.
20437 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20438 break;
20439 } else {
20440 // A constant that can be used as an immediate value in a
20441 // data-processing instruction.
20442 if (ARM_AM::getSOImmVal(CVal) != -1)
20443 break;
20444 }
20445 return;
20446
20447 case 'J':
20448 if (Subtarget->isThumb1Only()) {
20449 // This must be a constant between -255 and -1, for negated ADD
20450 // immediates. This can be used in GCC with an "n" modifier that
20451 // prints the negated value, for use with SUB instructions. It is
20452 // not useful otherwise but is implemented for compatibility.
20453 if (CVal >= -255 && CVal <= -1)
20454 break;
20455 } else {
20456 // This must be a constant between -4095 and 4095. It is not clear
20457 // what this constraint is intended for. Implemented for
20458 // compatibility with GCC.
20459 if (CVal >= -4095 && CVal <= 4095)
20460 break;
20461 }
20462 return;
20463
20464 case 'K':
20465 if (Subtarget->isThumb1Only()) {
20466 // A 32-bit value where only one byte has a nonzero value. Exclude
20467 // zero to match GCC. This constraint is used by GCC internally for
20468 // constants that can be loaded with a move/shift combination.
20469 // It is not useful otherwise but is implemented for compatibility.
20470 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20471 break;
20472 } else if (Subtarget->isThumb2()) {
20473 // A constant whose bitwise inverse can be used as an immediate
20474 // value in a data-processing instruction. This can be used in GCC
20475 // with a "B" modifier that prints the inverted value, for use with
20476 // BIC and MVN instructions. It is not useful otherwise but is
20477 // implemented for compatibility.
20478 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20479 break;
20480 } else {
20481 // A constant whose bitwise inverse can be used as an immediate
20482 // value in a data-processing instruction. This can be used in GCC
20483 // with a "B" modifier that prints the inverted value, for use with
20484 // BIC and MVN instructions. It is not useful otherwise but is
20485 // implemented for compatibility.
20486 if (ARM_AM::getSOImmVal(~CVal) != -1)
20487 break;
20488 }
20489 return;
20490
20491 case 'L':
20492 if (Subtarget->isThumb1Only()) {
20493 // This must be a constant between -7 and 7,
20494 // for 3-operand ADD/SUB immediate instructions.
20495 if (CVal >= -7 && CVal < 7)
20496 break;
20497 } else if (Subtarget->isThumb2()) {
20498 // A constant whose negation can be used as an immediate value in a
20499 // data-processing instruction. This can be used in GCC with an "n"
20500 // modifier that prints the negated value, for use with SUB
20501 // instructions. It is not useful otherwise but is implemented for
20502 // compatibility.
20503 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20504 break;
20505 } else {
20506 // A constant whose negation can be used as an immediate value in a
20507 // data-processing instruction. This can be used in GCC with an "n"
20508 // modifier that prints the negated value, for use with SUB
20509 // instructions. It is not useful otherwise but is implemented for
20510 // compatibility.
20511 if (ARM_AM::getSOImmVal(-CVal) != -1)
20512 break;
20513 }
20514 return;
20515
20516 case 'M':
20517 if (Subtarget->isThumb1Only()) {
20518 // This must be a multiple of 4 between 0 and 1020, for
20519 // ADD sp + immediate.
20520 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20521 break;
20522 } else {
20523 // A power of two or a constant between 0 and 32. This is used in
20524 // GCC for the shift amount on shifted register operands, but it is
20525 // useful in general for any shift amounts.
20526 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20527 break;
20528 }
20529 return;
20530
20531 case 'N':
20532 if (Subtarget->isThumb1Only()) {
20533 // This must be a constant between 0 and 31, for shift amounts.
20534 if (CVal >= 0 && CVal <= 31)
20535 break;
20536 }
20537 return;
20538
20539 case 'O':
20540 if (Subtarget->isThumb1Only()) {
20541 // This must be a multiple of 4 between -508 and 508, for
20542 // ADD/SUB sp = sp + immediate.
20543 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20544 break;
20545 }
20546 return;
20547 }
20548 Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20549 break;
20550 }
20551
20552 if (Result.getNode()) {
20553 Ops.push_back(Result);
20554 return;
20555 }
20556 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20557}
20558
20559static RTLIB::Libcall getDivRemLibcall(
20560 const SDNode *N, MVT::SimpleValueType SVT) {
20561 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20562 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20563 "Unhandled Opcode in getDivRemLibcall");
20564 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20565 N->getOpcode() == ISD::SREM;
20566 RTLIB::Libcall LC;
20567 switch (SVT) {
20568 default: llvm_unreachable("Unexpected request for libcall!");
20569 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20570 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20571 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20572 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20573 }
20574 return LC;
20575}
20576
20577 static TargetLowering::ArgListTy getDivRemArgList(
20578 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20579 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20580 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20581 "Unhandled Opcode in getDivRemArgList");
20582 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20583 N->getOpcode() == ISD::SREM;
20584 TargetLowering::ArgListTy Args;
20585 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20586 EVT ArgVT = N->getOperand(i).getValueType();
20587 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20588 TargetLowering::ArgListEntry Entry(N->getOperand(i), ArgTy);
20589 Entry.IsSExt = isSigned;
20590 Entry.IsZExt = !isSigned;
20591 Args.push_back(Entry);
20592 }
20593 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20594 std::swap(Args[0], Args[1]);
20595 return Args;
20596}
20597
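// LowerDivRem - lower [SU]DIVREM either to hardware SDIV/UDIV plus a
// multiply-subtract, or to an RTABI divmod call (e.g. __aeabi_idivmod on
// AEABI targets) that returns the quotient and remainder together.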
20598SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20599 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20600 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20601 Subtarget->isTargetWindows()) &&
20602 "Register-based DivRem lowering only");
20603 unsigned Opcode = Op->getOpcode();
20604 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20605 "Invalid opcode for Div/Rem lowering");
20606 bool isSigned = (Opcode == ISD::SDIVREM);
20607 EVT VT = Op->getValueType(0);
20608 SDLoc dl(Op);
20609
20610 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20611 SmallVector<SDValue> Result;
20612 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20613 SDValue Res0 =
20614 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20615 SDValue Res1 =
20616 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20617 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20618 {Res0, Res1});
20619 }
20620 }
20621
20622 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20623
20624 // If the target has hardware divide, use divide + multiply + subtract:
20625 // div = a / b
20626 // rem = a - b * div
20627 // return {div, rem}
20628 // This should be lowered into UDIV/SDIV + MLS later on.
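 // Editor's note (illustrative, not part of the original source): for
 // a = 7, b = 3 this path computes div = 7 / 3 = 2 and rem = 7 - 3 * 2 = 1,
 // so the pair {2, 1} is produced without any libcall.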
20629 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20630 : Subtarget->hasDivideInARMMode();
20631 if (hasDivide && Op->getValueType(0).isSimple() &&
20632 Op->getSimpleValueType(0) == MVT::i32) {
20633 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20634 const SDValue Dividend = Op->getOperand(0);
20635 const SDValue Divisor = Op->getOperand(1);
20636 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20637 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20638 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20639
20640 SDValue Values[2] = {Div, Rem};
20641 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20642 }
20643
20644 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20645 VT.getSimpleVT().SimpleTy);
20646 SDValue InChain = DAG.getEntryNode();
20647
20648 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20649 DAG.getContext(),
20650 Subtarget);
20651
20654
20655 Type *RetTy = StructType::get(Ty, Ty);
20656
20657 if (Subtarget->isTargetWindows())
20658 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20659
20660 TargetLowering::CallLoweringInfo CLI(DAG);
20661 CLI.setDebugLoc(dl).setChain(InChain)
20662 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20664
20665 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20666 return CallInfo.first;
20667}
20668
20669// Lowers REM using divmod helpers
20670// see RTABI section 4.2/4.3
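// Editor's note (illustrative, not part of the original source): the RTABI
// divmod helpers return both results at once; a 32-bit signed case
// conceptually behaves like
//   { quot, rem } = __aeabi_idivmod(numerator, denominator)
// and this function keeps only the remainder half of that pair.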
20671SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20672 EVT VT = N->getValueType(0);
20673
20674 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20675 SmallVector<SDValue> Result;
20676 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20677 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20678 Result[0], Result[1]);
20679 }
20680
20681 // Build return types (div and rem)
20682 std::vector<Type*> RetTyParams;
20683 Type *RetTyElement;
20684
20685 switch (VT.getSimpleVT().SimpleTy) {
20686 default: llvm_unreachable("Unexpected request for libcall!");
20687 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20688 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20689 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20690 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20691 }
20692
20693 RetTyParams.push_back(RetTyElement);
20694 RetTyParams.push_back(RetTyElement);
20695 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20696 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20697
20698 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20699 SimpleTy);
20700 SDValue InChain = DAG.getEntryNode();
20701 ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20702 Subtarget);
20703 bool isSigned = N->getOpcode() == ISD::SREM;
20706
20707 if (Subtarget->isTargetWindows())
20708 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20709
20710 // Lower call
20711 CallLoweringInfo CLI(DAG);
20712 CLI.setChain(InChain)
20713 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20715 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20716
20717 // Return second (rem) result operand (first contains div)
20718 SDNode *ResNode = CallResult.first.getNode();
20719 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20720 return ResNode->getOperand(1);
20721}
20722
20723SDValue
20724ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20725 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20726 SDLoc DL(Op);
20727
20728 // Get the inputs.
20729 SDValue Chain = Op.getOperand(0);
20730 SDValue Size = Op.getOperand(1);
20731
20733 "no-stack-arg-probe")) {
20734 MaybeAlign Align =
20735 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20736 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20737 Chain = SP.getValue(1);
20738 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20739 if (Align)
20740 SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20741 DAG.getSignedConstant(-Align->value(), DL, MVT::i32));
20742 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20743 SDValue Ops[2] = { SP, Chain };
20744 return DAG.getMergeValues(Ops, DL);
20745 }
20746
20747 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20748 DAG.getConstant(2, DL, MVT::i32));
20749
20750 SDValue Glue;
20751 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20752 Glue = Chain.getValue(1);
20753
20754 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20755 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20756
20757 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20758 Chain = NewSP.getValue(1);
20759
20760 SDValue Ops[2] = { NewSP, Chain };
20761 return DAG.getMergeValues(Ops, DL);
20762}
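// Editor's note (illustrative, not part of the original source): the
// non-trivial path above conceptually turns "alloca N" into "put N / 4 in
// r4, invoke the Windows stack-probe helper (ARMISD::WIN__CHKSTK), then read
// the adjusted SP back as the result".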
20763
20764SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20765 bool IsStrict = Op->isStrictFPOpcode();
20766 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20767 const unsigned DstSz = Op.getValueType().getSizeInBits();
20768 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20769 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20770 "Unexpected type for custom-lowering FP_EXTEND");
20771
20772 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20773 "With both FP DP and 16, any FP conversion is legal!");
20774
20775 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20776 "With FP16, 16 to 32 conversion is legal!");
20777
20778 // Converting from 32 -> 64 is valid if we have FP64.
20779 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20780 // FIXME: Remove this when we have strict fp instruction selection patterns
20781 if (IsStrict) {
20782 SDLoc Loc(Op);
20783 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20784 Loc, Op.getValueType(), SrcVal);
20785 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20786 }
20787 return Op;
20788 }
20789
20790 // Either we are converting from 16 -> 64: without FP16 and/or FP64
20791 // (double precision), or without Armv8 FP, we must do it in two steps.
20792 // Or we are converting from 32 -> 64 without FP64, or from 16 -> 32
20793 // without FP16; in those cases we must use a libcall for the unsupported
20794 // step. The loop below handles one step at a time.
20795 SDLoc Loc(Op);
20796 RTLIB::Libcall LC;
20797 MakeLibCallOptions CallOptions;
20798 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20799 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20800 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20801 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20802 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20803 if (Supported) {
20804 if (IsStrict) {
20805 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20806 {DstVT, MVT::Other}, {Chain, SrcVal});
20807 Chain = SrcVal.getValue(1);
20808 } else {
20809 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20810 }
20811 } else {
20812 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20813 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20814 "Unexpected type for custom-lowering FP_EXTEND");
20815 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20816 Loc, Chain);
20817 }
20818 }
20819
20820 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20821}
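// Editor's note (illustrative, not part of the original source): extending
// f16 -> f64 on a subtarget with FP16 but no FP64 would take the hardware
// step f16 -> f32 followed by a libcall for f32 -> f64, i.e. one iteration
// of the loop above per step.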
20822
20823SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20824 bool IsStrict = Op->isStrictFPOpcode();
20825
20826 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20827 EVT SrcVT = SrcVal.getValueType();
20828 EVT DstVT = Op.getValueType();
20829 const unsigned DstSz = Op.getValueType().getSizeInBits();
20830 const unsigned SrcSz = SrcVT.getSizeInBits();
20831 (void)DstSz;
20832 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20833 "Unexpected type for custom-lowering FP_ROUND");
20834
20835 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20836 "With both FP DP and 16, any FP conversion is legal!");
20837
20838 SDLoc Loc(Op);
20839
20840 // A single hardware instruction handles 32 -> 16 when FP16 is available.
20841 if (SrcSz == 32 && Subtarget->hasFP16())
20842 return Op;
20843
20844 // Lib call from 32 -> 16 / 64 -> [32, 16]
20845 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20846 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20847 "Unexpected type for custom-lowering FP_ROUND");
20848 MakeLibCallOptions CallOptions;
20849 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20850 SDValue Result;
20851 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20852 Loc, Chain);
20853 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20854}
20855
20856bool
20858 // The ARM target isn't yet aware of offsets.
20859 return false;
20860}
20861
20862 bool ARM::isBitFieldInvertedMask(unsigned v) {
20863 if (v == 0xffffffff)
20864 return false;
20865
20866 // There can be 1's on either or both "outsides"; all the "inside"
20867 // bits must be 0's.
20868 return isShiftedMask_32(~v);
20869}
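// Editor's note (illustrative, not part of the original source): for
// v = 0xF000000F the inside bits are all zero and ~v = 0x0FFFFFF0 is one
// contiguous run of ones, so the mask is accepted; v = 0xF0F0F0F0 is
// rejected because ~v = 0x0F0F0F0F is not a single shifted run.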
20870
20871/// isFPImmLegal - Returns true if the target can instruction select the
20872/// specified FP immediate natively. If false, the legalizer will
20873/// materialize the FP immediate as a load from a constant pool.
20874 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20875 bool ForCodeSize) const {
20876 if (!Subtarget->hasVFP3Base())
20877 return false;
20878 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20879 return ARM_AM::getFP16Imm(Imm) != -1;
20880 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20881 ARM_AM::getFP32FP16Imm(Imm) != -1)
20882 return true;
20883 if (VT == MVT::f32)
20884 return ARM_AM::getFP32Imm(Imm) != -1;
20885 if (VT == MVT::f64 && Subtarget->hasFP64())
20886 return ARM_AM::getFP64Imm(Imm) != -1;
20887 return false;
20888}
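// Editor's note (illustrative, not part of the original source): VFP/NEON
// "modified immediate" floats are limited to values encodable with a sign
// bit, a 3-bit exponent and a 4-bit fraction, so constants such as 1.0, 0.5
// or 31.0 can be materialized with a vmov, while something like 0.1 falls
// back to a constant-pool load.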
20889
20890/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20891/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20892/// specified in the intrinsic calls.
20893 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20894 const CallInst &I,
20895 MachineFunction &MF,
20896 unsigned Intrinsic) const {
20897 switch (Intrinsic) {
20898 case Intrinsic::arm_neon_vld1:
20899 case Intrinsic::arm_neon_vld2:
20900 case Intrinsic::arm_neon_vld3:
20901 case Intrinsic::arm_neon_vld4:
20902 case Intrinsic::arm_neon_vld2lane:
20903 case Intrinsic::arm_neon_vld3lane:
20904 case Intrinsic::arm_neon_vld4lane:
20905 case Intrinsic::arm_neon_vld2dup:
20906 case Intrinsic::arm_neon_vld3dup:
20907 case Intrinsic::arm_neon_vld4dup: {
20908 Info.opc = ISD::INTRINSIC_W_CHAIN;
20909 // Conservatively set memVT to the entire set of vectors loaded.
20910 auto &DL = I.getDataLayout();
20911 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20912 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20913 Info.ptrVal = I.getArgOperand(0);
20914 Info.offset = 0;
20915 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20916 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20917 // volatile loads with NEON intrinsics not supported
20918 Info.flags = MachineMemOperand::MOLoad;
20919 return true;
20920 }
20921 case Intrinsic::arm_neon_vld1x2:
20922 case Intrinsic::arm_neon_vld1x3:
20923 case Intrinsic::arm_neon_vld1x4: {
20924 Info.opc = ISD::INTRINSIC_W_CHAIN;
20925 // Conservatively set memVT to the entire set of vectors loaded.
20926 auto &DL = I.getDataLayout();
20927 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20928 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20929 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20930 Info.offset = 0;
20931 Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
20932 // volatile loads with NEON intrinsics not supported
20933 Info.flags = MachineMemOperand::MOLoad;
20934 return true;
20935 }
20936 case Intrinsic::arm_neon_vst1:
20937 case Intrinsic::arm_neon_vst2:
20938 case Intrinsic::arm_neon_vst3:
20939 case Intrinsic::arm_neon_vst4:
20940 case Intrinsic::arm_neon_vst2lane:
20941 case Intrinsic::arm_neon_vst3lane:
20942 case Intrinsic::arm_neon_vst4lane: {
20943 Info.opc = ISD::INTRINSIC_VOID;
20944 // Conservatively set memVT to the entire set of vectors stored.
20945 auto &DL = I.getDataLayout();
20946 unsigned NumElts = 0;
20947 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20948 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20949 if (!ArgTy->isVectorTy())
20950 break;
20951 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20952 }
20953 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20954 Info.ptrVal = I.getArgOperand(0);
20955 Info.offset = 0;
20956 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20957 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20958 // volatile stores with NEON intrinsics not supported
20959 Info.flags = MachineMemOperand::MOStore;
20960 return true;
20961 }
20962 case Intrinsic::arm_neon_vst1x2:
20963 case Intrinsic::arm_neon_vst1x3:
20964 case Intrinsic::arm_neon_vst1x4: {
20965 Info.opc = ISD::INTRINSIC_VOID;
20966 // Conservatively set memVT to the entire set of vectors stored.
20967 auto &DL = I.getDataLayout();
20968 unsigned NumElts = 0;
20969 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20970 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20971 if (!ArgTy->isVectorTy())
20972 break;
20973 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20974 }
20975 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20976 Info.ptrVal = I.getArgOperand(0);
20977 Info.offset = 0;
20978 Info.align = I.getParamAlign(0).valueOrOne();
20979 // volatile stores with NEON intrinsics not supported
20980 Info.flags = MachineMemOperand::MOStore;
20981 return true;
20982 }
20983 case Intrinsic::arm_mve_vld2q:
20984 case Intrinsic::arm_mve_vld4q: {
20985 Info.opc = ISD::INTRINSIC_W_CHAIN;
20986 // Conservatively set memVT to the entire set of vectors loaded.
20987 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
20988 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
20989 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20990 Info.ptrVal = I.getArgOperand(0);
20991 Info.offset = 0;
20992 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20993 // volatile loads with MVE intrinsics not supported
20994 Info.flags = MachineMemOperand::MOLoad;
20995 return true;
20996 }
20997 case Intrinsic::arm_mve_vst2q:
20998 case Intrinsic::arm_mve_vst4q: {
20999 Info.opc = ISD::INTRINSIC_VOID;
21000 // Conservatively set memVT to the entire set of vectors stored.
21001 Type *VecTy = I.getArgOperand(1)->getType();
21002 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21003 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21004 Info.ptrVal = I.getArgOperand(0);
21005 Info.offset = 0;
21006 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21007 // volatile stores with MVE intrinsics not supported
21008 Info.flags = MachineMemOperand::MOStore;
21009 return true;
21010 }
21011 case Intrinsic::arm_mve_vldr_gather_base:
21012 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21013 Info.opc = ISD::INTRINSIC_W_CHAIN;
21014 Info.ptrVal = nullptr;
21015 Info.memVT = MVT::getVT(I.getType());
21016 Info.align = Align(1);
21017 Info.flags |= MachineMemOperand::MOLoad;
21018 return true;
21019 }
21020 case Intrinsic::arm_mve_vldr_gather_base_wb:
21021 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21022 Info.opc = ISD::INTRINSIC_W_CHAIN;
21023 Info.ptrVal = nullptr;
21024 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21025 Info.align = Align(1);
21026 Info.flags |= MachineMemOperand::MOLoad;
21027 return true;
21028 }
21029 case Intrinsic::arm_mve_vldr_gather_offset:
21030 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21031 Info.opc = ISD::INTRINSIC_W_CHAIN;
21032 Info.ptrVal = nullptr;
21033 MVT DataVT = MVT::getVT(I.getType());
21034 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21035 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21036 DataVT.getVectorNumElements());
21037 Info.align = Align(1);
21038 Info.flags |= MachineMemOperand::MOLoad;
21039 return true;
21040 }
21041 case Intrinsic::arm_mve_vstr_scatter_base:
21042 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21043 Info.opc = ISD::INTRINSIC_VOID;
21044 Info.ptrVal = nullptr;
21045 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21046 Info.align = Align(1);
21047 Info.flags |= MachineMemOperand::MOStore;
21048 return true;
21049 }
21050 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21051 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21052 Info.opc = ISD::INTRINSIC_W_CHAIN;
21053 Info.ptrVal = nullptr;
21054 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21055 Info.align = Align(1);
21056 Info.flags |= MachineMemOperand::MOStore;
21057 return true;
21058 }
21059 case Intrinsic::arm_mve_vstr_scatter_offset:
21060 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21061 Info.opc = ISD::INTRINSIC_VOID;
21062 Info.ptrVal = nullptr;
21063 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21064 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21065 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21066 DataVT.getVectorNumElements());
21067 Info.align = Align(1);
21068 Info.flags |= MachineMemOperand::MOStore;
21069 return true;
21070 }
21071 case Intrinsic::arm_ldaex:
21072 case Intrinsic::arm_ldrex: {
21073 auto &DL = I.getDataLayout();
21074 Type *ValTy = I.getParamElementType(0);
21075 Info.opc = ISD::INTRINSIC_W_CHAIN;
21076 Info.memVT = MVT::getVT(ValTy);
21077 Info.ptrVal = I.getArgOperand(0);
21078 Info.offset = 0;
21079 Info.align = DL.getABITypeAlign(ValTy);
21080 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21081 return true;
21082 }
21083 case Intrinsic::arm_stlex:
21084 case Intrinsic::arm_strex: {
21085 auto &DL = I.getDataLayout();
21086 Type *ValTy = I.getParamElementType(1);
21087 Info.opc = ISD::INTRINSIC_W_CHAIN;
21088 Info.memVT = MVT::getVT(ValTy);
21089 Info.ptrVal = I.getArgOperand(1);
21090 Info.offset = 0;
21091 Info.align = DL.getABITypeAlign(ValTy);
21092 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21093 return true;
21094 }
21095 case Intrinsic::arm_stlexd:
21096 case Intrinsic::arm_strexd:
21097 Info.opc = ISD::INTRINSIC_W_CHAIN;
21098 Info.memVT = MVT::i64;
21099 Info.ptrVal = I.getArgOperand(2);
21100 Info.offset = 0;
21101 Info.align = Align(8);
21102 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
21103 return true;
21104
21105 case Intrinsic::arm_ldaexd:
21106 case Intrinsic::arm_ldrexd:
21107 Info.opc = ISD::INTRINSIC_W_CHAIN;
21108 Info.memVT = MVT::i64;
21109 Info.ptrVal = I.getArgOperand(0);
21110 Info.offset = 0;
21111 Info.align = Align(8);
21112 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
21113 return true;
21114
21115 default:
21116 break;
21117 }
21118
21119 return false;
21120}
21121
21122/// Returns true if it is beneficial to convert a load of a constant
21123/// to just the constant itself.
21124 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21125 Type *Ty) const {
21126 assert(Ty->isIntegerTy());
21127
21128 unsigned Bits = Ty->getPrimitiveSizeInBits();
21129 if (Bits == 0 || Bits > 32)
21130 return false;
21131 return true;
21132}
21133
21135 unsigned Index) const {
21137 return false;
21138
21139 return (Index == 0 || Index == ResVT.getVectorNumElements());
21140}
21141
21142 Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21143 ARM_MB::MemBOpt Domain) const {
21144 // First, if the target has no DMB, see what fallback we can use.
21145 if (!Subtarget->hasDataBarrier()) {
21146 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21147 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21148 // here.
21149 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21150 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21151 Builder.getInt32(0), Builder.getInt32(7),
21152 Builder.getInt32(10), Builder.getInt32(5)};
21153 return Builder.CreateIntrinsic(Intrinsic::arm_mcr, args);
21154 } else {
21155 // Instead of using barriers, atomic accesses on these subtargets use
21156 // libcalls.
21157 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21158 }
21159 } else {
21160 // Only a full system barrier exists in the M-class architectures.
21161 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21162 Constant *CDomain = Builder.getInt32(Domain);
21163 return Builder.CreateIntrinsic(Intrinsic::arm_dmb, CDomain);
21164 }
21165}
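// Editor's note (illustrative, not part of the original source): the mcr
// fallback above corresponds to the classic ARMv6 barrier encoding
//   mcr p15, #0, rX, c7, c10, #5   ; CP15 Data Memory Barrier
// which is what the operand list {15, 0, value, 7, 10, 5} describes.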
21166
21167// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21168 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21169 Instruction *Inst,
21170 AtomicOrdering Ord) const {
21171 switch (Ord) {
21172 case AtomicOrdering::NotAtomic:
21173 case AtomicOrdering::Unordered:
21174 llvm_unreachable("Invalid fence: unordered/non-atomic");
21175 case AtomicOrdering::Monotonic:
21176 case AtomicOrdering::Acquire:
21177 return nullptr; // Nothing to do
21178 case AtomicOrdering::SequentiallyConsistent:
21179 if (!Inst->hasAtomicStore())
21180 return nullptr; // Nothing to do
21181 [[fallthrough]];
21182 case AtomicOrdering::Release:
21183 case AtomicOrdering::AcquireRelease:
21184 if (Subtarget->preferISHSTBarriers())
21185 return makeDMB(Builder, ARM_MB::ISHST);
21186 // FIXME: add a comment with a link to documentation justifying this.
21187 else
21188 return makeDMB(Builder, ARM_MB::ISH);
21189 }
21190 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21191}
21192
21193 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21194 Instruction *Inst,
21195 AtomicOrdering Ord) const {
21196 switch (Ord) {
21197 case AtomicOrdering::NotAtomic:
21198 case AtomicOrdering::Unordered:
21199 llvm_unreachable("Invalid fence: unordered/not-atomic");
21200 case AtomicOrdering::Monotonic:
21201 case AtomicOrdering::Release:
21202 return nullptr; // Nothing to do
21203 case AtomicOrdering::Acquire:
21204 case AtomicOrdering::AcquireRelease:
21205 case AtomicOrdering::SequentiallyConsistent:
21206 return makeDMB(Builder, ARM_MB::ISH);
21207 }
21208 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21209}
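// Editor's note (illustrative, not part of the original source): with the
// mapping above, a seq_cst store becomes roughly "dmb ish; str; dmb ish"
// (the leading barrier may be dmb ishst on cores that prefer it), while an
// acquire load only needs the trailing dmb ish.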
21210
21211 // Loads and stores less than 64 bits are already atomic; ones above that
21212// are doomed anyway, so defer to the default libcall and blame the OS when
21213// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21214// anything for those.
21217 bool has64BitAtomicStore;
21218 if (Subtarget->isMClass())
21219 has64BitAtomicStore = false;
21220 else if (Subtarget->isThumb())
21221 has64BitAtomicStore = Subtarget->hasV7Ops();
21222 else
21223 has64BitAtomicStore = Subtarget->hasV6Ops();
21224
21225 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21226 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21227 : AtomicExpansionKind::None;
21228}
21229
21230 // Loads and stores less than 64 bits are already atomic; ones above that
21231// are doomed anyway, so defer to the default libcall and blame the OS when
21232// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21233// anything for those.
21234// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21235// guarantee, see DDI0406C ARM architecture reference manual,
21236// sections A8.8.72-74 LDRD)
21239 bool has64BitAtomicLoad;
21240 if (Subtarget->isMClass())
21241 has64BitAtomicLoad = false;
21242 else if (Subtarget->isThumb())
21243 has64BitAtomicLoad = Subtarget->hasV7Ops();
21244 else
21245 has64BitAtomicLoad = Subtarget->hasV6Ops();
21246
21247 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21248 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21249 : AtomicExpansionKind::None;
21250}
21251
21252// For the real atomic operations, we have ldrex/strex up to 32 bits,
21253// and up to 64 bits on the non-M profiles
21256 if (AI->isFloatingPointOperation())
21257 return AtomicExpansionKind::CmpXChg;
21258
21259 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21260 bool hasAtomicRMW;
21261 if (Subtarget->isMClass())
21262 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21263 else if (Subtarget->isThumb())
21264 hasAtomicRMW = Subtarget->hasV7Ops();
21265 else
21266 hasAtomicRMW = Subtarget->hasV6Ops();
21267 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21268 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21269 // implement atomicrmw without spilling. If the target address is also on
21270 // the stack and close enough to the spill slot, this can lead to a
21271 // situation where the monitor always gets cleared and the atomic operation
21272 // can never succeed. So at -O0 lower this operation to a CAS loop.
21273 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21274 return AtomicExpansionKind::CmpXChg;
21275 return AtomicExpansionKind::LLSC;
21276 }
21277 return AtomicExpansionKind::None;
21278}
21279
21280// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21281// bits, and up to 64 bits on the non-M profiles.
21284 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21285 // implement cmpxchg without spilling. If the address being exchanged is also
21286 // on the stack and close enough to the spill slot, this can lead to a
21287 // situation where the monitor always gets cleared and the atomic operation
21288 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21289 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21290 bool HasAtomicCmpXchg;
21291 if (Subtarget->isMClass())
21292 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21293 else if (Subtarget->isThumb())
21294 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21295 else
21296 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21297 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21298 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21299 return AtomicExpansionKind::LLSC;
21300 return AtomicExpansionKind::None;
21301}
21302
21303 bool ARMTargetLowering::shouldInsertFencesForAtomic(
21304 const Instruction *I) const {
21305 return InsertFencesForAtomic;
21306}
21307
21309 // ROPI/RWPI are not supported currently.
21310 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21311}
21312
21313 void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21314 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
21315 getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
21316 if (SecurityCheckCookieLibcall == RTLIB::Unsupported)
21318
21319 // MSVC CRT has a global variable holding security cookie.
21320 M.getOrInsertGlobal("__security_cookie",
21321 PointerType::getUnqual(M.getContext()));
21322
21323 // MSVC CRT has a function to validate security cookie.
21324 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21325 getLibcallImplName(SecurityCheckCookieLibcall),
21326 Type::getVoidTy(M.getContext()), PointerType::getUnqual(M.getContext()));
21327 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21328 F->addParamAttr(0, Attribute::AttrKind::InReg);
21329}
21330
21331 Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21332 // MSVC CRT has a function to validate security cookie.
21333 RTLIB::LibcallImpl SecurityCheckCookie =
21334 getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
21335 if (SecurityCheckCookie != RTLIB::Unsupported)
21336 return M.getFunction(getLibcallImplName(SecurityCheckCookie));
21337 return TargetLowering::getSSPStackGuardCheck(M);
21338}
21339
21340 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21341 unsigned &Cost) const {
21342 // If we do not have NEON, vector types are not natively supported.
21343 if (!Subtarget->hasNEON())
21344 return false;
21345
21346 // Floating point values and vector values map to the same register file.
21347 // Therefore, although we could do a store extract of a vector type, this is
21348 // better to leave at float as we have more freedom in the addressing mode for
21349 // those.
21350 if (VectorTy->isFPOrFPVectorTy())
21351 return false;
21352
21353 // If the index is unknown at compile time, this is very expensive to lower
21354 // and it is not possible to combine the store with the extract.
21355 if (!isa<ConstantInt>(Idx))
21356 return false;
21357
21358 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21359 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21360 // We can do a store + vector extract on any vector that fits perfectly in a D
21361 // or Q register.
21362 if (BitWidth == 64 || BitWidth == 128) {
21363 Cost = 0;
21364 return true;
21365 }
21366 return false;
21367}
21368
21369 bool ARMTargetLowering::canCreateUndefOrPoisonForTargetNode(
21370 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21371 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21372 unsigned Opcode = Op.getOpcode();
21373 switch (Opcode) {
21374 case ARMISD::VORRIMM:
21375 case ARMISD::VBICIMM:
21376 return false;
21377 }
21378 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21379 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21380}
21381
21383 return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
21384}
21385
21387 return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
21388}
21389
21390 bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21391 const Instruction &AndI) const {
21392 if (!Subtarget->hasV7Ops())
21393 return false;
21394
21395 // Sink the `and` instruction only if the mask would fit into a modified
21396 // immediate operand.
21397 auto *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21398 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21399 return false;
21400 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21401 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21402 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21403}
21404
21407 SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21408 if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21411 ExpansionFactor);
21412}
21413
21414 Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21415 Value *Addr,
21416 AtomicOrdering Ord) const {
21417 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21418 bool IsAcquire = isAcquireOrStronger(Ord);
21419
21420 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21421 // intrinsic must return {i32, i32} and we have to recombine them into a
21422 // single i64 here.
21423 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21424 Intrinsic::ID Int =
21425 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21426
21427 Value *LoHi =
21428 Builder.CreateIntrinsic(Int, Addr, /*FMFSource=*/nullptr, "lohi");
21429
21430 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21431 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21432 if (!Subtarget->isLittle())
21433 std::swap (Lo, Hi);
21434 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21435 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21436 return Builder.CreateOr(
21437 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21438 }
21439
21440 Type *Tys[] = { Addr->getType() };
21441 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21442 CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
21443
21444 CI->addParamAttr(
21445 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21446 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21447}
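// Editor's note (illustrative, not part of the original source): for a
// 64-bit acquire load this emits IR along the lines of
//   %lohi = call { i32, i32 } @llvm.arm.ldaexd(ptr %addr)
// and then reassembles the two halves into an i64 with zext/shl/or, as the
// code above shows.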
21448
21449 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21450 IRBuilderBase &Builder) const {
21451 if (!Subtarget->hasV7Ops())
21452 return;
21453 Builder.CreateIntrinsic(Intrinsic::arm_clrex, {});
21454}
21455
21456 Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21457 Value *Val, Value *Addr,
21458 AtomicOrdering Ord) const {
21459 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21460 bool IsRelease = isReleaseOrStronger(Ord);
21461
21462 // Since the intrinsics must have legal type, the i64 intrinsics take two
21463 // parameters: "i32, i32". We must marshal Val into the appropriate form
21464 // before the call.
21465 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21466 Intrinsic::ID Int =
21467 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21468 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21469
21470 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21471 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21472 if (!Subtarget->isLittle())
21473 std::swap(Lo, Hi);
21474 return Builder.CreateIntrinsic(Int, {Lo, Hi, Addr});
21475 }
21476
21477 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21478 Type *Tys[] = { Addr->getType() };
21480
21481 CallInst *CI = Builder.CreateCall(
21482 Strex, {Builder.CreateZExtOrBitCast(
21483 Val, Strex->getFunctionType()->getParamType(0)),
21484 Addr});
21485 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21486 Val->getType()));
21487 return CI;
21488}
21489
21490
21491 bool ARMTargetLowering::alignLoopsWithOptSize() const {
21492 return Subtarget->isMClass();
21493}
21494
21495/// A helper function for determining the number of interleaved accesses we
21496/// will generate when lowering accesses of the given type.
21497unsigned
21498 ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21499 const DataLayout &DL) const {
21500 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21501}
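// Editor's note (illustrative, not part of the original source): the
// rounding above means a 512-bit vector such as <16 x i32> maps to
// (512 + 127) / 128 = 4 accesses, while a 64-bit <8 x i8> still counts as
// one.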
21502
21503 bool ARMTargetLowering::isLegalInterleavedAccessType(
21504 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21505 const DataLayout &DL) const {
21506
21507 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21508 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21509
21510 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21511 return false;
21512
21513 // Ensure the vector doesn't have f16 elements. Even though we could do an
21514 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21515 // f32.
21516 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21517 return false;
21518 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21519 return false;
21520
21521 // Ensure the number of vector elements is greater than 1.
21522 if (VecTy->getNumElements() < 2)
21523 return false;
21524
21525 // Ensure the element type is legal.
21526 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21527 return false;
21528 // And check that the alignment is high enough under MVE.
21529 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21530 return false;
21531
21532 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21533 // 128 will be split into multiple interleaved accesses.
21534 if (Subtarget->hasNEON() && VecSize == 64)
21535 return true;
21536 return VecSize % 128 == 0;
21537}
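// Editor's note (illustrative, not part of the original source): under these
// rules a factor-2 access on <8 x i16> (128 bits total) is legal, a NEON
// access on <8 x half> is rejected because of the f16 element restriction,
// and MVE rejects any factor-3 grouping.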
21538
21540 if (Subtarget->hasNEON())
21541 return 4;
21542 if (Subtarget->hasMVEIntegerOps())
21545}
21546
21547/// Lower an interleaved load into a vldN intrinsic.
21548///
21549/// E.g. Lower an interleaved load (Factor = 2):
21550/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21551/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21552/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21553///
21554/// Into:
21555/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21556/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21557/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21558 bool ARMTargetLowering::lowerInterleavedLoad(
21559 Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
21560 ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const {
21561 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21562 "Invalid interleave factor");
21563 assert(!Shuffles.empty() && "Empty shufflevector input");
21564 assert(Shuffles.size() == Indices.size() &&
21565 "Unmatched number of shufflevectors and indices");
21566
21567 auto *LI = dyn_cast<LoadInst>(Load);
21568 if (!LI)
21569 return false;
21570 assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");
21571
21572 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21573 Type *EltTy = VecTy->getElementType();
21574
21575 const DataLayout &DL = LI->getDataLayout();
21576 Align Alignment = LI->getAlign();
21577
21578 // Skip if we do not have NEON and skip illegal vector types. We can
21579 // "legalize" wide vector types into multiple interleaved accesses as long as
21580 // the vector types are divisible by 128.
21581 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21582 return false;
21583
21584 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21585
21586 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21587 // load integer vectors first and then convert to pointer vectors.
21588 if (EltTy->isPointerTy())
21589 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21590
21591 IRBuilder<> Builder(LI);
21592
21593 // The base address of the load.
21594 Value *BaseAddr = LI->getPointerOperand();
21595
21596 if (NumLoads > 1) {
21597 // If we're going to generate more than one load, reset the sub-vector type
21598 // to something legal.
21599 VecTy = FixedVectorType::get(VecTy->getElementType(),
21600 VecTy->getNumElements() / NumLoads);
21601 }
21602
21603 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21604
21605 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21606 if (Subtarget->hasNEON()) {
21607 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21608 Type *Tys[] = {VecTy, PtrTy};
21609 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21610 Intrinsic::arm_neon_vld3,
21611 Intrinsic::arm_neon_vld4};
21612
21614 Ops.push_back(BaseAddr);
21615 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21616
21617 return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,
21618 /*FMFSource=*/nullptr, "vldN");
21619 } else {
21620 assert((Factor == 2 || Factor == 4) &&
21621 "expected interleave factor of 2 or 4 for MVE");
21622 Intrinsic::ID LoadInts =
21623 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21624 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21625 Type *Tys[] = {VecTy, PtrTy};
21626
21628 Ops.push_back(BaseAddr);
21629 return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,
21630 "vldN");
21631 }
21632 };
21633
21634 // Holds sub-vectors extracted from the load intrinsic return values. The
21635 // sub-vectors are associated with the shufflevector instructions they will
21636 // replace.
21637 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21638
21639 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21640 // If we're generating more than one load, compute the base address of
21641 // subsequent loads as an offset from the previous.
21642 if (LoadCount > 0)
21643 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21644 VecTy->getNumElements() * Factor);
21645
21646 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21647
21648 // Replace uses of each shufflevector with the corresponding vector loaded
21649 // by ldN.
21650 for (unsigned i = 0; i < Shuffles.size(); i++) {
21651 ShuffleVectorInst *SV = Shuffles[i];
21652 unsigned Index = Indices[i];
21653
21654 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21655
21656 // Convert the integer vector to pointer vector if the element is pointer.
21657 if (EltTy->isPointerTy())
21658 SubVec = Builder.CreateIntToPtr(
21659 SubVec,
21660 FixedVectorType::get(EltTy, VecTy->getNumElements()));
21661
21662 SubVecs[SV].push_back(SubVec);
21663 }
21664 }
21665
21666 // Replace uses of the shufflevector instructions with the sub-vectors
21667 // returned by the load intrinsic. If a shufflevector instruction is
21668 // associated with more than one sub-vector, those sub-vectors will be
21669 // concatenated into a single wide vector.
21670 for (ShuffleVectorInst *SVI : Shuffles) {
21671 auto &SubVec = SubVecs[SVI];
21672 auto *WideVec =
21673 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21674 SVI->replaceAllUsesWith(WideVec);
21675 }
21676
21677 return true;
21678}
21679
21680/// Lower an interleaved store into a vstN intrinsic.
21681///
21682/// E.g. Lower an interleaved store (Factor = 3):
21683/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21684/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21685/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21686///
21687/// Into:
21688/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21689/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21690/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21691/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21692///
21693/// Note that the new shufflevectors will be removed and we'll only generate one
21694/// vst3 instruction in CodeGen.
21695///
21696/// Example for a more general valid mask (Factor 3). Lower:
21697/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21698/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21699/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21700///
21701/// Into:
21702/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21703/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21704/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21705/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21706 bool ARMTargetLowering::lowerInterleavedStore(Instruction *Store,
21707 Value *LaneMask,
21708 ShuffleVectorInst *SVI,
21709 unsigned Factor,
21710 const APInt &GapMask) const {
21711 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21712 "Invalid interleave factor");
21713 auto *SI = dyn_cast<StoreInst>(Store);
21714 if (!SI)
21715 return false;
21716 assert(!LaneMask && GapMask.popcount() == Factor &&
21717 "Unexpected mask on store");
21718
21719 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21720 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21721
21722 unsigned LaneLen = VecTy->getNumElements() / Factor;
21723 Type *EltTy = VecTy->getElementType();
21724 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21725
21726 const DataLayout &DL = SI->getDataLayout();
21727 Align Alignment = SI->getAlign();
21728
21729 // Skip if we do not have NEON and skip illegal vector types. We can
21730 // "legalize" wide vector types into multiple interleaved accesses as long as
21731 // the vector types are divisible by 128.
21732 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21733 return false;
21734
21735 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21736
21737 Value *Op0 = SVI->getOperand(0);
21738 Value *Op1 = SVI->getOperand(1);
21739 IRBuilder<> Builder(SI);
21740
21741 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21742 // vectors to integer vectors.
21743 if (EltTy->isPointerTy()) {
21744 Type *IntTy = DL.getIntPtrType(EltTy);
21745
21746 // Convert to the corresponding integer vector.
21747 auto *IntVecTy =
21748 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21749 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21750 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21751
21752 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21753 }
21754
21755 // The base address of the store.
21756 Value *BaseAddr = SI->getPointerOperand();
21757
21758 if (NumStores > 1) {
21759 // If we're going to generate more than one store, reset the lane length
21760 // and sub-vector type to something legal.
21761 LaneLen /= NumStores;
21762 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21763 }
21764
21765 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21766
21767 auto Mask = SVI->getShuffleMask();
21768
21769 auto createStoreIntrinsic = [&](Value *BaseAddr,
21770 SmallVectorImpl<Value *> &Shuffles) {
21771 if (Subtarget->hasNEON()) {
21772 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21773 Intrinsic::arm_neon_vst3,
21774 Intrinsic::arm_neon_vst4};
21775 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21776 Type *Tys[] = {PtrTy, SubVecTy};
21777
21779 Ops.push_back(BaseAddr);
21780 append_range(Ops, Shuffles);
21781 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21782 Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);
21783 } else {
21784 assert((Factor == 2 || Factor == 4) &&
21785 "expected interleave factor of 2 or 4 for MVE");
21786 Intrinsic::ID StoreInts =
21787 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21788 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21789 Type *Tys[] = {PtrTy, SubVecTy};
21790
21792 Ops.push_back(BaseAddr);
21793 append_range(Ops, Shuffles);
21794 for (unsigned F = 0; F < Factor; F++) {
21795 Ops.push_back(Builder.getInt32(F));
21796 Builder.CreateIntrinsic(StoreInts, Tys, Ops);
21797 Ops.pop_back();
21798 }
21799 }
21800 };
21801
21802 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21803 // If we're generating more than one store, we compute the base address of
21804 // subsequent stores as an offset from the previous.
21805 if (StoreCount > 0)
21806 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21807 BaseAddr, LaneLen * Factor);
21808
21809 SmallVector<Value *, 4> Shuffles;
21810
21811 // Split the shufflevector operands into sub vectors for the new vstN call.
21812 for (unsigned i = 0; i < Factor; i++) {
21813 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21814 if (Mask[IdxI] >= 0) {
21815 Shuffles.push_back(Builder.CreateShuffleVector(
21816 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21817 } else {
21818 unsigned StartMask = 0;
21819 for (unsigned j = 1; j < LaneLen; j++) {
21820 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21821 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21822 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21823 break;
21824 }
21825 }
21826 // Note: If all elements in a chunk are undefs, StartMask=0!
21827 // Note: Filling undef gaps with random elements is ok, since
21828 // those elements were being written anyway (with undefs).
21829 // In the case of all undefs we're defaulting to using elems from 0
21830 // Note: StartMask cannot be negative, it's checked in
21831 // isReInterleaveMask
21832 Shuffles.push_back(Builder.CreateShuffleVector(
21833 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21834 }
21835 }
21836
21837 createStoreIntrinsic(BaseAddr, Shuffles);
21838 }
21839 return true;
21840}
21841
21842 enum HABaseType {
21843 HA_UNKNOWN = 0,
21844 HA_FLOAT,
21845 HA_DOUBLE,
21846 HA_VECT64,
21847 HA_VECT128
21848 };
21849
21850 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21851 uint64_t &Members) {
21852 if (auto *ST = dyn_cast<StructType>(Ty)) {
21853 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21854 uint64_t SubMembers = 0;
21855 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21856 return false;
21857 Members += SubMembers;
21858 }
21859 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21860 uint64_t SubMembers = 0;
21861 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21862 return false;
21863 Members += SubMembers * AT->getNumElements();
21864 } else if (Ty->isFloatTy()) {
21865 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21866 return false;
21867 Members = 1;
21868 Base = HA_FLOAT;
21869 } else if (Ty->isDoubleTy()) {
21870 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21871 return false;
21872 Members = 1;
21873 Base = HA_DOUBLE;
21874 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21875 Members = 1;
21876 switch (Base) {
21877 case HA_FLOAT:
21878 case HA_DOUBLE:
21879 return false;
21880 case HA_VECT64:
21881 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21882 case HA_VECT128:
21883 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21884 case HA_UNKNOWN:
21885 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21886 case 64:
21887 Base = HA_VECT64;
21888 return true;
21889 case 128:
21890 Base = HA_VECT128;
21891 return true;
21892 default:
21893 return false;
21894 }
21895 }
21896 }
21897
21898 return (Members > 0 && Members <= 4);
21899}
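// Editor's note (illustrative, not part of the original source): under this
// definition struct { float x, y, z; } is a homogeneous aggregate with
// Base = HA_FLOAT and Members = 3, while struct { float; double; } is not,
// because the two element types disagree.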
21900
21901/// Return the correct alignment for the current calling convention.
21902 Align ARMTargetLowering::getABIAlignmentForCallingConv(
21903 Type *ArgTy, const DataLayout &DL) const {
21904 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21905 if (!ArgTy->isVectorTy())
21906 return ABITypeAlign;
21907
21908 // Avoid over-aligning vector parameters. It would require realigning the
21909 // stack and waste space for no real benefit.
21910 MaybeAlign StackAlign = DL.getStackAlignment();
21911 assert(StackAlign && "data layout string is missing stack alignment");
21912 return std::min(ABITypeAlign, *StackAlign);
21913}
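// Editor's note (illustrative, not part of the original source): with a
// typical AAPCS data layout whose stack alignment is 8 bytes, a <4 x i32>
// argument (16-byte ABI alignment) is therefore passed with 8-byte alignment
// rather than forcing stack realignment.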
21914
21915/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21916/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21917/// passing according to AAPCS rules.
21918 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
21919 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21920 const DataLayout &DL) const {
21921 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21922 CallingConv::ARM_AAPCS_VFP)
21923 return false;
21924
21925 HABaseType Base = HA_UNKNOWN;
21926 uint64_t Members = 0;
21927 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21928 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21929
21930 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21931 return IsHA || IsIntArray;
21932}
21933
21934 Register ARMTargetLowering::getExceptionPointerRegister(
21935 const Constant *PersonalityFn) const {
21936 // Platforms which do not use SjLj EH may return values in these registers
21937 // via the personality function.
21939 return EM == ExceptionHandling::SjLj ? Register() : ARM::R0;
21940}
21941
21942 Register ARMTargetLowering::getExceptionSelectorRegister(
21943 const Constant *PersonalityFn) const {
21944 // Platforms which do not use SjLj EH may return values in these registers
21945 // via the personality function.
21947 return EM == ExceptionHandling::SjLj ? Register() : ARM::R1;
21948}
21949
21950void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21951 // Update IsSplitCSR in ARMFunctionInfo.
21952 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21953 AFI->setIsSplitCSR(true);
21954}
21955
21956void ARMTargetLowering::insertCopiesSplitCSR(
21957 MachineBasicBlock *Entry,
21958 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21959 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21960 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21961 if (!IStart)
21962 return;
21963
21964 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21965 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21966 MachineBasicBlock::iterator MBBI = Entry->begin();
21967 for (const MCPhysReg *I = IStart; *I; ++I) {
21968 const TargetRegisterClass *RC = nullptr;
21969 if (ARM::GPRRegClass.contains(*I))
21970 RC = &ARM::GPRRegClass;
21971 else if (ARM::DPRRegClass.contains(*I))
21972 RC = &ARM::DPRRegClass;
21973 else
21974 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
21975
21976 Register NewVR = MRI->createVirtualRegister(RC);
21977 // Create copy from CSR to a virtual register.
21978 // FIXME: this currently does not emit CFI pseudo-instructions, it works
21979 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
21980 // nounwind. If we want to generalize this later, we may need to emit
21981 // CFI pseudo-instructions.
21982 assert(Entry->getParent()->getFunction().hasFnAttribute(
21983 Attribute::NoUnwind) &&
21984 "Function should be nounwind in insertCopiesSplitCSR!");
21985 Entry->addLiveIn(*I);
21986 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
21987 .addReg(*I);
21988
21989 // Insert the copy-back instructions right before the terminator.
21990 for (auto *Exit : Exits)
21991 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
21992 TII->get(TargetOpcode::COPY), *I)
21993 .addReg(NewVR);
21994 }
21995}
21996
22001
22002 bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22003 return Subtarget->hasMVEIntegerOps();
22004}
22005
22006 bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22007 ComplexDeinterleavingOperation Operation, Type *Ty) const {
22008 auto *VTy = dyn_cast<FixedVectorType>(Ty);
22009 if (!VTy)
22010 return false;
22011
22012 auto *ScalarTy = VTy->getScalarType();
22013 unsigned NumElements = VTy->getNumElements();
22014
22015 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22016 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22017 return false;
22018
22019 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22020 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22021 return Subtarget->hasMVEFloatOps();
22022
22023 if (Operation != ComplexDeinterleavingOperation::CAdd)
22024 return false;
22025
22026 return Subtarget->hasMVEIntegerOps() &&
22027 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22028 ScalarTy->isIntegerTy(32));
22029}
22030
22031 Value *ARMTargetLowering::createComplexDeinterleavingIR(
22032 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22033 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22034 Value *Accumulator) const {
22035
22036 auto *Ty = cast<FixedVectorType>(InputA->getType());
22037
22038 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22039
22040 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22041
22042 if (TyWidth > 128) {
22043 int Stride = Ty->getNumElements() / 2;
22044 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22045 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22046 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22047 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22048
22049 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22050 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22051 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22052 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22053 Value *LowerSplitAcc = nullptr;
22054 Value *UpperSplitAcc = nullptr;
22055
22056 if (Accumulator) {
22057 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22058 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22059 }
22060
22061 auto *LowerSplitInt = createComplexDeinterleavingIR(
22062 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22063 auto *UpperSplitInt = createComplexDeinterleavingIR(
22064 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22065
22066 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22067 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22068 }
22069
22070 auto *IntTy = Type::getInt32Ty(B.getContext());
22071
22072 ConstantInt *ConstRotation = nullptr;
22073 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22074 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22075
22076 if (Accumulator)
22077 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22078 {ConstRotation, Accumulator, InputB, InputA});
22079 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22080 {ConstRotation, InputB, InputA});
22081 }
22082
22083 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22084 // 1 means the value is not halved.
22085 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22086
22087 if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22088 ConstRotation = ConstantInt::get(IntTy, 0);
22089 else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22090 ConstRotation = ConstantInt::get(IntTy, 1);
22091
22092 if (!ConstRotation)
22093 return nullptr; // Invalid rotation for arm_mve_vcaddq
22094
22095 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22096 {ConstHalving, ConstRotation, InputA, InputB});
22097 }
22098
22099 return nullptr;
22100}
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static EVT getVectorTyFromPredicateVector(EVT VT)
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
static bool isSRL16(const SDValue &Op)
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V)
static bool isLTorLE(ISD::CondCode CC)
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue FindBFIToCombineWith(SDNode *N)
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
static bool isSRA16(const SDValue &Op)
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isSHL16(const SDValue &Op)
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
cl::opt< unsigned > ArmMaxBaseUpdatesToCheck("arm-max-base-updates-to-check", cl::Hidden, cl::desc("Maximum number of base-updates to check generating postindex."), cl::init(64))
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
static bool isLegalMVEShuffleOp(unsigned PFEntry)
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isZeroVector(SDValue N)
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file a TargetTransformInfoImplBase conforming object specific to the ARM target machine.
Function Alias Analysis false
Function Alias Analysis Results
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI)
static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI, Value *Offset, dxil::ResourceTypeInfo &RTI)
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
#define Check(C,...)
#define op(i)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
static Value * LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctpop of V before the specified instruction IP.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
#define MAKE_CASE(V)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
nvptx lower args
uint64_t High
uint64_t IntrinsicInst * II
PowerPC Reduce CR logical Operation
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static constexpr int Concat[]
Value * RHS
Value * LHS
BinaryOperator * Mul
static bool isIntrinsic(const CallBase &Call, Intrinsic::ID ID)
The Input class is used to parse a yaml document into in-memory structs and vectors.
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5999
APInt bitcastToAPInt() const
Definition APFloat.h:1353
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:423
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1111
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
unsigned logBase2() const
Definition APInt.h:1761
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:475
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1656
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
const Triple & getTargetTriple() const
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb1Only() const
bool useFPVFMx() const
bool isThumb2() const
bool isTargetWindows() const
bool hasBaseDSP() const
const ARMTargetLowering * getTargetLowering() const override
const ARMBaseRegisterInfo * getRegisterInfo() const override
bool hasVFP2Base() const
bool useFPVFMx64() const
bool isLittle() const
bool useFPVFMx16() const
bool isMClass() const
bool useMulOps() const
Align getDualLoadStoreAlignment() const
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool shouldExpandCmpUsingSelects(EVT VT) const override
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool lowerInterleavedStore(Instruction *Store, Value *Mask, ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved store into a vstN intrinsic.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved load into a vldN intrinsic.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy,Idx).
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
const ARMBaseTargetMachine & getTM() const
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
bool isFloatingPointOperation() const
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
The address of a basic block.
Definition Constants.h:899
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
AttributeList getAttributes() const
Return the attributes for this call.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:198
bool isBigEndian() const
Definition DataLayout.h:199
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition DataLayout.h:228
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
LLVM_ABI Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition DataLayout.h:286
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
unsigned size() const
Definition DenseMap.h:108
bool empty() const
Definition DenseMap.h:107
iterator begin()
Definition DenseMap.h:78
iterator end()
Definition DenseMap.h:81
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition Function.h:687
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const GlobalValue * getGlobal() const
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
bool hasDLLImportStorageClass() const
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
bool is64BitVector() const
Return true if this is a 64-bit vector type.
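A small sketch of how the MVT factory and query methods above compose; getDoubledElementVT is a hypothetical helper, not an ARM routine, and the header path is the one used by recent LLVM trees.

// Hypothetical helper: widen each element to twice its width, keeping the
// element count (e.g. v8i8 -> v8i16).
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

static MVT getDoubledElementVT(MVT VT) {
  MVT WideElt = MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
  return MVT::getVectorVT(WideElt, VT.getVectorNumElements());
}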
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
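The block-manipulation calls above usually appear together in the "split the current block and re-wire successors" pattern used by custom instruction inserters. A hedged sketch, assuming MI and ThisMBB are handed in by such an inserter:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <iterator>
using namespace llvm;

static void splitBlockForPseudo(MachineInstr &MI, MachineBasicBlock *ThisMBB) {
  MachineFunction *MF = ThisMBB->getParent();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction::iterator It = ++ThisMBB->getIterator();

  MachineBasicBlock *CopyMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, CopyMBB);
  MF->insert(It, SinkMBB);

  // Everything after MI, plus all outgoing edges, moves to the sink block.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(MI)), ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Diamond: fall through into the copy block, or branch around it to the sink.
  ThisMBB->addSuccessor(CopyMBB);
  ThisMBB->addSuccessor(SinkMBB);
  CopyMBB->addSuccessor(SinkMBB);
}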
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
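A brief sketch of how a backend might reserve stack objects through MachineFrameInfo during argument lowering; the sizes, offsets and the helper name are illustrative.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

static int createExampleFrameObjects(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // A fixed 4-byte object at the incoming stack pointer, e.g. a stack-passed
  // argument that must not move.
  int FixedFI = MFI.CreateFixedObject(/*Size=*/4, /*SPOffset=*/0,
                                      /*IsImmutable=*/true);
  (void)FixedFI;
  // A regular 8-byte, 8-byte-aligned slot the register allocator may spill to.
  return MFI.CreateStackObject(/*Size=*/8, Align(8), /*isSpillSlot=*/true);
}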
Properties which a MachineFunction may have at a given point in time.
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
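The addReg/addImm/addMBB helpers above are normally chained off BuildMI (declared further down in this list). A minimal sketch, with the opcode and register choices left to the caller:

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
using namespace llvm;

static void emitRegImmOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                         const DebugLoc &DL, const TargetInstrInfo *TII,
                         unsigned Opcode, Register DstReg, Register SrcReg,
                         int64_t Imm) {
  // DstReg = Opcode SrcReg, #Imm
  BuildMI(MBB, I, DL, TII->get(Opcode), DstReg)
      .addReg(SrcReg)
      .addImm(Imm);
}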
Representation of each machine instruction.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
MachineOperand * mop_iterator
iterator/begin/end - Iterate over all operands of a machine instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
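To illustrate how these SelectionDAG factory methods fit together, here is a hedged sketch of a small lowering helper. The rotate-left expansion (x << amt) | (x >> (width - amt)) is only an example and assumes a non-zero rotate amount; it is not ARM's actual lowering.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue lowerRotlAsShifts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue X = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  uint64_t W = VT.getScalarSizeInBits();

  SDValue Hi = DAG.getNode(ISD::SHL, DL, VT, X, Amt);
  SDValue Rem = DAG.getNode(ISD::SUB, DL, VT,
                            DAG.getConstant(W, DL, VT), Amt);
  SDValue Lo = DAG.getNode(ISD::SRL, DL, VT, X, Rem);
  return DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
}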
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
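A short sketch of how the shuffle-mask accessors above are used in a DAG combine; treating an undef leading lane as "not a broadcast" is just an example policy.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isBroadcastOfElementZero(const ShuffleVectorSDNode *SVN) {
  ArrayRef<int> Mask = SVN->getMask();
  // Splat mask whose (defined) first lane selects element 0 of the input.
  return ShuffleVectorSDNode::isSplatMask(Mask) && SVN->getMaskElt(0) == 0;
}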
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:197
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const unsigned char * bytes_end() const
Definition StringRef.h:135
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
const unsigned char * bytes_begin() const
Definition StringRef.h:132
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:414
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl)
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
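Most of the setXXX hooks above are called once, in the constructor of a TargetLowering subclass. A constructor-body fragment, with the class name, register class and every type/action choice purely illustrative:

// Inside a hypothetical MyTargetLowering constructor (Subtarget assumed):
addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);      // i32 lives in GPRs
setOperationAction(ISD::CTPOP, MVT::i32, Expand);        // no native popcount
setOperationAction(ISD::SDIV, MVT::i32, LibCall);        // divide via RT library
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTargetDAGCombine(ISD::ADD);                           // custom ADD combines
setMinFunctionAlignment(Align(4));
setSchedulingPreference(Sched::Hybrid);
// After all register classes are added:
computeRegisterProperties(Subtarget->getRegisterInfo());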
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
ExceptionHandling getExceptionModel() const
Return the ExceptionHandling to use, considering TargetOptions and the Triple's default.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition Triple.h:437
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:295
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:296
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:225
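A tiny sketch combining the Type predicates above; the "is this a scalar that fits in a 32-bit register" question is an example, not a rule ARM applies verbatim.

#include "llvm/IR/Type.h"
using namespace llvm;

static bool isSimple32BitScalar(Type *Ty) {
  if (Ty->isVectorTy() || Ty->isPointerTy())
    return false;
  return Ty->isFloatTy() ||
         (Ty->isIntegerTy() && Ty->getScalarSizeInBits() == 32);
}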
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
Base class of all SIMD vector types.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:169
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
IteratorT end() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Section Base Relative.
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
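A hedged sketch of how the ARM_AM immediate encoders above are consulted when deciding whether a 32-bit constant is "cheap"; the decision policy is illustrative, while the ARM_AM calls are the in-tree helpers declared in the ARM backend's MCTargetDesc/ARMAddressingModes.h.

#include "MCTargetDesc/ARMAddressingModes.h" // ARM backend-local header
#include <cstdint>
using namespace llvm;

static bool isCheapARMImmediate(uint32_t Imm, bool IsThumb2) {
  // Encodable as an ARM shifter_operand / Thumb-2 modified immediate?
  if (IsThumb2)
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  return ARM_AM::getSOImmVal(Imm) != -1 ||
         ARM_AM::getSOImmVal(~Imm) != -1; // MVN of an encodable immediate
}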
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:525
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition ISDOpcodes.h:167
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:809
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition ISDOpcodes.h:122
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
static const int LAST_INDEXED_MODE
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
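A minimal sketch of how these RTLIB getters are consulted when deciding whether an FP conversion has to become a runtime call; the helper name is hypothetical, and the header that declares the getters has moved between LLVM versions.

  #include "llvm/CodeGen/RuntimeLibcallUtil.h" // assumed location of RTLIB::getFPTOSINT
  using namespace llvm;

  // Hypothetical: would converting f64 to RetVT require a libcall on a
  // target without hardware FP-to-integer conversion?
  static bool fpToSIntNeedsLibcall(EVT RetVT, bool HasHardwareFPToInt) {
    if (HasHardwareFPToInt)
      return false;
    RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, RetVT);
    return LC != RTLIB::UNKNOWN_LIBCALL;
  }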
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:330
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
@ Length
Definition DWP.cpp:477
void stable_sort(R &&Range)
Definition STLExtras.h:2060
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
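These range wrappers appear throughout the lowering code whenever a shuffle mask or operand list has to be inspected; the small sketch below is illustrative only, and the specific checks are examples rather than the backend's actual rules.

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"
  using namespace llvm;

  // Illustrative only: every lane is either undef (-1) or selects element 0.
  static bool maskIsSplatOfZeroOrUndef(ArrayRef<int> Mask) {
    return all_of(Mask, [](int M) { return M < 0 || M == 0; });
  }

  // Illustrative only: how many lanes are actually specified?
  static unsigned countDefinedLanes(ArrayRef<int> Mask) {
    return count_if(Mask, [](int M) { return M >= 0; });
  }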
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:260
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
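A condensed sketch of how an assignment function such as CC_ARM_AAPCS is consumed: the caller builds a CCState and lets it hand each incoming value to the function, which either places the value in a register or reserves stack space and records a CCValAssign. The wrapper below is hypothetical; the CCState calls are the standard API.

  #include "ARMCallingConv.h"                 // backend-internal; declares CC_ARM_AAPCS
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFunction.h"
  using namespace llvm;

  // Hypothetical wrapper: compute a CCValAssign for every formal argument.
  static void assignFormalArgLocs(MachineFunction &MF, CallingConv::ID CC,
                                  bool IsVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  SmallVectorImpl<CCValAssign> &ArgLocs) {
    CCState CCInfo(CC, IsVarArg, MF, ArgLocs, MF.getFunction().getContext());
    CCInfo.AnalyzeFormalArguments(Ins, CC_ARM_AAPCS);
  }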
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:264
ExceptionHandling
Definition CodeGen.h:53
@ SjLj
setjmp/longjmp based exceptions
Definition CodeGen.h:56
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:270
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2138
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition STLExtras.h:1546
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:276
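These bit predicates are the workhorses for recognizing ARM-friendly immediates and bitfield patterns; the standalone sketch below only demonstrates the queries, not the backend's actual selection rules.

  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  #include <cstdint>
  using namespace llvm;

  // Illustrative only: a few facts lowering code commonly asks about a
  // candidate 32-bit immediate.
  struct ImmFacts {
    bool IsLowMask;         // e.g. 0x000000FF
    bool IsShiftedMask;     // e.g. 0x0003FC00
    bool IsPow2;            // exactly one bit set
    unsigned TrailingZeros; // position of the lowest set bit (32 if zero)
    unsigned Bits;          // minimum width needed to represent the value
  };

  static ImmFacts analyzeImm(uint32_t Imm) {
    ImmFacts F;
    F.IsLowMask = isMask_32(Imm);
    F.IsShiftedMask = isShiftedMask_32(Imm);
    F.IsPow2 = isPowerOf2_32(Imm);
    F.TrailingZeros = countr_zero(Imm);
    F.Bits = bit_width(Imm);
    return F;
  }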
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:157
bool isReleaseOrStronger(AtomicOrdering AO)
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:336
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition bit.h:203
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
CombineLevel
Definition DAGCombine.h:15
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
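A sketch of the kind of decision these cost helpers support, e.g. preferring to materialize the bitwise complement of an immediate when that is strictly cheaper; the wrapper is hypothetical, and the helpers are declared in the backend-internal ARMBaseInstrInfo.h.

  // Hypothetical: should ~Imm be materialized (and then inverted) instead of Imm?
  static bool preferInvertedMaterialization(unsigned Imm, const ARMSubtarget *ST,
                                            bool ForCodesize) {
    return HasLowerConstantMaterializationCost(~Imm, Imm, ST, ForCodesize);
  }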
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:155
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr U AbsoluteValue(T X)
Return the absolute value of a signed integer, converted to the corresponding unsigned integer type.
Definition MathExtras.h:597
bool isAcquireOrStronger(AtomicOrdering AO)
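A minimal sketch of how these ordering predicates typically drive barrier placement on a weakly ordered target; the policy shown is illustrative, not ARM's exact rule set.

  #include "llvm/Support/AtomicOrdering.h"
  using namespace llvm;

  // Illustrative only: acquire-or-stronger loads need a barrier after them...
  static bool loadNeedsTrailingBarrier(AtomicOrdering AO) {
    return isAcquireOrStronger(AO);
  }

  // ...and release-or-stronger stores need a barrier before them.
  static bool storeNeedsLeadingBarrier(AtomicOrdering AO) {
    return isReleaseOrStronger(AO);
  }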
constexpr unsigned BitWidth
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1963
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
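predOps and condCodeOp are how ARM code attaches the standard predicate operands and the optional CPSR-def operand when building machine instructions; below is a condensed sketch of the usual BuildMI idiom, with the opcode and registers as placeholders.

  #include "ARMBaseInstrInfo.h"                 // backend-internal; declares predOps/condCodeOp
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  using namespace llvm;

  // Placeholder example: emit an unpredicated, non-flag-setting register move.
  static void emitExampleMove(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I, const DebugLoc &DL,
                              const TargetInstrInfo *TII, Register Dst,
                              Register Src) {
    BuildMI(MBB, I, DL, TII->get(ARM::MOVr), Dst)
        .addReg(Src)
        .add(predOps(ARMCC::AL)) // always-execute predicate + no predicate register
        .add(condCodeOp());      // no CPSR definition (the 'S' bit stays clear)
  }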
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
unsigned gettBLXrOpcode(const MachineFunction &MF)
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
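createSequentialMask and concatenateVectors are the IR-level utilities used when lowering interleaved accesses; the sketch below combines two vectors and then extracts a contiguous slice. The function name and the particular transformation are illustrative.

  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Illustrative only: concatenate V0 and V1 (same vector type), then take the
  // first NumElts lanes of the combined vector with a sequential shuffle mask.
  static Value *concatAndTakeFirstHalf(IRBuilderBase &Builder, Value *V0,
                                       Value *V1, unsigned NumElts) {
    Value *Wide = concatenateVectors(Builder, {V0, V1});
    // Mask = [0, 1, ..., NumElts-1] with no undef tail elements.
    return Builder.CreateShuffleVector(Wide, Wide,
                                       createSequentialMask(0, NumElts, 0));
  }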
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:760
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:295
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:345
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:458
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:380
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:465
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:376
bool isFixedLengthVector() const
Definition ValueTypes.h:181
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:318
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:331
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:303
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:448
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
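The EVT queries above are used constantly when classifying and legalizing vector types; here is a short standalone sketch of the usual question-and-answer pattern. Both helpers are hypothetical.

  #include "llvm/CodeGen/ValueTypes.h"
  #include <cassert>
  using namespace llvm;

  // Hypothetical: given a fixed-width FP vector, return the integer vector
  // type of the same shape, e.g. v4f32 -> v4i32.
  static EVT toMatchingIntVector(EVT VT) {
    assert(VT.isVector() && VT.isFloatingPoint() && VT.isFixedLengthVector());
    return VT.changeVectorElementTypeToInteger();
  }

  // Hypothetical: does VT fit exactly in a single 128-bit NEON/MVE register?
  static bool fitsInQReg(EVT VT) {
    return VT.isSimple() && VT.is128BitVector();
  }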
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
EVT ArgVT
Usually the non-legalized type of the argument, which is the EVT corresponding to the OrigTy IR type.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:294
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:340
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
APInt getSignedMinValue() const
Return the minimal signed value possible given these KnownBits.
Definition KnownBits.h:128
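A small sketch of combining the KnownBits operations listed above, in the style of a computeKnownBitsForTargetNode helper; the flow is illustrative, not the actual ARM implementation.

  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // Illustrative only: known bits of (zext(X) + C), where X is narrower than
  // DstBits and C is a compile-time constant of width DstBits.
  static KnownBits knownBitsOfZExtPlusConst(const KnownBits &X, unsigned DstBits,
                                            const APInt &C) {
    KnownBits Wide = X.zext(DstBits);             // high bits become known zero
    KnownBits Const = KnownBits::makeConstant(C); // every bit of C is known
    return KnownBits::add(Wide, Const);           // conservative addition
  }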
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
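The CallLoweringInfo setters above form a fluent builder that target lowering code uses when it turns an operation into a runtime call; below is a condensed sketch of that idiom. The callee symbol "__example_fn" and the calling-convention choice are placeholders, not a real runtime routine.

  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/CodeGen/TargetLowering.h"
  #include "llvm/IR/CallingConv.h"
  using namespace llvm;

  // Placeholder example: lower some operation to a call to "__example_fn" and
  // return the call's result value.
  static SDValue emitExampleLibcall(SelectionDAG &DAG, const TargetLowering &TLI,
                                    SDValue Chain, const SDLoc &dl, Type *RetTy,
                                    EVT PtrVT,
                                    TargetLowering::ArgListTy &&Args) {
    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Chain)
        .setLibCallee(CallingConv::ARM_AAPCS, RetTy,
                      DAG.getExternalSymbol("__example_fn", PtrVT),
                      std::move(Args));
    // LowerCallTo returns {return value, out-chain}.
    return TLI.LowerCallTo(CLI).first;
  }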
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to the makeLibCall function.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...