AArch64ISelDAGToDAG.cpp
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 1, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 2, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 4, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 8, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 16, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
134 }
135 template <unsigned Size, unsigned Max>
136 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
137 // Test if there is an appropriate addressing mode and check if the
138 // immediate fits.
139 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
140 if (Found) {
141 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
142 int64_t C = CI->getSExtValue();
143 if (C <= Max)
144 return true;
145 }
146 }
147
148 // Otherwise, base only, materialize address in register.
149 Base = N;
150 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
151 return true;
152 }
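  // For illustration (assuming, say, Size = 8 and Max = 7 as template
  // arguments): a byte offset of 56 scales to OffImm = 7 and is accepted,
  // while 64 scales to 8 > Max and the function falls back to materializing
  // the address (base only, OffImm 0).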
153
154 template<int Width>
155 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
156 SDValue &SignExtend, SDValue &DoShift) {
157 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
158 }
159
160 template<int Width>
161 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
162 SDValue &SignExtend, SDValue &DoShift) {
163 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
164 }
165
166 bool SelectExtractHigh(SDValue N, SDValue &Res) {
167 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
168 N = N->getOperand(0);
169 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
170 !isa<ConstantSDNode>(N->getOperand(1)))
171 return false;
172 EVT VT = N->getValueType(0);
173 EVT LVT = N->getOperand(0).getValueType();
174 unsigned Index = N->getConstantOperandVal(1);
175 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
176 Index != VT.getVectorNumElements())
177 return false;
178 Res = N->getOperand(0);
179 return true;
180 }
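  // For illustration: with a v8i16 source, an EXTRACT_SUBVECTOR returning
  // v4i16 at index 4 is the high 64-bit half, so Res is set to the full
  // 128-bit vector; index 0 (the low half) fails the Index check above.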
181
182 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
183 if (N.getOpcode() != AArch64ISD::VLSHR)
184 return false;
185 SDValue Op = N->getOperand(0);
186 EVT VT = Op.getValueType();
187 unsigned ShtAmt = N->getConstantOperandVal(1);
188 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
189 return false;
190
191 APInt Imm;
192 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0)
195 << Op.getOperand(1).getConstantOperandVal(1));
196 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
197 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0));
200 else
201 return false;
202
203 if (Imm != 1ULL << (ShtAmt - 1))
204 return false;
205
206 Res1 = Op.getOperand(0);
207 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
208 return true;
209 }
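  // Worked example of the pattern above: for v8i16, (X + splat(8)) >> 4
  // matches because the splatted constant equals 1 << (4 - 1) and the shift
  // amount 4 is no more than half the 16-bit element width, so Res1 = X and
  // Res2 = 4, i.e. a rounding shift right by 4.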
210
211 bool SelectDupZeroOrUndef(SDValue N) {
212 switch(N->getOpcode()) {
213 case ISD::UNDEF:
214 return true;
215 case AArch64ISD::DUP:
216 case ISD::SPLAT_VECTOR: {
217 auto Opnd0 = N->getOperand(0);
218 if (isNullConstant(Opnd0))
219 return true;
220 if (isNullFPConstant(Opnd0))
221 return true;
222 break;
223 }
224 default:
225 break;
226 }
227
228 return false;
229 }
230
231 bool SelectAny(SDValue) { return true; }
232
233 bool SelectDupZero(SDValue N) {
234 switch(N->getOpcode()) {
235 case AArch64ISD::DUP:
236 case ISD::SPLAT_VECTOR: {
237 auto Opnd0 = N->getOperand(0);
238 if (isNullConstant(Opnd0))
239 return true;
240 if (isNullFPConstant(Opnd0))
241 return true;
242 break;
243 }
244 }
245
246 return false;
247 }
248
249 template<MVT::SimpleValueType VT>
250 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
251 return SelectSVEAddSubImm(N, VT, Imm, Shift);
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT>
260 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVECpyDupImm(N, VT, Imm, Shift);
262 }
263
264 template <MVT::SimpleValueType VT, bool Invert = false>
265 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
266 return SelectSVELogicalImm(N, VT, Imm, Invert);
267 }
268
269 template <MVT::SimpleValueType VT>
270 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
271 return SelectSVEArithImm(N, VT, Imm);
272 }
273
274 template <unsigned Low, unsigned High, bool AllowSaturation = false>
275 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
276 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
277 }
278
279 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
280 if (N->getOpcode() != ISD::SPLAT_VECTOR)
281 return false;
282
283 EVT EltVT = N->getValueType(0).getVectorElementType();
284 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
285 /* High */ EltVT.getFixedSizeInBits(),
286 /* AllowSaturation */ true, Imm);
287 }
288
289 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
290 template<signed Min, signed Max, signed Scale, bool Shift>
291 bool SelectCntImm(SDValue N, SDValue &Imm) {
292 if (!isa<ConstantSDNode>(N))
293 return false;
294
295 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
296 if (Shift)
297 MulImm = 1LL << MulImm;
298
299 if ((MulImm % std::abs(Scale)) != 0)
300 return false;
301
302 MulImm /= Scale;
303 if ((MulImm >= Min) && (MulImm <= Max)) {
304 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
305 return true;
306 }
307
308 return false;
309 }
310
311 template <signed Max, signed Scale>
312 bool SelectEXTImm(SDValue N, SDValue &Imm) {
313 if (!isa<ConstantSDNode>(N))
314 return false;
315
316 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
317
318 if (MulImm >= 0 && MulImm <= Max) {
319 MulImm *= Scale;
320 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
321 return true;
322 }
323
324 return false;
325 }
326
327 template <unsigned BaseReg, unsigned Max>
328 bool ImmToReg(SDValue N, SDValue &Imm) {
329 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
330 uint64_t C = CI->getZExtValue();
331
332 if (C > Max)
333 return false;
334
335 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
336 return true;
337 }
338 return false;
339 }
340
341 /// Form sequences of consecutive 64/128-bit registers for use in NEON
342 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
343 /// between 1 and 4 elements. If it contains a single element, that element is
344 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
345 SDValue createDTuple(ArrayRef<SDValue> Vecs);
346 SDValue createQTuple(ArrayRef<SDValue> Vecs);
347 // Form a sequence of SVE registers for instructions using a list of vectors,
348 // e.g. structured loads and stores (ldN, stN).
349 SDValue createZTuple(ArrayRef<SDValue> Vecs);
350
351 // Similar to above, except the register must start at a multiple of the
352 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
353 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
354
355 /// Generic helper for the createDTuple/createQTuple
356 /// functions. Those should almost always be called instead.
357 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
358 const unsigned SubRegs[]);
359
360 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
361
362 bool tryIndexedLoad(SDNode *N);
363
364 void SelectPtrauthAuth(SDNode *N);
365 void SelectPtrauthResign(SDNode *N);
366
367 bool trySelectStackSlotTagP(SDNode *N);
368 void SelectTagP(SDNode *N);
369
370 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
371 unsigned SubRegIdx);
372 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
375 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
376 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
377 unsigned Opc_rr, unsigned Opc_ri,
378 bool IsIntr = false);
379 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
380 unsigned Scale, unsigned Opc_ri,
381 unsigned Opc_rr);
382 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
383 bool IsZmMulti, unsigned Opcode,
384 bool HasPred = false);
385 void SelectPExtPair(SDNode *N, unsigned Opc);
386 void SelectWhilePair(SDNode *N, unsigned Opc);
387 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
388 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
389 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
391 bool IsTupleInput, unsigned Opc);
392 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
393
394 template <unsigned MaxIdx, unsigned Scale>
395 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
396 unsigned Op);
397 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
398 unsigned Op, unsigned MaxIdx, unsigned Scale,
399 unsigned BaseReg = 0);
400 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401 /// SVE Reg+Imm addressing mode.
402 template <int64_t Min, int64_t Max>
403 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
404 SDValue &OffImm);
405 /// SVE Reg+Reg address mode.
406 template <unsigned Scale>
407 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
408 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
409 }
410
411 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
412 unsigned Opc, uint32_t MaxImm);
413
414 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
415
416 template <unsigned MaxIdx, unsigned Scale>
417 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
418 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
419 }
420
421 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
426 unsigned Opc_rr, unsigned Opc_ri);
427 std::tuple<unsigned, SDValue, SDValue>
428 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
429 const SDValue &OldBase, const SDValue &OldOffset,
430 unsigned Scale);
431
432 bool tryBitfieldExtractOp(SDNode *N);
433 bool tryBitfieldExtractOpFromSExt(SDNode *N);
434 bool tryBitfieldInsertOp(SDNode *N);
435 bool tryBitfieldInsertInZeroOp(SDNode *N);
436 bool tryShiftAmountMod(SDNode *N);
437
438 bool tryReadRegister(SDNode *N);
439 bool tryWriteRegister(SDNode *N);
440
441 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
442 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
443
444 bool trySelectXAR(SDNode *N);
445
446// Include the pieces autogenerated from the target description.
447#include "AArch64GenDAGISel.inc"
448
449private:
450 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
451 SDValue &Shift);
452 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
453 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
454 SDValue &OffImm) {
455 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
456 }
457 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
458 unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &Offset, SDValue &SignExtend,
466 SDValue &DoShift);
467 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
471 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
472 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
473 SDValue &Offset, SDValue &SignExtend);
474
475 template<unsigned RegWidth>
476 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
477 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
478 }
479
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
481
482 template<unsigned RegWidth>
483 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
484 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
485 }
486
487 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
488 unsigned Width);
489
490 bool SelectCMP_SWAP(SDNode *N);
491
492 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
493 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
494 bool Negate);
495 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
496 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
497
498 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
499 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
500 bool AllowSaturation, SDValue &Imm);
501
502 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
503 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
504 SDValue &Offset);
505 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
506 SDValue &Offset, unsigned Scale = 1);
507
508 bool SelectAllActivePredicate(SDValue N);
509 bool SelectAnyPredicate(SDValue N);
510
511 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
512};
513
514class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
515public:
516 static char ID;
517 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
518 CodeGenOptLevel OptLevel)
519 : SelectionDAGISelLegacy(
520 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
521};
522} // end anonymous namespace
523
524char AArch64DAGToDAGISelLegacy::ID = 0;
525
526INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
527
528/// isIntImmediate - This method tests to see if the node is a constant
529/// operand. If so, Imm will receive the zero-extended value.
530static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
531 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
532 Imm = C->getZExtValue();
533 return true;
534 }
535 return false;
536}
537
538// isIntImmediate - This method tests to see if N is a constant operand.
539// If so, Imm will receive the value.
540static bool isIntImmediate(SDValue N, uint64_t &Imm) {
541 return isIntImmediate(N.getNode(), Imm);
542}
543
544// isOpcWithIntImmediate - This method tests to see if the node has the
545// specified opcode and an immediate integer right operand.
546// If so, Imm will receive that value.
547static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
548 uint64_t &Imm) {
549 return N->getOpcode() == Opc &&
550 isIntImmediate(N->getOperand(1).getNode(), Imm);
551}
552
553// isIntImmediateEq - This method tests to see if N is a constant operand that
554// is equivalent to 'ImmExpected'.
555#ifndef NDEBUG
556static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
557 uint64_t Imm;
558 if (!isIntImmediate(N.getNode(), Imm))
559 return false;
560 return Imm == ImmExpected;
561}
562#endif
563
564bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
565 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
566 std::vector<SDValue> &OutOps) {
567 switch(ConstraintID) {
568 default:
569 llvm_unreachable("Unexpected asm memory constraint");
570 case InlineAsm::ConstraintCode::m:
571 case InlineAsm::ConstraintCode::o:
572 case InlineAsm::ConstraintCode::Q:
573 // We need to make sure that this one operand does not end up in XZR, thus
574 // require the address to be in a PointerRegClass register.
575 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
576 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
577 SDLoc dl(Op);
578 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
579 SDValue NewOp =
580 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
581 dl, Op.getValueType(),
582 Op, RC), 0);
583 OutOps.push_back(NewOp);
584 return false;
585 }
586 return true;
587}
588
589/// SelectArithImmed - Select an immediate value that can be represented as
590/// a 12-bit value shifted left by either 0 or 12. If so, return true with
591/// Val set to the 12-bit value and Shift set to the shifter operand.
592bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
593 SDValue &Shift) {
594 // This function is called from the addsub_shifted_imm ComplexPattern,
595 // which lists [imm] as the list of opcodes it's interested in; however,
596 // we still need to check whether the operand is actually an immediate
597 // here because the ComplexPattern opcode list is only used in
598 // root-level opcode matching.
599 if (!isa<ConstantSDNode>(N.getNode()))
600 return false;
601
602 uint64_t Immed = N.getNode()->getAsZExtVal();
603 unsigned ShiftAmt;
604
605 if (Immed >> 12 == 0) {
606 ShiftAmt = 0;
607 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
608 ShiftAmt = 12;
609 Immed = Immed >> 12;
610 } else
611 return false;
612
613 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
614 SDLoc dl(N);
615 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
616 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
617 return true;
618}
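// For example: 0xabc is selected as Val = 0xabc with LSL #0, and 0xabc000 as
// Val = 0xabc with LSL #12, while 0xabc001 is rejected because neither 12-bit
// window covers it.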
619
620/// SelectNegArithImmed - As above, but negates the value before trying to
621/// select it.
622bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
623 SDValue &Shift) {
624 // This function is called from the addsub_shifted_imm ComplexPattern,
625 // which lists [imm] as the list of opcodes it's interested in; however,
626 // we still need to check whether the operand is actually an immediate
627 // here because the ComplexPattern opcode list is only used in
628 // root-level opcode matching.
629 if (!isa<ConstantSDNode>(N.getNode()))
630 return false;
631
632 // The immediate operand must be a 24-bit zero-extended immediate.
633 uint64_t Immed = N.getNode()->getAsZExtVal();
634
635 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
636 // have the opposite effect on the C flag, so this pattern mustn't match under
637 // those circumstances.
638 if (Immed == 0)
639 return false;
640
641 if (N.getValueType() == MVT::i32)
642 Immed = ~((uint32_t)Immed) + 1;
643 else
644 Immed = ~Immed + 1ULL;
645 if (Immed & 0xFFFFFFFFFF000000ULL)
646 return false;
647
648 Immed &= 0xFFFFFFULL;
649 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
650 Shift);
651}
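// For example, an i32 constant of -4096 (0xfffff000) negates to 0x1000 and is
// selected as Val = 1 with LSL #12; a zero immediate is rejected so that
// "cmp wN, #0" and "cmn wN, #0" keep their distinct effect on the C flag.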
652
653/// getShiftTypeForNode - Translate a shift node to the corresponding
654/// ShiftType value.
655static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
656 switch (N.getOpcode()) {
657 default:
658 return AArch64_AM::InvalidShiftExtend;
659 case ISD::SHL:
660 return AArch64_AM::LSL;
661 case ISD::SRL:
662 return AArch64_AM::LSR;
663 case ISD::SRA:
664 return AArch64_AM::ASR;
665 case ISD::ROTR:
666 return AArch64_AM::ROR;
667 }
668}
669
670static bool isMemOpOrPrefetch(SDNode *N) {
671 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
672}
673
674/// Determine whether it is worth it to fold SHL into the addressing
675/// mode.
676static bool isWorthFoldingSHL(SDValue V) {
677 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
678 // It is worth folding logical shift of up to three places.
679 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
680 if (!CSD)
681 return false;
682 unsigned ShiftVal = CSD->getZExtValue();
683 if (ShiftVal > 3)
684 return false;
685
686 // Check if this particular node is reused in any non-memory related
687 // operation. If yes, do not try to fold this node into the address
688 // computation, since the computation will be kept.
689 const SDNode *Node = V.getNode();
690 for (SDNode *UI : Node->users())
691 if (!isMemOpOrPrefetch(UI))
692 for (SDNode *UII : UI->users())
693 if (!isMemOpOrPrefetch(UII))
694 return false;
695 return true;
696}
697
698/// Determine whether it is worth folding V into an extended register addressing
699/// mode.
700bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
701 // Trivial if we are optimizing for code size or if there is only
702 // one use of the value.
703 if (CurDAG->shouldOptForSize() || V.hasOneUse())
704 return true;
705
706 // If a subtarget has a slow shift, folding a shift into multiple loads
707 // costs additional micro-ops.
708 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
709 return false;
710
711 // Check whether we're going to emit the address arithmetic anyway because
712 // it's used by a non-address operation.
713 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
714 return true;
715 if (V.getOpcode() == ISD::ADD) {
716 const SDValue LHS = V.getOperand(0);
717 const SDValue RHS = V.getOperand(1);
718 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
719 return true;
720 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
721 return true;
722 }
723
724 // It hurts otherwise, since the value will be reused.
725 return false;
726}
727
728/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
729/// to select more shifted register
730bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
731 SDValue &Shift) {
732 EVT VT = N.getValueType();
733 if (VT != MVT::i32 && VT != MVT::i64)
734 return false;
735
736 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
737 return false;
738 SDValue LHS = N.getOperand(0);
739 if (!LHS->hasOneUse())
740 return false;
741
742 unsigned LHSOpcode = LHS->getOpcode();
743 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
744 return false;
745
746 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
747 if (!ShiftAmtNode)
748 return false;
749
750 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
751 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
752 if (!RHSC)
753 return false;
754
755 APInt AndMask = RHSC->getAPIntValue();
756 unsigned LowZBits, MaskLen;
757 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
758 return false;
759
760 unsigned BitWidth = N.getValueSizeInBits();
761 SDLoc DL(LHS);
762 uint64_t NewShiftC;
763 unsigned NewShiftOp;
764 if (LHSOpcode == ISD::SHL) {
765 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
766 // BitWidth != LowZBits + MaskLen doesn't match the pattern
767 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
768 return false;
769
770 NewShiftC = LowZBits - ShiftAmtC;
771 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
772 } else {
773 if (LowZBits == 0)
774 return false;
775
776 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
777 NewShiftC = LowZBits + ShiftAmtC;
778 if (NewShiftC >= BitWidth)
779 return false;
780
781 // SRA need all high bits
782 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
783 return false;
784
785 // SRL high bits can be 0 or 1
786 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
787 return false;
788
789 if (LHSOpcode == ISD::SRL)
790 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
791 else
792 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
793 }
794
795 assert(NewShiftC < BitWidth && "Invalid shift amount");
796 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
797 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
798 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
799 NewShiftAmt, BitWidthMinus1),
800 0);
801 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
802 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
803 return true;
804}
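// Worked i32 example of the SHL branch above: (and (shl x, 2), 0xffffff00)
// has LowZBits = 8 and MaskLen = 24, so it is rewritten as UBFMWri(x, 6, 31)
// (a logical shift right by 6) used as a shifted register with LSL #8, which
// computes the same value.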
805
806/// getExtendTypeForNode - Translate an extend node to the corresponding
807/// ExtendType value.
808static AArch64_AM::ShiftExtendType
809getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
810 if (N.getOpcode() == ISD::SIGN_EXTEND ||
811 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
812 EVT SrcVT;
813 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
814 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
815 else
816 SrcVT = N.getOperand(0).getValueType();
817
818 if (!IsLoadStore && SrcVT == MVT::i8)
819 return AArch64_AM::SXTB;
820 else if (!IsLoadStore && SrcVT == MVT::i16)
821 return AArch64_AM::SXTH;
822 else if (SrcVT == MVT::i32)
823 return AArch64_AM::SXTW;
824 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
825
826 return AArch64_AM::InvalidShiftExtend;
827 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
828 N.getOpcode() == ISD::ANY_EXTEND) {
829 EVT SrcVT = N.getOperand(0).getValueType();
830 if (!IsLoadStore && SrcVT == MVT::i8)
831 return AArch64_AM::UXTB;
832 else if (!IsLoadStore && SrcVT == MVT::i16)
833 return AArch64_AM::UXTH;
834 else if (SrcVT == MVT::i32)
835 return AArch64_AM::UXTW;
836 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
837
838 return AArch64_AM::InvalidShiftExtend;
839 } else if (N.getOpcode() == ISD::AND) {
840 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
841 if (!CSD)
842 return AArch64_AM::InvalidShiftExtend;
843 uint64_t AndMask = CSD->getZExtValue();
844
845 switch (AndMask) {
846 default:
847 return AArch64_AM::InvalidShiftExtend;
848 case 0xFF:
849 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
850 case 0xFFFF:
851 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
852 case 0xFFFFFFFF:
853 return AArch64_AM::UXTW;
854 }
855 }
856
857 return AArch64_AM::InvalidShiftExtend;
858}
859
860/// Determine whether it is worth folding V into an extended register of an
861/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
862/// instruction, and the shift should be treated as worth folding even if it has
863/// multiple uses.
864bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
865 // Trivial if we are optimizing for code size or if there is only
866 // one use of the value.
867 if (CurDAG->shouldOptForSize() || V.hasOneUse())
868 return true;
869
870 // If a subtarget has a fastpath LSL we can fold a logical shift into
871 // the add/sub and save a cycle.
872 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
873 V.getConstantOperandVal(1) <= 4 &&
874 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
875 return true;
876
877 // It hurts otherwise, since the value will be reused.
878 return false;
879}
880
881/// SelectShiftedRegister - Select a "shifted register" operand. If the value
882/// is not shifted, set the Shift operand to default of "LSL 0". The logical
883/// instructions allow the shifted register to be rotated, but the arithmetic
884/// instructions do not. The AllowROR parameter specifies whether ROR is
885/// supported.
886bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
887 SDValue &Reg, SDValue &Shift) {
888 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
889 return true;
890
891 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
892 if (ShType == AArch64_AM::InvalidShiftExtend)
893 return false;
894 if (!AllowROR && ShType == AArch64_AM::ROR)
895 return false;
896
897 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
898 unsigned BitSize = N.getValueSizeInBits();
899 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
900 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
901
902 Reg = N.getOperand(0);
903 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
904 return isWorthFoldingALU(N, true);
905 }
906
907 return false;
908}
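// For example, a logical operation whose operand is (shl y, #7) can take y as
// Reg with a Shift operand encoding LSL #7, whereas for an arithmetic user a
// (rotr y, #7) operand is rejected because AllowROR is false.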
909
910/// Instructions that accept extend modifiers like UXTW expect the register
911/// being extended to be a GPR32, but the incoming DAG might be acting on a
912/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
913/// this is the case.
914static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
915 if (N.getValueType() == MVT::i32)
916 return N;
917
918 SDLoc dl(N);
919 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
920}
921
922// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
923template<signed Low, signed High, signed Scale>
924bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
925 if (!isa<ConstantSDNode>(N))
926 return false;
927
928 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
929 if ((MulImm % std::abs(Scale)) == 0) {
930 int64_t RDVLImm = MulImm / Scale;
931 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
932 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
933 return true;
934 }
935 }
936
937 return false;
938}
939
940/// SelectArithExtendedRegister - Select an "extended register" operand. This
941/// operand folds in an extend followed by an optional left shift.
942bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
943 SDValue &Shift) {
944 unsigned ShiftVal = 0;
945 AArch64_AM::ShiftExtendType Ext;
946
947 if (N.getOpcode() == ISD::SHL) {
948 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
949 if (!CSD)
950 return false;
951 ShiftVal = CSD->getZExtValue();
952 if (ShiftVal > 4)
953 return false;
954
955 Ext = getExtendTypeForNode(N.getOperand(0));
956 if (Ext == AArch64_AM::InvalidShiftExtend)
957 return false;
958
959 Reg = N.getOperand(0).getOperand(0);
960 } else {
961 Ext = getExtendTypeForNode(N);
962 if (Ext == AArch64_AM::InvalidShiftExtend)
963 return false;
964
965 Reg = N.getOperand(0);
966
967 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
968 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
969 auto isDef32 = [](SDValue N) {
970 unsigned Opc = N.getOpcode();
971 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
972 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
973 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
974 Opc != ISD::FREEZE;
975 };
976 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
977 isDef32(Reg))
978 return false;
979 }
980
981 // AArch64 mandates that the RHS of the operation must use the smallest
982 // register class that could contain the size being extended from. Thus,
983 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
984 // there might not be an actual 32-bit value in the program. We can
985 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
986 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
987 Reg = narrowIfNeeded(CurDAG, Reg);
988 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
989 MVT::i32);
990 return isWorthFoldingALU(N);
991}
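// For example, an operand of the form (shl (and x1, 0xff), 2) can be folded
// as the 32-bit register w1 with an "UXTB #2" extend/shift operand: the AND
// mask 0xff maps to UXTB and the left shift of 2 is within the allowed 0-4
// range.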
992
993/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
994/// operand is used by instructions that have an SP operand.
995bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
996 SDValue &Shift) {
997 unsigned ShiftVal = 0;
998 AArch64_AM::ShiftExtendType Ext;
999
1000 if (N.getOpcode() != ISD::SHL)
1001 return false;
1002
1003 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1004 if (!CSD)
1005 return false;
1006 ShiftVal = CSD->getZExtValue();
1007 if (ShiftVal > 4)
1008 return false;
1009
1010 Ext = AArch64_AM::UXTX;
1011 Reg = N.getOperand(0);
1012 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1013 MVT::i32);
1014 return isWorthFoldingALU(N);
1015}
1016
1017/// If there's a use of this ADDlow that's not itself a load/store then we'll
1018/// need to create a real ADD instruction from it anyway and there's no point in
1019/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1020/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1021/// leads to duplicated ADRP instructions.
1022static bool isWorthFoldingADDlow(SDValue N) {
1023 for (auto *User : N->users()) {
1024 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1025 User->getOpcode() != ISD::ATOMIC_LOAD &&
1026 User->getOpcode() != ISD::ATOMIC_STORE)
1027 return false;
1028
1029 // ldar and stlr have much more restrictive addressing modes (just a
1030 // register).
1031 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1032 return false;
1033 }
1034
1035 return true;
1036}
1037
1038/// Check if the immediate offset is valid as a scaled immediate.
1039static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1040 unsigned Size) {
1041 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1042 Offset < (Range << Log2_32(Size)))
1043 return true;
1044 return false;
1045}
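// For example, with Range = 0x1000 and Size = 8 the valid offsets are the
// multiples of 8 from 0 to 32760 inclusive, matching the unsigned scaled
// 12-bit immediate of an 8-byte load/store.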
1046
1047/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1048/// immediate" address. The "Size" argument is the size in bytes of the memory
1049/// reference, which determines the scale.
1050bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1051 unsigned BW, unsigned Size,
1052 SDValue &Base,
1053 SDValue &OffImm) {
1054 SDLoc dl(N);
1055 const DataLayout &DL = CurDAG->getDataLayout();
1056 const TargetLowering *TLI = getTargetLowering();
1057 if (N.getOpcode() == ISD::FrameIndex) {
1058 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1059 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1060 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1061 return true;
1062 }
1063
1064 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1065 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1066 if (CurDAG->isBaseWithConstantOffset(N)) {
1067 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1068 if (IsSignedImm) {
1069 int64_t RHSC = RHS->getSExtValue();
1070 unsigned Scale = Log2_32(Size);
1071 int64_t Range = 0x1LL << (BW - 1);
1072
1073 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1074 RHSC < (Range << Scale)) {
1075 Base = N.getOperand(0);
1076 if (Base.getOpcode() == ISD::FrameIndex) {
1077 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1078 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1079 }
1080 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1081 return true;
1082 }
1083 } else {
1084 // unsigned Immediate
1085 uint64_t RHSC = RHS->getZExtValue();
1086 unsigned Scale = Log2_32(Size);
1087 uint64_t Range = 0x1ULL << BW;
1088
1089 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1090 Base = N.getOperand(0);
1091 if (Base.getOpcode() == ISD::FrameIndex) {
1092 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1093 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1094 }
1095 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1096 return true;
1097 }
1098 }
1099 }
1100 }
1101 // Base only. The address will be materialized into a register before
1102 // the memory is accessed.
1103 // add x0, Xbase, #offset
1104 // stp x1, x2, [x0]
1105 Base = N;
1106 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1107 return true;
1108}
1109
1110/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1111/// immediate" address. The "Size" argument is the size in bytes of the memory
1112/// reference, which determines the scale.
1113bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1114 SDValue &Base, SDValue &OffImm) {
1115 SDLoc dl(N);
1116 const DataLayout &DL = CurDAG->getDataLayout();
1117 const TargetLowering *TLI = getTargetLowering();
1118 if (N.getOpcode() == ISD::FrameIndex) {
1119 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1120 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1121 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1122 return true;
1123 }
1124
1125 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1126 GlobalAddressSDNode *GAN =
1127 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1128 Base = N.getOperand(0);
1129 OffImm = N.getOperand(1);
1130 if (!GAN)
1131 return true;
1132
1133 if (GAN->getOffset() % Size == 0 &&
1134 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1135 return true;
1136 }
1137
1138 if (CurDAG->isBaseWithConstantOffset(N)) {
1139 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1140 int64_t RHSC = (int64_t)RHS->getZExtValue();
1141 unsigned Scale = Log2_32(Size);
1142 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1143 Base = N.getOperand(0);
1144 if (Base.getOpcode() == ISD::FrameIndex) {
1145 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1146 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1147 }
1148 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1149 return true;
1150 }
1151 }
1152 }
1153
1154 // Before falling back to our general case, check if the unscaled
1155 // instructions can handle this. If so, that's preferable.
1156 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1157 return false;
1158
1159 // Base only. The address will be materialized into a register before
1160 // the memory is accessed.
1161 // add x0, Xbase, #offset
1162 // ldr x0, [x0]
1163 Base = N;
1164 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1165 return true;
1166}
1167
1168/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1169/// immediate" address. This should only match when there is an offset that
1170/// is not valid for a scaled immediate addressing mode. The "Size" argument
1171/// is the size in bytes of the memory reference, which is needed here to know
1172/// what is valid for a scaled immediate.
1173bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1174 SDValue &Base,
1175 SDValue &OffImm) {
1176 if (!CurDAG->isBaseWithConstantOffset(N))
1177 return false;
1178 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1179 int64_t RHSC = RHS->getSExtValue();
1180 if (RHSC >= -256 && RHSC < 256) {
1181 Base = N.getOperand(0);
1182 if (Base.getOpcode() == ISD::FrameIndex) {
1183 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1184 const TargetLowering *TLI = getTargetLowering();
1185 Base = CurDAG->getTargetFrameIndex(
1186 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1187 }
1188 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1189 return true;
1190 }
1191 }
1192 return false;
1193}
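// For example, a byte offset of -17 from the base register is accepted here
// (the LDUR/STUR-style unscaled range is -256 to 255), whereas the scaled
// unsigned addressing mode above cannot encode a negative offset.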
1194
1195static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1196 SDLoc dl(N);
1197 SDValue ImpDef = SDValue(
1198 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1199 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1200 N);
1201}
1202
1203/// Check if the given SHL node (\p N) can be used to form an
1204/// extended register for an addressing mode.
1205bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1206 bool WantExtend, SDValue &Offset,
1207 SDValue &SignExtend) {
1208 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1209 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1210 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1211 return false;
1212
1213 SDLoc dl(N);
1214 if (WantExtend) {
1215 AArch64_AM::ShiftExtendType Ext =
1216 getExtendTypeForNode(N.getOperand(0), true);
1217 if (Ext == AArch64_AM::InvalidShiftExtend)
1218 return false;
1219
1220 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1221 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1222 MVT::i32);
1223 } else {
1224 Offset = N.getOperand(0);
1225 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1226 }
1227
1228 unsigned LegalShiftVal = Log2_32(Size);
1229 unsigned ShiftVal = CSD->getZExtValue();
1230
1231 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1232 return false;
1233
1234 return isWorthFoldingAddr(N, Size);
1235}
1236
1237bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1238 SDValue &Base, SDValue &Offset,
1239 SDValue &SignExtend,
1240 SDValue &DoShift) {
1241 if (N.getOpcode() != ISD::ADD)
1242 return false;
1243 SDValue LHS = N.getOperand(0);
1244 SDValue RHS = N.getOperand(1);
1245 SDLoc dl(N);
1246
1247 // We don't want to match immediate adds here, because they are better lowered
1248 // to the register-immediate addressing modes.
1249 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1250 return false;
1251
1252 // Check if this particular node is reused in any non-memory related
1253 // operation. If yes, do not try to fold this node into the address
1254 // computation, since the computation will be kept.
1255 const SDNode *Node = N.getNode();
1256 for (SDNode *UI : Node->users()) {
1257 if (!isMemOpOrPrefetch(UI))
1258 return false;
1259 }
1260
1261 // Remember if it is worth folding N when it produces extended register.
1262 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1263
1264 // Try to match a shifted extend on the RHS.
1265 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1266 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1267 Base = LHS;
1268 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1269 return true;
1270 }
1271
1272 // Try to match a shifted extend on the LHS.
1273 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1274 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1275 Base = RHS;
1276 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1277 return true;
1278 }
1279
1280 // There was no shift, whatever else we find.
1281 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1282
1283 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1284 // Try to match an unshifted extend on the LHS.
1285 if (IsExtendedRegisterWorthFolding &&
1286 (Ext = getExtendTypeForNode(LHS, true)) !=
1287 AArch64_AM::InvalidShiftExtend) {
1288 Base = RHS;
1289 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1290 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1291 MVT::i32);
1292 if (isWorthFoldingAddr(LHS, Size))
1293 return true;
1294 }
1295
1296 // Try to match an unshifted extend on the RHS.
1297 if (IsExtendedRegisterWorthFolding &&
1298 (Ext = getExtendTypeForNode(RHS, true)) !=
1299 AArch64_AM::InvalidShiftExtend) {
1300 Base = LHS;
1301 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1302 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1303 MVT::i32);
1304 if (isWorthFoldingAddr(RHS, Size))
1305 return true;
1306 }
1307
1308 return false;
1309}
1310
1311// Check if the given immediate is preferred by ADD. If an immediate can be
1312// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1313// encoded by one MOVZ, return true.
1314static bool isPreferredADD(int64_t ImmOff) {
1315 // Constant in [0x0, 0xfff] can be encoded in ADD.
1316 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1317 return true;
1318 // Check if it can be encoded in an "ADD LSL #12".
1319 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1320 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1321 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1322 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1323 return false;
1324}
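// For example, isPreferredADD(0xfff) is true (a plain ADD immediate),
// isPreferredADD(0x123000) is true (an "ADD LSL #12" that no single MOVZ can
// materialize), and isPreferredADD(0xa000) is false because 0xa000 fits in
// one MOVZ, which is considered faster.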
1325
1326bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1327 SDValue &Base, SDValue &Offset,
1328 SDValue &SignExtend,
1329 SDValue &DoShift) {
1330 if (N.getOpcode() != ISD::ADD)
1331 return false;
1332 SDValue LHS = N.getOperand(0);
1333 SDValue RHS = N.getOperand(1);
1334 SDLoc DL(N);
1335
1336 // Check if this particular node is reused in any non-memory related
1337 // operation. If yes, do not try to fold this node into the address
1338 // computation, since the computation will be kept.
1339 const SDNode *Node = N.getNode();
1340 for (SDNode *UI : Node->users()) {
1341 if (!isMemOpOrPrefetch(UI))
1342 return false;
1343 }
1344
1345 // Watch out if RHS is a wide immediate: it cannot be selected into the
1346 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1347 // ADD/SUB. Instead it will use the [BaseReg + 0] address mode and generate
1348 // instructions like:
1349 // MOV X0, WideImmediate
1350 // ADD X1, BaseReg, X0
1351 // LDR X2, [X1, 0]
1352 // In such situations, using the [BaseReg, XReg] addressing mode can save one
1353 // ADD/SUB:
1354 // MOV X0, WideImmediate
1355 // LDR X2, [BaseReg, X0]
1356 if (isa<ConstantSDNode>(RHS)) {
1357 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1358 // Skip if the immediate can be selected by the load/store addressing mode,
1359 // or if it can be encoded by a single ADD (SUB is also checked by using
1360 // -ImmOff).
1361 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1362 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1363 return false;
1364
1365 SDValue Ops[] = { RHS };
1366 SDNode *MOVI =
1367 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1368 SDValue MOVIV = SDValue(MOVI, 0);
1369 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1370 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1371 }
1372
1373 // Remember if it is worth folding N when it produces extended register.
1374 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1375
1376 // Try to match a shifted extend on the RHS.
1377 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1378 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1379 Base = LHS;
1380 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1381 return true;
1382 }
1383
1384 // Try to match a shifted extend on the LHS.
1385 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1386 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1387 Base = RHS;
1388 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1389 return true;
1390 }
1391
1392 // Match any non-shifted, non-extend, non-immediate add expression.
1393 Base = LHS;
1394 Offset = RHS;
1395 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1396 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1397 // Reg1 + Reg2 is free: no check needed.
1398 return true;
1399}
1400
1401SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1402 static const unsigned RegClassIDs[] = {
1403 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1404 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1405 AArch64::dsub2, AArch64::dsub3};
1406
1407 return createTuple(Regs, RegClassIDs, SubRegs);
1408}
1409
1410SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1411 static const unsigned RegClassIDs[] = {
1412 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1413 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1414 AArch64::qsub2, AArch64::qsub3};
1415
1416 return createTuple(Regs, RegClassIDs, SubRegs);
1417}
1418
1419SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1420 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1421 AArch64::ZPR3RegClassID,
1422 AArch64::ZPR4RegClassID};
1423 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1424 AArch64::zsub2, AArch64::zsub3};
1425
1426 return createTuple(Regs, RegClassIDs, SubRegs);
1427}
1428
1429SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1430 assert(Regs.size() == 2 || Regs.size() == 4);
1431
1432 // The createTuple interface requires 3 RegClassIDs for each possible
1433 // tuple type even though we only have them for ZPR2 and ZPR4.
1434 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1435 AArch64::ZPR4Mul4RegClassID};
1436 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1437 AArch64::zsub2, AArch64::zsub3};
1438 return createTuple(Regs, RegClassIDs, SubRegs);
1439}
1440
1441SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1442 const unsigned RegClassIDs[],
1443 const unsigned SubRegs[]) {
1444 // There's no special register-class for a vector-list of 1 element: it's just
1445 // a vector.
1446 if (Regs.size() == 1)
1447 return Regs[0];
1448
1449 assert(Regs.size() >= 2 && Regs.size() <= 4);
1450
1451 SDLoc DL(Regs[0]);
1452
1453 SmallVector<SDValue, 4> Ops;
1454
1455 // First operand of REG_SEQUENCE is the desired RegClass.
1456 Ops.push_back(
1457 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1458
1459 // Then we get pairs of source & subregister-position for the components.
1460 for (unsigned i = 0; i < Regs.size(); ++i) {
1461 Ops.push_back(Regs[i]);
1462 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1463 }
1464
1465 SDNode *N =
1466 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1467 return SDValue(N, 0);
1468}
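// For example, a two-element createDTuple({V0, V1}) yields a REG_SEQUENCE
// using the DD register class with V0 in dsub0 and V1 in dsub1, while a
// single-element list is returned unchanged.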
1469
1470void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1471 bool isExt) {
1472 SDLoc dl(N);
1473 EVT VT = N->getValueType(0);
1474
1475 unsigned ExtOff = isExt;
1476
1477 // Form a REG_SEQUENCE to force register allocation.
1478 unsigned Vec0Off = ExtOff + 1;
1479 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1480 SDValue RegSeq = createQTuple(Regs);
1481
1482 SmallVector<SDValue, 6> Ops;
1483 if (isExt)
1484 Ops.push_back(N->getOperand(1));
1485 Ops.push_back(RegSeq);
1486 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1487 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1488}
1489
1490static std::tuple<SDValue, SDValue>
1491extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1492 SDLoc DL(Disc);
1493 SDValue AddrDisc;
1494 SDValue ConstDisc;
1495
1496 // If this is a blend, remember the constant and address discriminators.
1497 // Otherwise, it's either a constant discriminator, or a non-blended
1498 // address discriminator.
1499 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1500 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1501 AddrDisc = Disc->getOperand(1);
1502 ConstDisc = Disc->getOperand(2);
1503 } else {
1504 ConstDisc = Disc;
1505 }
1506
1507 // If the constant discriminator (either the blend RHS, or the entire
1508 // discriminator value) isn't a 16-bit constant, bail out, and let the
1509 // discriminator be computed separately.
1510 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1511 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1512 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1513
1514 // If there's no address discriminator, use XZR directly.
1515 if (!AddrDisc)
1516 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1517
1518 return std::make_tuple(
1519 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1520 AddrDisc);
1521}
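// For example, a @llvm.ptrauth.blend(%addr, 1234) discriminator splits into
// the constant 1234 plus %addr, a plain constant 42 becomes 42 plus XZR, and
// any other form (including a blend whose constant part does not fit in 16
// bits) is returned as a zero constant plus the original discriminator.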
1522
1523void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1524 SDLoc DL(N);
1525 // IntrinsicID is operand #0
1526 SDValue Val = N->getOperand(1);
1527 SDValue AUTKey = N->getOperand(2);
1528 SDValue AUTDisc = N->getOperand(3);
1529
1530 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1531 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1532
1533 SDValue AUTAddrDisc, AUTConstDisc;
1534 std::tie(AUTConstDisc, AUTAddrDisc) =
1535 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1536
1537 if (!Subtarget->isX16X17Safer()) {
1538 SDValue Ops[] = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1539
1540 SDNode *AUT =
1541 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1542 ReplaceNode(N, AUT);
1543 } else {
1544 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1545 AArch64::X16, Val, SDValue());
1546 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1547
1548 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1549 ReplaceNode(N, AUT);
1550 }
1551}
1552
1553void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1554 SDLoc DL(N);
1555 // IntrinsicID is operand #0
1556 SDValue Val = N->getOperand(1);
1557 SDValue AUTKey = N->getOperand(2);
1558 SDValue AUTDisc = N->getOperand(3);
1559 SDValue PACKey = N->getOperand(4);
1560 SDValue PACDisc = N->getOperand(5);
1561
1562 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1563 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1564
1565 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1566 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1567
1568 SDValue AUTAddrDisc, AUTConstDisc;
1569 std::tie(AUTConstDisc, AUTAddrDisc) =
1570 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1571
1572 SDValue PACAddrDisc, PACConstDisc;
1573 std::tie(PACConstDisc, PACAddrDisc) =
1574 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1575
1576 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1577 AArch64::X16, Val, SDValue());
1578
1579 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1580 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1581
1582 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1583 ReplaceNode(N, AUTPAC);
1584}
1585
1586bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1587 LoadSDNode *LD = cast<LoadSDNode>(N);
1588 if (LD->isUnindexed())
1589 return false;
1590 EVT VT = LD->getMemoryVT();
1591 EVT DstVT = N->getValueType(0);
1592 ISD::MemIndexedMode AM = LD->getAddressingMode();
1593 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1594 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1595 int OffsetVal = (int)OffsetOp->getZExtValue();
1596
1597 // We're not doing validity checking here. That was done when checking
1598 // if we should mark the load as indexed or not. We're just selecting
1599 // the right instruction.
1600 unsigned Opcode = 0;
1601
1602 ISD::LoadExtType ExtType = LD->getExtensionType();
1603 bool InsertTo64 = false;
1604 if (VT == MVT::i64)
1605 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1606 else if (VT == MVT::i32) {
1607 if (ExtType == ISD::NON_EXTLOAD)
1608 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1609 else if (ExtType == ISD::SEXTLOAD)
1610 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1611 else {
1612 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1613 InsertTo64 = true;
1614 // The result of the load is only i32. It's the subreg_to_reg that makes
1615 // it into an i64.
1616 DstVT = MVT::i32;
1617 }
1618 } else if (VT == MVT::i16) {
1619 if (ExtType == ISD::SEXTLOAD) {
1620 if (DstVT == MVT::i64)
1621 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1622 else
1623 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1624 } else {
1625 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1626 InsertTo64 = DstVT == MVT::i64;
1627 // The result of the load is only i32. It's the subreg_to_reg that makes
1628 // it into an i64.
1629 DstVT = MVT::i32;
1630 }
1631 } else if (VT == MVT::i8) {
1632 if (ExtType == ISD::SEXTLOAD) {
1633 if (DstVT == MVT::i64)
1634 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1635 else
1636 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1637 } else {
1638 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1639 InsertTo64 = DstVT == MVT::i64;
1640 // The result of the load is only i32. It's the subreg_to_reg that makes
1641 // it into an i64.
1642 DstVT = MVT::i32;
1643 }
1644 } else if (VT == MVT::f16) {
1645 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1646 } else if (VT == MVT::bf16) {
1647 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1648 } else if (VT == MVT::f32) {
1649 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1650 } else if (VT == MVT::f64 ||
1651 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1652 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1653 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1654 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1655 } else if (VT.is64BitVector()) {
1656 if (IsPre || OffsetVal != 8)
1657 return false;
1658 switch (VT.getScalarSizeInBits()) {
1659 case 8:
1660 Opcode = AArch64::LD1Onev8b_POST;
1661 break;
1662 case 16:
1663 Opcode = AArch64::LD1Onev4h_POST;
1664 break;
1665 case 32:
1666 Opcode = AArch64::LD1Onev2s_POST;
1667 break;
1668 case 64:
1669 Opcode = AArch64::LD1Onev1d_POST;
1670 break;
1671 default:
1672 llvm_unreachable("Expected vector element to be a power of 2");
1673 }
1674 } else if (VT.is128BitVector()) {
1675 if (IsPre || OffsetVal != 16)
1676 return false;
1677 switch (VT.getScalarSizeInBits()) {
1678 case 8:
1679 Opcode = AArch64::LD1Onev16b_POST;
1680 break;
1681 case 16:
1682 Opcode = AArch64::LD1Onev8h_POST;
1683 break;
1684 case 32:
1685 Opcode = AArch64::LD1Onev4s_POST;
1686 break;
1687 case 64:
1688 Opcode = AArch64::LD1Onev2d_POST;
1689 break;
1690 default:
1691 llvm_unreachable("Expected vector element to be a power of 2");
1692 }
1693 } else
1694 return false;
1695 SDValue Chain = LD->getChain();
1696 SDValue Base = LD->getBasePtr();
1697 SDLoc dl(N);
1698 // LD1 encodes an immediate offset by using XZR as the offset register.
1699 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1700 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1701 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1702 SDValue Ops[] = { Base, Offset, Chain };
1703 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1704 MVT::Other, Ops);
1705
1706 // Transfer memoperands.
1707 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1708 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1709
1710 // Either way, we're replacing the node, so tell the caller that.
1711 SDValue LoadedVal = SDValue(Res, 1);
1712 if (InsertTo64) {
1713 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1714 LoadedVal =
1715 SDValue(CurDAG->getMachineNode(
1716 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1717 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1718 SubReg),
1719 0);
1720 }
1721
1722 ReplaceUses(SDValue(N, 0), LoadedVal);
1723 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1724 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1725 CurDAG->RemoveDeadNode(N);
1726 return true;
1727}
1728
1729void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1730 unsigned SubRegIdx) {
1731 SDLoc dl(N);
1732 EVT VT = N->getValueType(0);
1733 SDValue Chain = N->getOperand(0);
1734
1735 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1736 Chain};
1737
1738 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1739
1740 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1741 SDValue SuperReg = SDValue(Ld, 0);
1742 for (unsigned i = 0; i < NumVecs; ++i)
1743 ReplaceUses(SDValue(N, i),
1744 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1745
1746 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1747
1748 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1749 // because it's too simple to have needed special treatment during lowering.
1750 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1751 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1752 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1753 }
1754
1755 CurDAG->RemoveDeadNode(N);
1756}
1757
1758void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1759 unsigned Opc, unsigned SubRegIdx) {
1760 SDLoc dl(N);
1761 EVT VT = N->getValueType(0);
1762 SDValue Chain = N->getOperand(0);
1763
1764 SDValue Ops[] = {N->getOperand(1), // Mem operand
1765 N->getOperand(2), // Incremental
1766 Chain};
1767
1768 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1769 MVT::Untyped, MVT::Other};
1770
1771 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1772
1773 // Update uses of write back register
1774 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1775
1776 // Update uses of vector list
1777 SDValue SuperReg = SDValue(Ld, 1);
1778 if (NumVecs == 1)
1779 ReplaceUses(SDValue(N, 0), SuperReg);
1780 else
1781 for (unsigned i = 0; i < NumVecs; ++i)
1782 ReplaceUses(SDValue(N, i),
1783 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1784
1785 // Update the chain
1786 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1787 CurDAG->RemoveDeadNode(N);
1788}
1789
1790/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1791/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1792/// new Base and an SDValue representing the new offset.
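/// For example (illustrative only): if the old address already matches a
/// reg+imm SVE addressing mode with an immediate in [-8, 7] (a multiple of the
/// vector length), Opc_ri is chosen with the folded base and immediate;
/// otherwise, if the base matches a reg+reg mode scaled by Scale, Opc_rr is
/// chosen; failing both, the original base and offset are returned with the
/// Opc_ri opcode.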
1793std::tuple<unsigned, SDValue, SDValue>
1794AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1795 unsigned Opc_ri,
1796 const SDValue &OldBase,
1797 const SDValue &OldOffset,
1798 unsigned Scale) {
1799 SDValue NewBase = OldBase;
1800 SDValue NewOffset = OldOffset;
1801 // Detect a possible Reg+Imm addressing mode.
1802 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1803 N, OldBase, NewBase, NewOffset);
1804
1805 // Detect a possible reg+reg addressing mode, but only if we haven't already
1806 // detected a Reg+Imm one.
1807 const bool IsRegReg =
1808 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1809
1810 // Select the instruction.
1811 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1812}
1813
1814enum class SelectTypeKind {
1815 Int1 = 0,
1816 Int = 1,
1817 FP = 2,
1818 AnyType = 3,
1819};
1820
1821/// This function selects an opcode from a list of opcodes, which is
1822/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1823/// element types, in this order.
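/// For example (illustrative only): with Kind == SelectTypeKind::Int, a VT of
/// nxv16i8 selects Opcodes[0], nxv8i16 selects Opcodes[1], nxv4i32 selects
/// Opcodes[2], and nxv2i64 selects Opcodes[3]; bf16 vectors share the first
/// slot (Key = 16) with the 8-bit element types.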
1824template <SelectTypeKind Kind>
1825static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1826 // Only match scalable vector VTs
1827 if (!VT.isScalableVector())
1828 return 0;
1829
1830 EVT EltVT = VT.getVectorElementType();
1831 unsigned Key = VT.getVectorMinNumElements();
1832 switch (Kind) {
1833 case SelectTypeKind::AnyType:
1834 break;
1835 case SelectTypeKind::Int:
1836 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1837 EltVT != MVT::i64)
1838 return 0;
1839 break;
1840 case SelectTypeKind::Int1:
1841 if (EltVT != MVT::i1)
1842 return 0;
1843 break;
1844 case SelectTypeKind::FP:
1845 if (EltVT == MVT::bf16)
1846 Key = 16;
1847 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1848 EltVT != MVT::f64)
1849 return 0;
1850 break;
1851 }
1852
1853 unsigned Offset;
1854 switch (Key) {
1855 case 16: // 8-bit or bf16
1856 Offset = 0;
1857 break;
1858 case 8: // 16-bit
1859 Offset = 1;
1860 break;
1861 case 4: // 32-bit
1862 Offset = 2;
1863 break;
1864 case 2: // 64-bit
1865 Offset = 3;
1866 break;
1867 default:
1868 return 0;
1869 }
1870
1871 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1872}
1873
1874// This function is almost identical to SelectWhilePair, but has an
1875// extra check on the range of the immediate operand.
1876// TODO: Merge these two functions together at some point?
1877void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1878 // Immediate can be either 0 or 1.
1879 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1880 if (Imm->getZExtValue() > 1)
1881 return;
1882
1883 SDLoc DL(N);
1884 EVT VT = N->getValueType(0);
1885 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1886 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1887 SDValue SuperReg = SDValue(WhilePair, 0);
1888
1889 for (unsigned I = 0; I < 2; ++I)
1890 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1891 AArch64::psub0 + I, DL, VT, SuperReg));
1892
1893 CurDAG->RemoveDeadNode(N);
1894}
1895
1896void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1897 SDLoc DL(N);
1898 EVT VT = N->getValueType(0);
1899
1900 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1901
1902 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1903 SDValue SuperReg = SDValue(WhilePair, 0);
1904
1905 for (unsigned I = 0; I < 2; ++I)
1906 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1907 AArch64::psub0 + I, DL, VT, SuperReg));
1908
1909 CurDAG->RemoveDeadNode(N);
1910}
1911
1912void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1913 unsigned Opcode) {
1914 EVT VT = N->getValueType(0);
1915 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1916 SDValue Ops = createZTuple(Regs);
1917 SDLoc DL(N);
1918 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1919 SDValue SuperReg = SDValue(Intrinsic, 0);
1920 for (unsigned i = 0; i < NumVecs; ++i)
1921 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1922 AArch64::zsub0 + i, DL, VT, SuperReg));
1923
1924 CurDAG->RemoveDeadNode(N);
1925}
1926
1927void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1928 unsigned Opcode) {
1929 SDLoc DL(N);
1930 EVT VT = N->getValueType(0);
1931 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1932 Ops.push_back(/*Chain*/ N->getOperand(0));
1933
1934 SDNode *Instruction =
1935 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1936 SDValue SuperReg = SDValue(Instruction, 0);
1937
1938 for (unsigned i = 0; i < NumVecs; ++i)
1939 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1940 AArch64::zsub0 + i, DL, VT, SuperReg));
1941
1942 // Copy chain
1943 unsigned ChainIdx = NumVecs;
1944 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1945 CurDAG->RemoveDeadNode(N);
1946}
1947
1948void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1949 unsigned NumVecs,
1950 bool IsZmMulti,
1951 unsigned Opcode,
1952 bool HasPred) {
1953 assert(Opcode != 0 && "Unexpected opcode");
1954
1955 SDLoc DL(N);
1956 EVT VT = N->getValueType(0);
1957 unsigned FirstVecIdx = HasPred ? 2 : 1;
1958
1959 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1960 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1961 return createZMulTuple(Regs);
1962 };
1963
1964 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1965
1966 SDValue Zm;
1967 if (IsZmMulti)
1968 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1969 else
1970 Zm = N->getOperand(NumVecs + FirstVecIdx);
1971
1972 SDNode *Intrinsic;
1973 if (HasPred)
1974 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1975 N->getOperand(1), Zdn, Zm);
1976 else
1977 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1978 SDValue SuperReg = SDValue(Intrinsic, 0);
1979 for (unsigned i = 0; i < NumVecs; ++i)
1980 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1981 AArch64::zsub0 + i, DL, VT, SuperReg));
1982
1983 CurDAG->RemoveDeadNode(N);
1984}
1985
1986void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1987 unsigned Scale, unsigned Opc_ri,
1988 unsigned Opc_rr, bool IsIntr) {
1989 assert(Scale < 5 && "Invalid scaling value.");
1990 SDLoc DL(N);
1991 EVT VT = N->getValueType(0);
1992 SDValue Chain = N->getOperand(0);
1993
1994 // Optimize addressing mode.
1995 SDValue Base, Offset;
1996 unsigned Opc;
1997 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1998 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1999 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2000
2001 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2002 Base, // Memory operand
2003 Offset, Chain};
2004
2005 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2006
2007 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2008 SDValue SuperReg = SDValue(Load, 0);
2009 for (unsigned i = 0; i < NumVecs; ++i)
2010 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2011 AArch64::zsub0 + i, DL, VT, SuperReg));
2012
2013 // Copy chain
2014 unsigned ChainIdx = NumVecs;
2015 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2016 CurDAG->RemoveDeadNode(N);
2017}
2018
2019void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2020 unsigned NumVecs,
2021 unsigned Scale,
2022 unsigned Opc_ri,
2023 unsigned Opc_rr) {
2024 assert(Scale < 4 && "Invalid scaling value.");
2025 SDLoc DL(N);
2026 EVT VT = N->getValueType(0);
2027 SDValue Chain = N->getOperand(0);
2028
2029 SDValue PNg = N->getOperand(2);
2030 SDValue Base = N->getOperand(3);
2031 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2032 unsigned Opc;
2033 std::tie(Opc, Base, Offset) =
2034 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2035
2036 SDValue Ops[] = {PNg, // Predicate-as-counter
2037 Base, // Memory operand
2038 Offset, Chain};
2039
2040 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2041
2042 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2043 SDValue SuperReg = SDValue(Load, 0);
2044 for (unsigned i = 0; i < NumVecs; ++i)
2045 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2046 AArch64::zsub0 + i, DL, VT, SuperReg));
2047
2048 // Copy chain
2049 unsigned ChainIdx = NumVecs;
2050 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2051 CurDAG->RemoveDeadNode(N);
2052}
2053
2054void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2055 unsigned Opcode) {
2056 if (N->getValueType(0) != MVT::nxv4f32)
2057 return;
2058 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2059}
2060
2061void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2062 unsigned NumOutVecs,
2063 unsigned Opc,
2064 uint32_t MaxImm) {
2065 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2066 if (Imm->getZExtValue() > MaxImm)
2067 return;
2068
2069 SDValue ZtValue;
2070 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2071 return;
2072
2073 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2074 SDLoc DL(Node);
2075 EVT VT = Node->getValueType(0);
2076
2077 SDNode *Instruction =
2078 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2079 SDValue SuperReg = SDValue(Instruction, 0);
2080
2081 for (unsigned I = 0; I < NumOutVecs; ++I)
2082 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2083 AArch64::zsub0 + I, DL, VT, SuperReg));
2084
2085 // Copy chain
2086 unsigned ChainIdx = NumOutVecs;
2087 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2088 CurDAG->RemoveDeadNode(Node);
2089}
2090
2091void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2092 unsigned NumOutVecs,
2093 unsigned Opc) {
2094
2095 SDValue ZtValue;
2096 SmallVector<SDValue, 4> Ops;
2097 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2098 return;
2099
2100 Ops.push_back(ZtValue);
2101 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2102 SDLoc DL(Node);
2103 EVT VT = Node->getValueType(0);
2104
2105 SDNode *Instruction =
2106 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2107 SDValue SuperReg = SDValue(Instruction, 0);
2108
2109 for (unsigned I = 0; I < NumOutVecs; ++I)
2110 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2111 AArch64::zsub0 + I, DL, VT, SuperReg));
2112
2113 // Copy chain
2114 unsigned ChainIdx = NumOutVecs;
2115 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2116 CurDAG->RemoveDeadNode(Node);
2117}
2118
2119void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2120 unsigned Op) {
2121 SDLoc DL(N);
2122 EVT VT = N->getValueType(0);
2123
2124 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2125 SDValue Zd = createZMulTuple(Regs);
2126 SDValue Zn = N->getOperand(1 + NumVecs);
2127 SDValue Zm = N->getOperand(2 + NumVecs);
2128
2129 SDValue Ops[] = {Zd, Zn, Zm};
2130
2131 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2132 SDValue SuperReg = SDValue(Intrinsic, 0);
2133 for (unsigned i = 0; i < NumVecs; ++i)
2134 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2135 AArch64::zsub0 + i, DL, VT, SuperReg));
2136
2137 CurDAG->RemoveDeadNode(N);
2138}
2139
2140bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2141 switch (BaseReg) {
2142 default:
2143 return false;
2144 case AArch64::ZA:
2145 case AArch64::ZAB0:
2146 if (TileNum == 0)
2147 break;
2148 return false;
2149 case AArch64::ZAH0:
2150 if (TileNum <= 1)
2151 break;
2152 return false;
2153 case AArch64::ZAS0:
2154 if (TileNum <= 3)
2155 break;
2156 return false;
2157 case AArch64::ZAD0:
2158 if (TileNum <= 7)
2159 break;
2160 return false;
2161 }
2162
2163 BaseReg += TileNum;
2164 return true;
2165}
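// For example (illustrative only): SelectSMETile(BaseReg = AArch64::ZAS0,
// TileNum = 2) accepts and rewrites BaseReg to AArch64::ZAS2, while ZAB0 with
// a non-zero TileNum is rejected since there is only one 8-bit ZA tile.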
2166
2167template <unsigned MaxIdx, unsigned Scale>
2168void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2169 unsigned BaseReg, unsigned Op) {
2170 unsigned TileNum = 0;
2171 if (BaseReg != AArch64::ZA)
2172 TileNum = N->getConstantOperandVal(2);
2173
2174 if (!SelectSMETile(BaseReg, TileNum))
2175 return;
2176
2177 SDValue SliceBase, Base, Offset;
2178 if (BaseReg == AArch64::ZA)
2179 SliceBase = N->getOperand(2);
2180 else
2181 SliceBase = N->getOperand(3);
2182
2183 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2184 return;
2185
2186 SDLoc DL(N);
2187 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2188 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2189 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2190
2191 EVT VT = N->getValueType(0);
2192 for (unsigned I = 0; I < NumVecs; ++I)
2193 ReplaceUses(SDValue(N, I),
2194 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2195 SDValue(Mov, 0)));
2196 // Copy chain
2197 unsigned ChainIdx = NumVecs;
2198 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2199 CurDAG->RemoveDeadNode(N);
2200}
2201
2202void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2203 unsigned Op, unsigned MaxIdx,
2204 unsigned Scale, unsigned BaseReg) {
2205 // The slice operand can be in different positions:
2206 // Array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2207 // Tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2208 SDValue SliceBase = N->getOperand(2);
2209 if (BaseReg != AArch64::ZA)
2210 SliceBase = N->getOperand(3);
2211
2212 SDValue Base, Offset;
2213 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2214 return;
2215 // The correct ZA tile number is computed later, at machine-instruction
2216 // emission time (see EmitZAInstr); the DAG cannot select a ZA tile as an
2217 // output register with ZReg.
2218 SDLoc DL(N);
2219 SmallVector<SDValue, 4> Ops;
2220 if (BaseReg != AArch64::ZA )
2221 Ops.push_back(N->getOperand(2));
2222 Ops.push_back(Base);
2223 Ops.push_back(Offset);
2224 Ops.push_back(N->getOperand(0)); //Chain
2225 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2226
2227 EVT VT = N->getValueType(0);
2228 for (unsigned I = 0; I < NumVecs; ++I)
2229 ReplaceUses(SDValue(N, I),
2230 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2231 SDValue(Mov, 0)));
2232
2233 // Copy chain
2234 unsigned ChainIdx = NumVecs;
2235 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2236 CurDAG->RemoveDeadNode(N);
2237}
2238
2239void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2240 unsigned NumOutVecs,
2241 bool IsTupleInput,
2242 unsigned Opc) {
2243 SDLoc DL(N);
2244 EVT VT = N->getValueType(0);
2245 unsigned NumInVecs = N->getNumOperands() - 1;
2246
2247 SmallVector<SDValue, 4> Ops;
2248 if (IsTupleInput) {
2249 assert((NumInVecs == 2 || NumInVecs == 4) &&
2250 "Don't know how to handle multi-register input!");
2251 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2252 Ops.push_back(createZMulTuple(Regs));
2253 } else {
2254 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2255 for (unsigned I = 0; I < NumInVecs; I++)
2256 Ops.push_back(N->getOperand(1 + I));
2257 }
2258
2259 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2260 SDValue SuperReg = SDValue(Res, 0);
2261
2262 for (unsigned I = 0; I < NumOutVecs; I++)
2263 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2264 AArch64::zsub0 + I, DL, VT, SuperReg));
2265 CurDAG->RemoveDeadNode(N);
2266}
2267
2268void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2269 unsigned Opc) {
2270 SDLoc dl(N);
2271 EVT VT = N->getOperand(2)->getValueType(0);
2272
2273 // Form a REG_SEQUENCE to force register allocation.
2274 bool Is128Bit = VT.getSizeInBits() == 128;
2275 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2276 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2277
2278 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2279 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2280
2281 // Transfer memoperands.
2282 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2283 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2284
2285 ReplaceNode(N, St);
2286}
2287
2288void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2289 unsigned Scale, unsigned Opc_rr,
2290 unsigned Opc_ri) {
2291 SDLoc dl(N);
2292
2293 // Form a REG_SEQUENCE to force register allocation.
2294 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2295 SDValue RegSeq = createZTuple(Regs);
2296
2297 // Optimize addressing mode.
2298 unsigned Opc;
2299 SDValue Base, Offset;
2300 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2301 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2302 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2303
2304 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2305 Base, // address
2306 Offset, // offset
2307 N->getOperand(0)}; // chain
2308 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2309
2310 ReplaceNode(N, St);
2311}
2312
2313bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2314 SDValue &OffImm) {
2315 SDLoc dl(N);
2316 const DataLayout &DL = CurDAG->getDataLayout();
2317 const TargetLowering *TLI = getTargetLowering();
2318
2319 // Try to match it for the frame address
2320 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2321 int FI = FINode->getIndex();
2322 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2323 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2324 return true;
2325 }
2326
2327 return false;
2328}
2329
2330void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2331 unsigned Opc) {
2332 SDLoc dl(N);
2333 EVT VT = N->getOperand(2)->getValueType(0);
2334 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2335 MVT::Other}; // Type for the Chain
2336
2337 // Form a REG_SEQUENCE to force register allocation.
2338 bool Is128Bit = VT.getSizeInBits() == 128;
2339 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2340 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2341
2342 SDValue Ops[] = {RegSeq,
2343 N->getOperand(NumVecs + 1), // base register
2344 N->getOperand(NumVecs + 2), // Incremental
2345 N->getOperand(0)}; // Chain
2346 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2347
2348 ReplaceNode(N, St);
2349}
2350
2351namespace {
2352/// WidenVector - Given a value in the V64 register class, produce the
2353/// equivalent value in the V128 register class.
2354class WidenVector {
2355 SelectionDAG &DAG;
2356
2357public:
2358 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2359
2360 SDValue operator()(SDValue V64Reg) {
2361 EVT VT = V64Reg.getValueType();
2362 unsigned NarrowSize = VT.getVectorNumElements();
2363 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2364 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2365 SDLoc DL(V64Reg);
2366
2367 SDValue Undef =
2368 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2369 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2370 }
2371};
2372} // namespace
2373
2374/// NarrowVector - Given a value in the V128 register class, produce the
2375 /// equivalent value in the V64 register class.
2376 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2377 EVT VT = V128Reg.getValueType();
2378 unsigned WideSize = VT.getVectorNumElements();
2379 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2380 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2381
2382 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2383 V128Reg);
2384}
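// For example (illustrative only): WidenVector turns a v2i32 value into a
// v4i32 by inserting it into the dsub sub-register of an IMPLICIT_DEF, and
// NarrowVector performs the inverse by extracting dsub as a v2i32.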
2385
2386void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2387 unsigned Opc) {
2388 SDLoc dl(N);
2389 EVT VT = N->getValueType(0);
2390 bool Narrow = VT.getSizeInBits() == 64;
2391
2392 // Form a REG_SEQUENCE to force register allocation.
2393 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2394
2395 if (Narrow)
2396 transform(Regs, Regs.begin(),
2397 WidenVector(*CurDAG));
2398
2399 SDValue RegSeq = createQTuple(Regs);
2400
2401 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2402
2403 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2404
2405 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2406 N->getOperand(NumVecs + 3), N->getOperand(0)};
2407 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2408 SDValue SuperReg = SDValue(Ld, 0);
2409
2410 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2411 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2412 AArch64::qsub2, AArch64::qsub3 };
2413 for (unsigned i = 0; i < NumVecs; ++i) {
2414 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2415 if (Narrow)
2416 NV = NarrowVector(NV, *CurDAG);
2417 ReplaceUses(SDValue(N, i), NV);
2418 }
2419
2420 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2421 CurDAG->RemoveDeadNode(N);
2422}
2423
2424void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2425 unsigned Opc) {
2426 SDLoc dl(N);
2427 EVT VT = N->getValueType(0);
2428 bool Narrow = VT.getSizeInBits() == 64;
2429
2430 // Form a REG_SEQUENCE to force register allocation.
2431 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2432
2433 if (Narrow)
2434 transform(Regs, Regs.begin(),
2435 WidenVector(*CurDAG));
2436
2437 SDValue RegSeq = createQTuple(Regs);
2438
2439 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2440 RegSeq->getValueType(0), MVT::Other};
2441
2442 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2443
2444 SDValue Ops[] = {RegSeq,
2445 CurDAG->getTargetConstant(LaneNo, dl,
2446 MVT::i64), // Lane Number
2447 N->getOperand(NumVecs + 2), // Base register
2448 N->getOperand(NumVecs + 3), // Incremental
2449 N->getOperand(0)};
2450 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2451
2452 // Update uses of the write back register
2453 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2454
2455 // Update uses of the vector list
2456 SDValue SuperReg = SDValue(Ld, 1);
2457 if (NumVecs == 1) {
2458 ReplaceUses(SDValue(N, 0),
2459 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2460 } else {
2461 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2462 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2463 AArch64::qsub2, AArch64::qsub3 };
2464 for (unsigned i = 0; i < NumVecs; ++i) {
2465 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2466 SuperReg);
2467 if (Narrow)
2468 NV = NarrowVector(NV, *CurDAG);
2469 ReplaceUses(SDValue(N, i), NV);
2470 }
2471 }
2472
2473 // Update the Chain
2474 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2475 CurDAG->RemoveDeadNode(N);
2476}
2477
2478void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2479 unsigned Opc) {
2480 SDLoc dl(N);
2481 EVT VT = N->getOperand(2)->getValueType(0);
2482 bool Narrow = VT.getSizeInBits() == 64;
2483
2484 // Form a REG_SEQUENCE to force register allocation.
2485 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2486
2487 if (Narrow)
2488 transform(Regs, Regs.begin(),
2489 WidenVector(*CurDAG));
2490
2491 SDValue RegSeq = createQTuple(Regs);
2492
2493 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2494
2495 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2496 N->getOperand(NumVecs + 3), N->getOperand(0)};
2497 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2498
2499 // Transfer memoperands.
2500 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2501 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2502
2503 ReplaceNode(N, St);
2504}
2505
2506void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2507 unsigned Opc) {
2508 SDLoc dl(N);
2509 EVT VT = N->getOperand(2)->getValueType(0);
2510 bool Narrow = VT.getSizeInBits() == 64;
2511
2512 // Form a REG_SEQUENCE to force register allocation.
2513 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2514
2515 if (Narrow)
2516 transform(Regs, Regs.begin(),
2517 WidenVector(*CurDAG));
2518
2519 SDValue RegSeq = createQTuple(Regs);
2520
2521 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2522 MVT::Other};
2523
2524 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2525
2526 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2527 N->getOperand(NumVecs + 2), // Base Register
2528 N->getOperand(NumVecs + 3), // Incremental
2529 N->getOperand(0)};
2530 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2531
2532 // Transfer memoperands.
2533 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2534 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2535
2536 ReplaceNode(N, St);
2537}
2538
2539 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2540 unsigned &Opc, SDValue &Opd0,
2541 unsigned &LSB, unsigned &MSB,
2542 unsigned NumberOfIgnoredLowBits,
2543 bool BiggerPattern) {
2544 assert(N->getOpcode() == ISD::AND &&
2545 "N must be a AND operation to call this function");
2546
2547 EVT VT = N->getValueType(0);
2548
2549 // Here we can test the type of VT and return false when the type does not
2550 // match, but since it is done prior to that call in the current context
2551 // we turned that into an assert to avoid redundant code.
2552 assert((VT == MVT::i32 || VT == MVT::i64) &&
2553 "Type checking must have been done before calling this function");
2554
2555 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2556 // changed the AND node to a 32-bit mask operation. We'll have to
2557 // undo that as part of the transform here if we want to catch all
2558 // the opportunities.
2559 // Currently the NumberOfIgnoredLowBits argument helps to recover
2560 // from these situations when matching bigger pattern (bitfield insert).
2561
2562 // For unsigned extracts, check for a shift right and mask
2563 uint64_t AndImm = 0;
2564 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2565 return false;
2566
2567 const SDNode *Op0 = N->getOperand(0).getNode();
2568
2569 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2570 // simplified. Try to undo that
2571 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2572
2573 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2574 if (AndImm & (AndImm + 1))
2575 return false;
2576
2577 bool ClampMSB = false;
2578 uint64_t SrlImm = 0;
2579 // Handle the SRL + ANY_EXTEND case.
2580 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2581 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2582 // Extend the incoming operand of the SRL to 64-bit.
2583 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2584 // Make sure to clamp the MSB so that we preserve the semantics of the
2585 // original operations.
2586 ClampMSB = true;
2587 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2588 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2589 SrlImm)) {
2590 // If the shift result was truncated, we can still combine them.
2591 Opd0 = Op0->getOperand(0).getOperand(0);
2592
2593 // Use the type of SRL node.
2594 VT = Opd0->getValueType(0);
2595 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2596 Opd0 = Op0->getOperand(0);
2597 ClampMSB = (VT == MVT::i32);
2598 } else if (BiggerPattern) {
2599 // Let's pretend a 0 shift right has been performed.
2600 // The resulting code will be at least as good as the original one
2601 // plus it may expose more opportunities for bitfield insert pattern.
2602 // FIXME: Currently we limit this to the bigger pattern, because
2603 // some optimizations expect AND and not UBFM.
2604 Opd0 = N->getOperand(0);
2605 } else
2606 return false;
2607
2608 // Bail out on large immediates. This happens when no proper
2609 // combining/constant folding was performed.
2610 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2611 LLVM_DEBUG(
2612 (dbgs() << N
2613 << ": Found large shift immediate, this should not happen\n"));
2614 return false;
2615 }
2616
2617 LSB = SrlImm;
2618 MSB = SrlImm +
2619 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2620 : llvm::countr_one<uint64_t>(AndImm)) -
2621 1;
2622 if (ClampMSB)
2623 // Since we're moving the extend before the right shift operation, we need
2624 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2625 // the zeros which would get shifted in with the original right shift
2626 // operation.
2627 MSB = MSB > 31 ? 31 : MSB;
2628
2629 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2630 return true;
2631}
2632
2633 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2634 SDValue &Opd0, unsigned &Immr,
2635 unsigned &Imms) {
2636 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2637
2638 EVT VT = N->getValueType(0);
2639 unsigned BitWidth = VT.getSizeInBits();
2640 assert((VT == MVT::i32 || VT == MVT::i64) &&
2641 "Type checking must have been done before calling this function");
2642
2643 SDValue Op = N->getOperand(0);
2644 if (Op->getOpcode() == ISD::TRUNCATE) {
2645 Op = Op->getOperand(0);
2646 VT = Op->getValueType(0);
2647 BitWidth = VT.getSizeInBits();
2648 }
2649
2650 uint64_t ShiftImm;
2651 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2652 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2653 return false;
2654
2655 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2656 if (ShiftImm + Width > BitWidth)
2657 return false;
2658
2659 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2660 Opd0 = Op.getOperand(0);
2661 Immr = ShiftImm;
2662 Imms = ShiftImm + Width - 1;
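  // E.g. (illustrative only): sign_extend_inreg(srl x, #3) with an i8 inreg
  // type on an i32 value gives Immr = 3 and Imms = 10, i.e. SBFX x, #3, #8.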
2663 return true;
2664}
2665
2666 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2667 SDValue &Opd0, unsigned &LSB,
2668 unsigned &MSB) {
2669 // We are looking for the following pattern, which extracts several
2670 // contiguous bits from the source value and places them starting at the LSB
2671 // of the destination value, with all other destination bits set to zero:
2672 //
2673 // Value2 = AND Value, MaskImm
2674 // SRL Value2, ShiftImm
2675 //
2676 // where MaskImm >> ShiftImm determines the width of the extracted bitfield.
2677 //
2678 // This gets selected into a single UBFM:
2679 //
2680 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2681 //
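  // A concrete instance (illustrative only): MaskImm == 0xFF0 with
  // ShiftImm == 4 extracts bits [11:4] of Value, which is selected as
  // UBFM Value, #4, #11 (i.e. UBFX Value, #4, #8).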
2682
2683 if (N->getOpcode() != ISD::SRL)
2684 return false;
2685
2686 uint64_t AndMask = 0;
2687 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2688 return false;
2689
2690 Opd0 = N->getOperand(0).getOperand(0);
2691
2692 uint64_t SrlImm = 0;
2693 if (!isIntImmediate(N->getOperand(1), SrlImm))
2694 return false;
2695
2696 // Check whether we really have several bits extract here.
2697 if (!isMask_64(AndMask >> SrlImm))
2698 return false;
2699
2700 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2701 LSB = SrlImm;
2702 MSB = llvm::Log2_64(AndMask);
2703 return true;
2704}
2705
2706static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2707 unsigned &Immr, unsigned &Imms,
2708 bool BiggerPattern) {
2709 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2710 "N must be a SHR/SRA operation to call this function");
2711
2712 EVT VT = N->getValueType(0);
2713
2714 // Here we can test the type of VT and return false when the type does not
2715 // match, but since it is done prior to that call in the current context
2716 // we turned that into an assert to avoid redundant code.
2717 assert((VT == MVT::i32 || VT == MVT::i64) &&
2718 "Type checking must have been done before calling this function");
2719
2720 // Check for AND + SRL doing several bits extract.
2721 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2722 return true;
2723
2724 // We're looking for a shift of a shift.
2725 uint64_t ShlImm = 0;
2726 uint64_t TruncBits = 0;
2727 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2728 Opd0 = N->getOperand(0).getOperand(0);
2729 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2730 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2731 // We are looking for a shift of a truncate. Truncating from i64 to i32 can
2732 // be considered as setting the high 32 bits to zero. Our strategy here is to
2733 // always generate a 64-bit UBFM. This consistency will help the CSE pass
2734 // later find more redundancy.
2735 Opd0 = N->getOperand(0).getOperand(0);
2736 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2737 VT = Opd0.getValueType();
2738 assert(VT == MVT::i64 && "the promoted type should be i64");
2739 } else if (BiggerPattern) {
2740 // Let's pretend a 0 shift left has been performed.
2741 // FIXME: Currently we limit this to the bigger pattern case,
2742 // because some optimizations expect AND and not UBFM
2743 Opd0 = N->getOperand(0);
2744 } else
2745 return false;
2746
2747 // Missing combines/constant folding may have left us with strange
2748 // constants.
2749 if (ShlImm >= VT.getSizeInBits()) {
2750 LLVM_DEBUG(
2751 (dbgs() << N
2752 << ": Found large shift immediate, this should not happen\n"));
2753 return false;
2754 }
2755
2756 uint64_t SrlImm = 0;
2757 if (!isIntImmediate(N->getOperand(1), SrlImm))
2758 return false;
2759
2760 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2761 "bad amount in shift node!");
2762 int immr = SrlImm - ShlImm;
2763 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2764 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2765 // SRA requires a signed extraction
2766 if (VT == MVT::i32)
2767 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2768 else
2769 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2770 return true;
2771}
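// A worked instance of the shift-of-shift case above (illustrative only):
// for i32, (srl (shl x, 8), 12) keeps bits [23:4] of x, so Immr = 12 - 8 = 4
// and Imms = 32 - 8 - 1 = 23, i.e. UBFM x, #4, #23 (UBFX x, #4, #20); with an
// SRA root the signed SBFM form is chosen instead.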
2772
2773bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2774 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2775
2776 EVT VT = N->getValueType(0);
2777 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2778 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2779 return false;
2780
2781 uint64_t ShiftImm;
2782 SDValue Op = N->getOperand(0);
2783 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2784 return false;
2785
2786 SDLoc dl(N);
2787 // Extend the incoming operand of the shift to 64-bits.
2788 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2789 unsigned Immr = ShiftImm;
2790 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2791 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2792 CurDAG->getTargetConstant(Imms, dl, VT)};
2793 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2794 return true;
2795}
2796
2797static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2798 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2799 unsigned NumberOfIgnoredLowBits = 0,
2800 bool BiggerPattern = false) {
2801 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2802 return false;
2803
2804 switch (N->getOpcode()) {
2805 default:
2806 if (!N->isMachineOpcode())
2807 return false;
2808 break;
2809 case ISD::AND:
2810 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2811 NumberOfIgnoredLowBits, BiggerPattern);
2812 case ISD::SRL:
2813 case ISD::SRA:
2814 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2815
2816 case ISD::SIGN_EXTEND_INREG:
2817 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2818 }
2819
2820 unsigned NOpc = N->getMachineOpcode();
2821 switch (NOpc) {
2822 default:
2823 return false;
2824 case AArch64::SBFMWri:
2825 case AArch64::UBFMWri:
2826 case AArch64::SBFMXri:
2827 case AArch64::UBFMXri:
2828 Opc = NOpc;
2829 Opd0 = N->getOperand(0);
2830 Immr = N->getConstantOperandVal(1);
2831 Imms = N->getConstantOperandVal(2);
2832 return true;
2833 }
2834 // Unreachable
2835 return false;
2836}
2837
2838bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2839 unsigned Opc, Immr, Imms;
2840 SDValue Opd0;
2841 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2842 return false;
2843
2844 EVT VT = N->getValueType(0);
2845 SDLoc dl(N);
2846
2847 // If the bit extract operation is 64bit but the original type is 32bit, we
2848 // need to add one EXTRACT_SUBREG.
2849 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2850 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2851 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2852
2853 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2854 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2855 MVT::i32, SDValue(BFM, 0));
2856 ReplaceNode(N, Inner.getNode());
2857 return true;
2858 }
2859
2860 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2861 CurDAG->getTargetConstant(Imms, dl, VT)};
2862 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2863 return true;
2864}
2865
2866/// Does DstMask form a complementary pair with the mask provided by
2867/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2868/// this asks whether DstMask zeroes precisely those bits that will be set by
2869/// the other half.
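/// For example (illustrative only): on i32, DstMask == 0xffff00ff pairs with
/// an inserted value whose possibly-set bits are exactly 0x0000ff00, since the
/// two masks do not overlap and together cover all 32 bits.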
2870static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2871 unsigned NumberOfIgnoredHighBits, EVT VT) {
2872 assert((VT == MVT::i32 || VT == MVT::i64) &&
2873 "i32 or i64 mask type expected!");
2874 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2875
2876 // Enable implicitTrunc as we're intentionally ignoring high bits.
2877 APInt SignificantDstMask =
2878 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2879 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2880
2881 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2882 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2883}
2884
2885// Look for bits that will be useful for later uses.
2886 // A bit is considered useless as soon as it is dropped and never used
2887 // before it has been dropped.
2888// E.g., looking for useful bit of x
2889// 1. y = x & 0x7
2890// 2. z = y >> 2
2891 // After #1, the useful bits of x are 0x7; these useful bits of x live through
2892 // y.
2893// After #2, the useful bits of x are 0x4.
2894// However, if x is used on an unpredictable instruction, then all its bits
2895// are useful.
2896// E.g.
2897// 1. y = x & 0x7
2898// 2. z = y >> 2
2899// 3. str x, [@x]
2900static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2901
2902 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2903 unsigned Depth) {
2904 uint64_t Imm =
2905 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2906 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2907 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2908 getUsefulBits(Op, UsefulBits, Depth + 1);
2909}
2910
2911 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2912 uint64_t Imm, uint64_t MSB,
2913 unsigned Depth) {
2914 // inherit the bitwidth value
2915 APInt OpUsefulBits(UsefulBits);
2916 OpUsefulBits = 1;
2917
2918 if (MSB >= Imm) {
2919 OpUsefulBits <<= MSB - Imm + 1;
2920 --OpUsefulBits;
2921 // The interesting part will be in the lower part of the result
2922 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2923 // The interesting part was starting at Imm in the argument
2924 OpUsefulBits <<= Imm;
2925 } else {
2926 OpUsefulBits <<= MSB + 1;
2927 --OpUsefulBits;
2928 // The interesting part will be shifted in the result
2929 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2930 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2931 // The interesting part was at zero in the argument
2932 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2933 }
2934
2935 UsefulBits &= OpUsefulBits;
2936}
2937
2938static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2939 unsigned Depth) {
2940 uint64_t Imm =
2941 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2942 uint64_t MSB =
2943 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2944
2945 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2946}
2947
2948 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2949 unsigned Depth) {
2950 uint64_t ShiftTypeAndValue =
2951 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2952 APInt Mask(UsefulBits);
2953 Mask.clearAllBits();
2954 Mask.flipAllBits();
2955
2956 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2957 // Shift Left
2958 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2959 Mask <<= ShiftAmt;
2960 getUsefulBits(Op, Mask, Depth + 1);
2961 Mask.lshrInPlace(ShiftAmt);
2962 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2963 // Shift Right
2964 // We do not handle AArch64_AM::ASR, because the sign will change the
2965 // number of useful bits
2966 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2967 Mask.lshrInPlace(ShiftAmt);
2968 getUsefulBits(Op, Mask, Depth + 1);
2969 Mask <<= ShiftAmt;
2970 } else
2971 return;
2972
2973 UsefulBits &= Mask;
2974}
2975
2976static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2977 unsigned Depth) {
2978 uint64_t Imm =
2979 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2980 uint64_t MSB =
2981 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2982
2983 APInt OpUsefulBits(UsefulBits);
2984 OpUsefulBits = 1;
2985
2986 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2987 ResultUsefulBits.flipAllBits();
2988 APInt Mask(UsefulBits.getBitWidth(), 0);
2989
2990 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2991
2992 if (MSB >= Imm) {
2993 // The instruction is a BFXIL.
2994 uint64_t Width = MSB - Imm + 1;
2995 uint64_t LSB = Imm;
2996
2997 OpUsefulBits <<= Width;
2998 --OpUsefulBits;
2999
3000 if (Op.getOperand(1) == Orig) {
3001 // Copy the low bits from the result to bits starting from LSB.
3002 Mask = ResultUsefulBits & OpUsefulBits;
3003 Mask <<= LSB;
3004 }
3005
3006 if (Op.getOperand(0) == Orig)
3007 // Bits starting from LSB in the input contribute to the result.
3008 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3009 } else {
3010 // The instruction is a BFI.
3011 uint64_t Width = MSB + 1;
3012 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3013
3014 OpUsefulBits <<= Width;
3015 --OpUsefulBits;
3016 OpUsefulBits <<= LSB;
3017
3018 if (Op.getOperand(1) == Orig) {
3019 // Copy the bits from the result to the zero bits.
3020 Mask = ResultUsefulBits & OpUsefulBits;
3021 Mask.lshrInPlace(LSB);
3022 }
3023
3024 if (Op.getOperand(0) == Orig)
3025 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3026 }
3027
3028 UsefulBits &= Mask;
3029}
3030
3031static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3032 SDValue Orig, unsigned Depth) {
3033
3034 // Users of this node should have already been instruction selected
3035 // FIXME: Can we turn that into an assert?
3036 if (!UserNode->isMachineOpcode())
3037 return;
3038
3039 switch (UserNode->getMachineOpcode()) {
3040 default:
3041 return;
3042 case AArch64::ANDSWri:
3043 case AArch64::ANDSXri:
3044 case AArch64::ANDWri:
3045 case AArch64::ANDXri:
3046 // We increment Depth only when we call the getUsefulBits
3047 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3048 Depth);
3049 case AArch64::UBFMWri:
3050 case AArch64::UBFMXri:
3051 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3052
3053 case AArch64::ORRWrs:
3054 case AArch64::ORRXrs:
3055 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3056 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3057 Depth);
3058 return;
3059 case AArch64::BFMWri:
3060 case AArch64::BFMXri:
3061 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3062
3063 case AArch64::STRBBui:
3064 case AArch64::STURBBi:
3065 if (UserNode->getOperand(0) != Orig)
3066 return;
3067 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3068 return;
3069
3070 case AArch64::STRHHui:
3071 case AArch64::STURHHi:
3072 if (UserNode->getOperand(0) != Orig)
3073 return;
3074 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3075 return;
3076 }
3077}
3078
3079 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3080 if (Depth >= SelectionDAG::MaxRecursionDepth)
3081 return;
3082 // Initialize UsefulBits
3083 if (!Depth) {
3084 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3085 // At the beginning, assume every produced bits is useful
3086 UsefulBits = APInt(Bitwidth, 0);
3087 UsefulBits.flipAllBits();
3088 }
3089 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3090
3091 for (SDNode *Node : Op.getNode()->users()) {
3092 // A use cannot produce useful bits
3093 APInt UsefulBitsForUse = APInt(UsefulBits);
3094 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3095 UsersUsefulBits |= UsefulBitsForUse;
3096 }
3097 // UsefulBits contains the produced bits that are meaningful for the
3098 // current definition, thus a user cannot make a bit meaningful at
3099 // this point
3100 UsefulBits &= UsersUsefulBits;
3101}
3102
3103/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3104/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3105/// 0, return Op unchanged.
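/// For example (illustrative only): getLeftShift(CurDAG, Op, 4) on an i32
/// value emits UBFMWri Op, #28, #27 (LSL #4), while ShlAmount == -4 emits
/// UBFMWri Op, #4, #31 (LSR #4).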
3106static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3107 if (ShlAmount == 0)
3108 return Op;
3109
3110 EVT VT = Op.getValueType();
3111 SDLoc dl(Op);
3112 unsigned BitWidth = VT.getSizeInBits();
3113 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3114
3115 SDNode *ShiftNode;
3116 if (ShlAmount > 0) {
3117 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3118 ShiftNode = CurDAG->getMachineNode(
3119 UBFMOpc, dl, VT, Op,
3120 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3121 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3122 } else {
3123 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3124 assert(ShlAmount < 0 && "expected right shift");
3125 int ShrAmount = -ShlAmount;
3126 ShiftNode = CurDAG->getMachineNode(
3127 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3128 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3129 }
3130
3131 return SDValue(ShiftNode, 0);
3132}
3133
3134 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3135 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3136 bool BiggerPattern,
3137 const uint64_t NonZeroBits,
3138 SDValue &Src, int &DstLSB,
3139 int &Width);
3140
3141 // For bit-field-positioning pattern "(shl VAL, N)".
3142 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3143 bool BiggerPattern,
3144 const uint64_t NonZeroBits,
3145 SDValue &Src, int &DstLSB,
3146 int &Width);
3147
3148/// Does this tree qualify as an attempt to move a bitfield into position,
3149 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3150 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3151 bool BiggerPattern, SDValue &Src,
3152 int &DstLSB, int &Width) {
3153 EVT VT = Op.getValueType();
3154 unsigned BitWidth = VT.getSizeInBits();
3155 (void)BitWidth;
3156 assert(BitWidth == 32 || BitWidth == 64);
3157
3158 KnownBits Known = CurDAG->computeKnownBits(Op);
3159
3160 // Non-zero in the sense that they're not provably zero, which is the key
3161 // point if we want to use this value
3162 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3163 if (!isShiftedMask_64(NonZeroBits))
3164 return false;
3165
3166 switch (Op.getOpcode()) {
3167 default:
3168 break;
3169 case ISD::AND:
3170 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3171 NonZeroBits, Src, DstLSB, Width);
3172 case ISD::SHL:
3173 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3174 NonZeroBits, Src, DstLSB, Width);
3175 }
3176
3177 return false;
3178}
3179
3180 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3181 bool BiggerPattern,
3182 const uint64_t NonZeroBits,
3183 SDValue &Src, int &DstLSB,
3184 int &Width) {
3185 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3186
3187 EVT VT = Op.getValueType();
3188 assert((VT == MVT::i32 || VT == MVT::i64) &&
3189 "Caller guarantees VT is one of i32 or i64");
3190 (void)VT;
3191
3192 uint64_t AndImm;
3193 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3194 return false;
3195
3196 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3197 // 1) (AndImm & (1 << POS) == 0)
3198 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3199 //
3200 // 1) and 2) don't agree so something must be wrong (e.g., in
3201 // 'SelectionDAG::computeKnownBits')
3202 assert((~AndImm & NonZeroBits) == 0 &&
3203 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3204
3205 SDValue AndOp0 = Op.getOperand(0);
3206
3207 uint64_t ShlImm;
3208 SDValue ShlOp0;
3209 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3210 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3211 ShlOp0 = AndOp0.getOperand(0);
3212 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3213 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3214 ShlImm)) {
3215 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3216
3217 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3218 SDValue ShlVal = AndOp0.getOperand(0);
3219
3220 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3221 // expect VT to be MVT::i32.
3222 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3223
3224 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3225 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3226 } else
3227 return false;
3228
3229 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3230 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3231 // AndOp0+AND.
3232 if (!BiggerPattern && !AndOp0.hasOneUse())
3233 return false;
3234
3235 DstLSB = llvm::countr_zero(NonZeroBits);
3236 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3237
3238 // Bail out on large Width. This happens when no proper combining / constant
3239 // folding was performed.
3240 if (Width >= (int)VT.getSizeInBits()) {
3241 // If VT is i64, Width > 64 is nonsensical since NonZeroBits is uint64_t, and
3242 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3243 // "val".
3244 // If VT is i32, what Width >= 32 means:
3245 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3246 // demands at least 'Width' bits (after dag-combiner). This together with
3247 // `any_extend` Op (undefined higher bits) indicates missed combination
3248 // when lowering the 'and' IR instruction to an machine IR instruction.
3249 LLVM_DEBUG(
3250 dbgs()
3251 << "Found large Width in bit-field-positioning -- this indicates no "
3252 "proper combining / constant folding was performed\n");
3253 return false;
3254 }
3255
3256 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3257 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3258 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3259 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3260 // which case it is not profitable to insert an extra shift.
3261 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3262 return false;
3263
3264 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3265 return true;
3266}
3267
3268// For node (shl (and val, mask), N), returns true if the node is equivalent to
3269// UBFIZ.
3270static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3271 SDValue &Src, int &DstLSB,
3272 int &Width) {
3273 // Caller should have verified that N is a left shift with constant shift
3274 // amount; asserts that.
3275 assert(Op.getOpcode() == ISD::SHL &&
3276 "Op.getNode() should be a SHL node to call this function");
3277 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3278 "Op.getNode() should shift ShlImm to call this function");
3279
3280 uint64_t AndImm = 0;
3281 SDValue Op0 = Op.getOperand(0);
3282 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3283 return false;
3284
3285 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3286 if (isMask_64(ShiftedAndImm)) {
3287 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3288 // must end with that mask and may be prefixed with arbitrary bits, since
3289 // those bits are shifted out.
3290 //
3291 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3292 // the bits of the AND result corresponding to x, y and z are shifted out,
3293 // so it's fine not to extract them.
3294 Width = llvm::countr_one(ShiftedAndImm);
3295 DstLSB = ShlImm;
3296 Src = Op0.getOperand(0);
3297 return true;
3298 }
3299 return false;
3300}
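// A standalone sketch (not part of this file; the helper name is illustrative)
// of the check above: (AndImm << ShlImm) >> ShlImm clears the top ShlImm bits,
// which are exactly the bits of the AND result that the outer SHL shifts out.
// For ShlImm = 3 and AndImm = (0b101ULL << 61) | 0x1F, ShiftedAndImm is 0x1F, a
// low-bit mask, so the node is selectable as UBFIZ with Width = 5, DstLSB = 3.
#include <cstdint>

constexpr bool isLowBitMaskAfterShl(uint64_t AndImm, unsigned ShlImm) {
  uint64_t ShiftedAndImm = (AndImm << ShlImm) >> ShlImm; // drop the top ShlImm bits
  // A nonzero value of the form 0..01..1 satisfies x & (x + 1) == 0.
  return ShiftedAndImm != 0 && (ShiftedAndImm & (ShiftedAndImm + 1)) == 0;
}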
3301
3302static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3303 bool BiggerPattern,
3304 const uint64_t NonZeroBits,
3305 SDValue &Src, int &DstLSB,
3306 int &Width) {
3307 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3308
3309 EVT VT = Op.getValueType();
3310 assert((VT == MVT::i32 || VT == MVT::i64) &&
3311 "Caller guarantees that type is i32 or i64");
3312 (void)VT;
3313
3314 uint64_t ShlImm;
3315 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3316 return false;
3317
3318 if (!BiggerPattern && !Op.hasOneUse())
3319 return false;
3320
3321 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3322 return true;
3323
3324 DstLSB = llvm::countr_zero(NonZeroBits);
3325 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3326
3327 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3328 return false;
3329
3330 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3331 return true;
3332}
3333
3334static bool isShiftedMask(uint64_t Mask, EVT VT) {
3335 assert(VT == MVT::i32 || VT == MVT::i64);
3336 if (VT == MVT::i32)
3337 return isShiftedMask_32(Mask);
3338 return isShiftedMask_64(Mask);
3339}
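// As an illustration (not part of this file): a "shifted mask" is a single
// contiguous run of ones, so 0x000ffff0 qualifies while 0x00ff00f0 does not.
// A branch-free check equivalent to isShiftedMask_64 for this purpose:
#include <cstdint>

constexpr bool isShiftedMask64(uint64_t Mask) {
  // Filling the trailing zeros with ones must produce a value of the form
  // 0..01..1; that holds exactly when the original ones were contiguous.
  uint64_t Filled = Mask | (Mask - 1);
  return Mask != 0 && (Filled & (Filled + 1)) == 0;
}

static_assert(isShiftedMask64(0x000ffff0) && !isShiftedMask64(0x00ff00f0), "");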
3340
3341// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3342// inserted only sets known zero bits.
3343static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3344 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3345
3346 EVT VT = N->getValueType(0);
3347 if (VT != MVT::i32 && VT != MVT::i64)
3348 return false;
3349
3350 unsigned BitWidth = VT.getSizeInBits();
3351
3352 uint64_t OrImm;
3353 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3354 return false;
3355
3356 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3357 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3358 // performance neutral.
3359 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3360 return false;
3361
3362 uint64_t MaskImm;
3363 SDValue And = N->getOperand(0);
3364 // Must be a single use AND with an immediate operand.
3365 if (!And.hasOneUse() ||
3366 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3367 return false;
3368
3369 // Compute the Known Zero for the AND as this allows us to catch more general
3370 // cases than just looking for AND with imm.
3371 KnownBits Known = CurDAG->computeKnownBits(And);
3372
3373 // Non-zero in the sense that they're not provably zero, which is the key
3374 // point if we want to use this value.
3375 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3376
3377 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3378 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3379 return false;
3380
3381 // The bits being inserted must only set those bits that are known to be zero.
3382 if ((OrImm & NotKnownZero) != 0) {
3383 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3384 // currently handle this case.
3385 return false;
3386 }
3387
3388 // BFI/BFXIL dst, src, #lsb, #width.
3389 int LSB = llvm::countr_one(NotKnownZero);
3390 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3391
3392 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3393 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3394 unsigned ImmS = Width - 1;
3395
3396 // If we're creating a BFI instruction, avoid cases where we need more
3397 // instructions to materialize the BFI constant as compared to the original
3398 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3399 // should be no worse in this case.
3400 bool IsBFI = LSB != 0;
3401 uint64_t BFIImm = OrImm >> LSB;
3402 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3403 // We have a BFI instruction and we know the constant can't be materialized
3404 // with a ORR-immediate with the zero register.
3405 unsigned OrChunks = 0, BFIChunks = 0;
3406 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3407 if (((OrImm >> Shift) & 0xFFFF) != 0)
3408 ++OrChunks;
3409 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3410 ++BFIChunks;
3411 }
3412 if (BFIChunks > OrChunks)
3413 return false;
3414 }
3415
3416 // Materialize the constant to be inserted.
3417 SDLoc DL(N);
3418 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3419 SDNode *MOVI = CurDAG->getMachineNode(
3420 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3421
3422 // Create the BFI/BFXIL instruction.
3423 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3424 CurDAG->getTargetConstant(ImmR, DL, VT),
3425 CurDAG->getTargetConstant(ImmS, DL, VT)};
3426 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3427 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3428 return true;
3429}
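// The BFM operand translation used above, shown as a small standalone sketch
// (the struct and helper are illustrative, not LLVM API): inserting 'Width'
// bits at bit 'LSB' of an i32/i64 maps to BFM with immr = (BitWidth - LSB) %
// BitWidth and imms = Width - 1. For example, inserting 8 bits at bit 16 of an
// i32 gives immr = 16 and imms = 7, i.e. "bfi w0, w1, #16, #8".
#include <cstdint>

struct BFMImms {
  unsigned ImmR;
  unsigned ImmS;
};

constexpr BFMImms bfiToBFM(unsigned LSB, unsigned Width, unsigned BitWidth) {
  return {(BitWidth - LSB) % BitWidth, Width - 1};
}

static_assert(bfiToBFM(16, 8, 32).ImmR == 16 && bfiToBFM(16, 8, 32).ImmS == 7, "");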
3430
3431static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3432 SDValue &ShiftedOperand,
3433 uint64_t &EncodedShiftImm) {
3434 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3435 if (!Dst.hasOneUse())
3436 return false;
3437
3438 EVT VT = Dst.getValueType();
3439 assert((VT == MVT::i32 || VT == MVT::i64) &&
3440 "Caller should guarantee that VT is one of i32 or i64");
3441 const unsigned SizeInBits = VT.getSizeInBits();
3442
3443 SDLoc DL(Dst.getNode());
3444 uint64_t AndImm, ShlImm;
3445 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3446 isShiftedMask_64(AndImm)) {
3447 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3448 SDValue DstOp0 = Dst.getOperand(0);
3449 if (!DstOp0.hasOneUse())
3450 return false;
3451
3452 // An example to illustrate the transformation
3453 // From:
3454 // lsr x8, x1, #1
3455 // and x8, x8, #0x3f80
3456 // bfxil x8, x1, #0, #7
3457 // To:
3458 // and x8, x1, #0x7f
3459 // ubfx x9, x1, #8, #7
3460 // orr x8, x8, x9, lsl #7
3461 //
3462 // The number of instructions remains the same, but ORR is faster than BFXIL
3463 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3464 // the dependency chain is improved after the transformation.
3465 uint64_t SrlImm;
3466 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3467 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3468 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3469 unsigned MaskWidth =
3470 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3471 unsigned UBFMOpc =
3472 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3473 SDNode *UBFMNode = CurDAG->getMachineNode(
3474 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3475 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3476 VT),
3477 CurDAG->getTargetConstant(
3478 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3479 ShiftedOperand = SDValue(UBFMNode, 0);
3480 EncodedShiftImm = AArch64_AM::getShifterImm(
3481 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3482 return true;
3483 }
3484 }
3485 return false;
3486 }
3487
3488 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3489 ShiftedOperand = Dst.getOperand(0);
3490 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3491 return true;
3492 }
3493
3494 uint64_t SrlImm;
3495 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3496 ShiftedOperand = Dst.getOperand(0);
3497 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3498 return true;
3499 }
3500 return false;
3501}
3502
3503// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3504// the operands and select it to AArch64::ORR with shifted registers if
3505// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3506static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3507 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3508 const bool BiggerPattern) {
3509 EVT VT = N->getValueType(0);
3510 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3511 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3512 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3513 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3514 assert((VT == MVT::i32 || VT == MVT::i64) &&
3515 "Expect result type to be i32 or i64 since N is combinable to BFM");
3516 SDLoc DL(N);
3517
3518 // Bail out if BFM simplifies away one node in BFM Dst.
3519 if (OrOpd1 != Dst)
3520 return false;
3521
3522 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3523 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3524 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3525 if (BiggerPattern) {
3526 uint64_t SrcAndImm;
3527 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3528 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3529 // OrOpd0 = AND Src, #Mask
3530 // So BFM simplifies away one AND node from Src and doesn't simplify away
3531 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3532 // one node (from Rd), ORR is better since it has higher throughput and
3533 // smaller latency than BFM on many AArch64 processors (and for the rest
3534 // ORR is at least as good as BFM).
3535 SDValue ShiftedOperand;
3536 uint64_t EncodedShiftImm;
3537 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3538 EncodedShiftImm)) {
3539 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3540 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3541 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3542 return true;
3543 }
3544 }
3545 return false;
3546 }
3547
3548 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3549
3550 uint64_t ShlImm;
3551 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3552 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3553 SDValue Ops[] = {
3554 Dst, Src,
3555 CurDAG->getTargetConstant(
3556 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3557 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3558 return true;
3559 }
3560
3561 // Select the following pattern to left-shifted operand rather than BFI.
3562 // %val1 = op ..
3563 // %val2 = shl %val1, #imm
3564 // %res = or %val1, %val2
3565 //
3566 // If N is selected to be BFI, we know that
3567 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3568 // into BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3569 //
3570 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3571 if (OrOpd0.getOperand(0) == OrOpd1) {
3572 SDValue Ops[] = {
3573 OrOpd1, OrOpd1,
3574 CurDAG->getTargetConstant(
3575 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3576 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3577 return true;
3578 }
3579 }
3580
3581 uint64_t SrlImm;
3582 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3583 // Select the following pattern to right-shifted operand rather than BFXIL.
3584 // %val1 = op ..
3585 // %val2 = lshr %val1, #imm
3586 // %res = or %val1, %val2
3587 //
3588 // If N is selected to be BFXIL, we know that
3589 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3590 // into BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3591 //
3592 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3593 if (OrOpd0.getOperand(0) == OrOpd1) {
3594 SDValue Ops[] = {
3595 OrOpd1, OrOpd1,
3596 CurDAG->getTargetConstant(
3597 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3598 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3599 return true;
3600 }
3601 }
3602
3603 return false;
3604}
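// A minimal sketch (not from this file) of the !BiggerPattern rewrite above:
// when both OR operands are derived from the same value, "or x, (shl x, imm)"
// needs no BFI at all; it is just an ORR with an LSL-shifted register operand,
// e.g. "orr x0, x1, x1, lsl #7".
#include <cstdint>

constexpr uint64_t orWithShiftedSelf(uint64_t X, unsigned Imm) {
  return X | (X << Imm); // what "orr xd, xn, xn, lsl #Imm" computes (Imm < 64)
}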
3605
3606static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3607 SelectionDAG *CurDAG) {
3608 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3609
3610 EVT VT = N->getValueType(0);
3611 if (VT != MVT::i32 && VT != MVT::i64)
3612 return false;
3613
3614 unsigned BitWidth = VT.getSizeInBits();
3615
3616 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3617 // have the expected shape. Try to undo that.
3618
3619 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3620 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3621
3622 // Given an OR operation, check if we have the following pattern
3623 // ubfm c, b, imm, imm2 (or something that does the same job, see
3624 // isBitfieldExtractOp)
3625 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3626 // countTrailingZeros(mask2) == imm2 - imm + 1
3627 // f = d | c
3628 // if yes, replace the OR instruction with:
3629 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3630
3631 // OR is commutative, check all combinations of operand order and values of
3632 // BiggerPattern, i.e.
3633 // Opd0, Opd1, BiggerPattern=false
3634 // Opd1, Opd0, BiggerPattern=false
3635 // Opd0, Opd1, BiggerPattern=true
3636 // Opd1, Opd0, BiggerPattern=true
3637 // Several of these combinations may match, so check with BiggerPattern=false
3638 // first since that will produce better results by matching more instructions
3639 // and/or inserting fewer extra instructions.
3640 for (int I = 0; I < 4; ++I) {
3641
3642 SDValue Dst, Src;
3643 unsigned ImmR, ImmS;
3644 bool BiggerPattern = I / 2;
3645 SDValue OrOpd0Val = N->getOperand(I % 2);
3646 SDNode *OrOpd0 = OrOpd0Val.getNode();
3647 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3648 SDNode *OrOpd1 = OrOpd1Val.getNode();
3649
3650 unsigned BFXOpc;
3651 int DstLSB, Width;
3652 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3653 NumberOfIgnoredLowBits, BiggerPattern)) {
3654 // Check that the returned opcode is compatible with the pattern,
3655 // i.e., same type and zero extended (U and not S)
3656 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3657 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3658 continue;
3659
3660 // Compute the width of the bitfield insertion
3661 DstLSB = 0;
3662 Width = ImmS - ImmR + 1;
3663 // FIXME: This constraint is to catch bitfield insertion; we may
3664 // want to widen the pattern if we want to grab the general bitfield
3665 // move case.
3666 if (Width <= 0)
3667 continue;
3668
3669 // If the mask on the insertee is correct, we have a BFXIL operation. We
3670 // can share the ImmR and ImmS values from the already-computed UBFM.
3671 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3672 BiggerPattern,
3673 Src, DstLSB, Width)) {
3674 ImmR = (BitWidth - DstLSB) % BitWidth;
3675 ImmS = Width - 1;
3676 } else
3677 continue;
3678
3679 // Check the second part of the pattern
3680 EVT VT = OrOpd1Val.getValueType();
3681 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3682
3683 // Compute the Known Zero for the candidate of the first operand.
3684 // This allows us to catch more general cases than just looking for an
3685 // AND with an immediate. Indeed, simplify-demanded-bits may have removed
3686 // the AND instruction because it proved it was useless.
3687 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3688
3689 // Check if there is enough room for the second operand to appear
3690 // in the first one
3691 APInt BitsToBeInserted =
3692 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3693
3694 if ((BitsToBeInserted & ~Known.Zero) != 0)
3695 continue;
3696
3697 // Set the first operand
3698 uint64_t Imm;
3699 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3700 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3701 // In that case, we can eliminate the AND
3702 Dst = OrOpd1->getOperand(0);
3703 else
3704 // Maybe the AND has been removed by simplify-demanded-bits
3705 // or is useful because it discards more bits
3706 Dst = OrOpd1Val;
3707
3708 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3709 // with shifted operand is more efficient.
3710 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3711 BiggerPattern))
3712 return true;
3713
3714 // both parts match
3715 SDLoc DL(N);
3716 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3717 CurDAG->getTargetConstant(ImmS, DL, VT)};
3718 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3719 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3720 return true;
3721 }
3722
3723 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3724 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3725 // mask (e.g., 0x000ffff0).
3726 uint64_t Mask0Imm, Mask1Imm;
3727 SDValue And0 = N->getOperand(0);
3728 SDValue And1 = N->getOperand(1);
3729 if (And0.hasOneUse() && And1.hasOneUse() &&
3730 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3731 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3732 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3733 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3734
3735 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3736 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3737 // bits to be inserted.
3738 if (isShiftedMask(Mask0Imm, VT)) {
3739 std::swap(And0, And1);
3740 std::swap(Mask0Imm, Mask1Imm);
3741 }
3742
3743 SDValue Src = And1->getOperand(0);
3744 SDValue Dst = And0->getOperand(0);
3745 unsigned LSB = llvm::countr_zero(Mask1Imm);
3746 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3747
3748 // The BFXIL inserts the low-order bits from a source register, so right
3749 // shift the needed bits into place.
3750 SDLoc DL(N);
3751 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3752 uint64_t LsrImm = LSB;
3753 if (Src->hasOneUse() &&
3754 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3755 (LsrImm + LSB) < BitWidth) {
3756 Src = Src->getOperand(0);
3757 LsrImm += LSB;
3758 }
3759
3760 SDNode *LSR = CurDAG->getMachineNode(
3761 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3762 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3763
3764 // BFXIL is an alias of BFM, so translate to BFM operands.
3765 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3766 unsigned ImmS = Width - 1;
3767
3768 // Create the BFXIL instruction.
3769 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3770 CurDAG->getTargetConstant(ImmR, DL, VT),
3771 CurDAG->getTargetConstant(ImmS, DL, VT)};
3772 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3773 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3774 return true;
3775 }
3776
3777 return false;
3778}
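// A standalone sketch (not part of this file; the helper is illustrative) of
// the complementary-mask case above. With Mask1Imm = 0x000ffff0 (a shifted
// mask) and Mask0Imm = ~Mask1Imm on i32, LSB = 4 and Width = 16, so
// "or (and X, Mask0Imm), (and Y, Mask1Imm)" copies bits [4, 19] of Y into X,
// which is what BFXIL does once Y has been shifted right by LSB.
#include <cstdint>

constexpr uint32_t insertField(uint32_t X, uint32_t Y, uint32_t Mask1Imm) {
  return (X & ~Mask1Imm) | (Y & Mask1Imm); // semantics of the OR of the two ANDs
}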
3779
3780bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3781 if (N->getOpcode() != ISD::OR)
3782 return false;
3783
3784 APInt NUsefulBits;
3785 getUsefulBits(SDValue(N, 0), NUsefulBits);
3786
3787 // If none of the bits are useful, just return UNDEF.
3788 if (!NUsefulBits) {
3789 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3790 return true;
3791 }
3792
3793 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3794 return true;
3795
3796 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3797}
3798
3799/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3800/// equivalent of a left shift by a constant amount followed by an and masking
3801/// out a contiguous set of bits.
3802bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3803 if (N->getOpcode() != ISD::AND)
3804 return false;
3805
3806 EVT VT = N->getValueType(0);
3807 if (VT != MVT::i32 && VT != MVT::i64)
3808 return false;
3809
3810 SDValue Op0;
3811 int DstLSB, Width;
3812 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3813 Op0, DstLSB, Width))
3814 return false;
3815
3816 // ImmR is the rotate right amount.
3817 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3818 // ImmS is the most significant bit of the source to be moved.
3819 unsigned ImmS = Width - 1;
3820
3821 SDLoc DL(N);
3822 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3823 CurDAG->getTargetConstant(ImmS, DL, VT)};
3824 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3825 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3826 return true;
3827}
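// An illustrative sketch (not from this file): "(shl x, 3) & 0x1f8" keeps a
// 6-bit field at bit 3 of an otherwise-zero register, which is exactly UBFIZ
// on i32 with ImmR = (32 - 3) % 32 = 29 and ImmS = 6 - 1 = 5, i.e.
// "ubfiz w0, w1, #3, #6".
#include <cstdint>

constexpr uint32_t ubfiz32(uint32_t X, unsigned DstLSB, unsigned Width) {
  // Insert the low Width bits of X at DstLSB (sketch assumes Width < 32).
  return (X & ((1u << Width) - 1)) << DstLSB;
}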
3828
3829/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3830/// variable shift/rotate instructions.
3831bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3832 EVT VT = N->getValueType(0);
3833
3834 unsigned Opc;
3835 switch (N->getOpcode()) {
3836 case ISD::ROTR:
3837 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3838 break;
3839 case ISD::SHL:
3840 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3841 break;
3842 case ISD::SRL:
3843 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3844 break;
3845 case ISD::SRA:
3846 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3847 break;
3848 default:
3849 return false;
3850 }
3851
3852 uint64_t Size;
3853 uint64_t Bits;
3854 if (VT == MVT::i32) {
3855 Bits = 5;
3856 Size = 32;
3857 } else if (VT == MVT::i64) {
3858 Bits = 6;
3859 Size = 64;
3860 } else
3861 return false;
3862
3863 SDValue ShiftAmt = N->getOperand(1);
3864 SDLoc DL(N);
3865 SDValue NewShiftAmt;
3866
3867 // Skip over an extend of the shift amount.
3868 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3869 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3870 ShiftAmt = ShiftAmt->getOperand(0);
3871
3872 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3873 SDValue Add0 = ShiftAmt->getOperand(0);
3874 SDValue Add1 = ShiftAmt->getOperand(1);
3875 uint64_t Add0Imm;
3876 uint64_t Add1Imm;
3877 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3878 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3879 // to avoid the ADD/SUB.
3880 NewShiftAmt = Add0;
3881 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3882 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3883 (Add0Imm % Size == 0)) {
3884 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3885 // to generate a NEG instead of a SUB from a constant.
3886 unsigned NegOpc;
3887 unsigned ZeroReg;
3888 EVT SubVT = ShiftAmt->getValueType(0);
3889 if (SubVT == MVT::i32) {
3890 NegOpc = AArch64::SUBWrr;
3891 ZeroReg = AArch64::WZR;
3892 } else {
3893 assert(SubVT == MVT::i64);
3894 NegOpc = AArch64::SUBXrr;
3895 ZeroReg = AArch64::XZR;
3896 }
3897 SDValue Zero =
3898 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3899 MachineSDNode *Neg =
3900 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3901 NewShiftAmt = SDValue(Neg, 0);
3902 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3903 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3904 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3905 // to generate a NOT instead of a SUB from a constant.
3906 unsigned NotOpc;
3907 unsigned ZeroReg;
3908 EVT SubVT = ShiftAmt->getValueType(0);
3909 if (SubVT == MVT::i32) {
3910 NotOpc = AArch64::ORNWrr;
3911 ZeroReg = AArch64::WZR;
3912 } else {
3913 assert(SubVT == MVT::i64);
3914 NotOpc = AArch64::ORNXrr;
3915 ZeroReg = AArch64::XZR;
3916 }
3917 SDValue Zero =
3918 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3919 MachineSDNode *Not =
3920 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3921 NewShiftAmt = SDValue(Not, 0);
3922 } else
3923 return false;
3924 } else {
3925 // If the shift amount is masked with an AND, check that the mask covers the
3926 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3927 // the AND.
3928 uint64_t MaskImm;
3929 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3930 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3931 return false;
3932
3933 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3934 return false;
3935
3936 NewShiftAmt = ShiftAmt->getOperand(0);
3937 }
3938
3939 // Narrow/widen the shift amount to match the size of the shift operation.
3940 if (VT == MVT::i32)
3941 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3942 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3943 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3944 MachineSDNode *Ext = CurDAG->getMachineNode(
3945 AArch64::SUBREG_TO_REG, DL, VT,
3946 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3947 NewShiftAmt = SDValue(Ext, 0);
3948 }
3949
3950 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3951 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3952 return true;
3953}
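// A small sketch (not part of this file) of why the AND/ADD/SUB on the shift
// amount can be dropped: the variable shift instructions only consume the low
// log2(Size) bits of the amount, so on i64 "x << (amt & 63)" and
// "x << ((amt + 64) & 63)" both select to a plain "lslv x0, x1, x2".
#include <cstdint>

constexpr uint64_t lslv64(uint64_t X, uint64_t Amt) {
  return X << (Amt & 63); // hardware semantics of LSLV for 64-bit registers
}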
3954
3955static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3956 SDValue &FixedPos,
3957 unsigned RegWidth,
3958 bool isReciprocal) {
3959 APFloat FVal(0.0);
3960 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3961 FVal = CN->getValueAPF();
3962 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3963 // Some otherwise illegal constants are allowed in this case.
3964 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3965 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3966 return false;
3967
3968 ConstantPoolSDNode *CN =
3969 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3970 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3971 } else
3972 return false;
3973
3974 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3975 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3976 // x-register.
3977 //
3978 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3979 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3980 // integers.
3981 bool IsExact;
3982
3983 if (isReciprocal)
3984 if (!FVal.getExactInverse(&FVal))
3985 return false;
3986
3987 // fbits is between 1 and 64 in the worst-case, which means the fmul
3988 // could have 2^64 as an actual operand. Need 65 bits of precision.
3989 APSInt IntVal(65, true);
3990 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3991
3992 // N.b. isPowerOf2 also checks for > 0.
3993 if (!IsExact || !IntVal.isPowerOf2())
3994 return false;
3995 unsigned FBits = IntVal.logBase2();
3996
3997 // Checks above should have guaranteed that we haven't lost information in
3998 // finding FBits, but it must still be in range.
3999 if (FBits == 0 || FBits > RegWidth) return false;
4000
4001 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4002 return true;
4003}
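// An illustrative sketch (not from this file; the helper is hypothetical) of
// the fbits detection: for "(fp_to_sint (fmul x, 16.0))" the multiplier is
// 2^4, so FBits = 4 and the sequence selects to "fcvtzs w0, s0, #4". Plain
// doubles stand in for the APFloat/APSInt arithmetic used above.
#include <cmath>

// Returns the fbits value if Scale is an exact power of two with
// 1 <= log2(Scale) <= RegWidth, and 0 otherwise (0 is never a valid fbits).
inline unsigned fbitsFromScale(double Scale, unsigned RegWidth) {
  int Exp = 0;
  if (std::frexp(Scale, &Exp) != 0.5) // power of two iff the mantissa is 0.5
    return 0;
  int FBits = Exp - 1;
  return (FBits >= 1 && FBits <= static_cast<int>(RegWidth)) ? FBits : 0;
}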
4004
4005bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4006 unsigned RegWidth) {
4007 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4008 false);
4009}
4010
4011bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4012 SDValue &FixedPos,
4013 unsigned RegWidth) {
4014 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4015 true);
4016}
4017
4018// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4019// fields, obtains their integer values, and combines them into the single
4020// value used in the MRS/MSR instruction encoding.
4021static int getIntOperandFromRegisterString(StringRef RegString) {
4022 SmallVector<StringRef, 5> Fields;
4023 RegString.split(Fields, ':');
4024
4025 if (Fields.size() == 1)
4026 return -1;
4027
4028 assert(Fields.size() == 5
4029 && "Invalid number of fields in read register string");
4030
4031 SmallVector<unsigned, 5> Ops;
4032 bool AllIntFields = true;
4033
4034 for (StringRef Field : Fields) {
4035 unsigned IntField;
4036 AllIntFields &= !Field.getAsInteger(10, IntField);
4037 Ops.push_back(IntField);
4038 }
4039
4040 assert(AllIntFields &&
4041 "Unexpected non-integer value in special register string.");
4042 (void)AllIntFields;
4043
4044 // Need to combine the integer fields of the string into a single value
4045 // based on the bit encoding of MRS/MSR instruction.
4046 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4047 (Ops[3] << 3) | (Ops[4]);
4048}
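// A standalone sketch (not part of this file) of the packing above: the MRS/MSR
// system-register operand is o0:op1:CRn:CRm:op2 packed as
// (o0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2. For example, field
// values 3:3:13:0:2 (TPIDR_EL0) pack to 0xde82.
#include <cstdint>

constexpr uint32_t packSysReg(uint32_t Op0, uint32_t Op1, uint32_t CRn,
                              uint32_t CRm, uint32_t Op2) {
  return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

static_assert(packSysReg(3, 3, 13, 0, 2) == 0xde82, "TPIDR_EL0");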
4049
4050// Lower the read_register intrinsic to an MRS instruction node if the special
4051// register string argument is either of the form detailed in the ACLE (the
4052// form described in getIntOperandFromRegisterString) or is a named register
4053// known by the MRS SysReg mapper.
4054bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4055 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4056 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4057 SDLoc DL(N);
4058
4059 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4060
4061 unsigned Opcode64Bit = AArch64::MRS;
4062 int Imm = getIntOperandFromRegisterString(RegString->getString());
4063 if (Imm == -1) {
4064 // No match, Use the sysreg mapper to map the remaining possible strings to
4065 // the value for the register to be used for the instruction operand.
4066 const auto *TheReg =
4067 AArch64SysReg::lookupSysRegByName(RegString->getString());
4068 if (TheReg && TheReg->Readable &&
4069 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4070 Imm = TheReg->Encoding;
4071 else
4072 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4073
4074 if (Imm == -1) {
4075 // Still no match, see if this is "pc" or give up.
4076 if (!ReadIs128Bit && RegString->getString() == "pc") {
4077 Opcode64Bit = AArch64::ADR;
4078 Imm = 0;
4079 } else {
4080 return false;
4081 }
4082 }
4083 }
4084
4085 SDValue InChain = N->getOperand(0);
4086 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4087 if (!ReadIs128Bit) {
4088 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4089 {SysRegImm, InChain});
4090 } else {
4091 SDNode *MRRS = CurDAG->getMachineNode(
4092 AArch64::MRRS, DL,
4093 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4094 {SysRegImm, InChain});
4095
4096 // Sysregs are not endian. The even register always contains the low half
4097 // of the register.
4098 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4099 SDValue(MRRS, 0));
4100 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4101 SDValue(MRRS, 0));
4102 SDValue OutChain = SDValue(MRRS, 1);
4103
4104 ReplaceUses(SDValue(N, 0), Lo);
4105 ReplaceUses(SDValue(N, 1), Hi);
4106 ReplaceUses(SDValue(N, 2), OutChain);
4107 };
4108 return true;
4109}
4110
4111// Lower the write_register intrinsic to an MSR instruction node if the special
4112// register string argument is either of the form detailed in the ACLE (the
4113// form described in getIntOperandFromRegisterString) or is a named register
4114// known by the MSR SysReg mapper.
4115bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4116 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4117 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4118 SDLoc DL(N);
4119
4120 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4121
4122 if (!WriteIs128Bit) {
4123 // Check if the register was one of those allowed as the pstatefield value
4124 // in the MSR (immediate) instruction. To accept the values allowed in the
4125 // pstatefield for the MSR (immediate) instruction, we also require that an
4126 // immediate value has been provided as an argument; we know that this is
4127 // the case as it has been ensured by semantic checking.
4128 auto trySelectPState = [&](auto PMapper, unsigned State) {
4129 if (PMapper) {
4130 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4131 "Expected a constant integer expression.");
4132 unsigned Reg = PMapper->Encoding;
4133 uint64_t Immed = N->getConstantOperandVal(2);
4134 CurDAG->SelectNodeTo(
4135 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4136 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4137 return true;
4138 }
4139 return false;
4140 };
4141
4142 if (trySelectPState(
4143 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4144 AArch64::MSRpstateImm4))
4145 return true;
4146 if (trySelectPState(
4147 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4148 AArch64::MSRpstateImm1))
4149 return true;
4150 }
4151
4152 int Imm = getIntOperandFromRegisterString(RegString->getString());
4153 if (Imm == -1) {
4154 // Use the sysreg mapper to attempt to map the remaining possible strings
4155 // to the value for the register to be used for the MSR (register)
4156 // instruction operand.
4157 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4158 if (TheReg && TheReg->Writeable &&
4159 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4160 Imm = TheReg->Encoding;
4161 else
4162 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4163
4164 if (Imm == -1)
4165 return false;
4166 }
4167
4168 SDValue InChain = N->getOperand(0);
4169 if (!WriteIs128Bit) {
4170 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4171 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4172 N->getOperand(2), InChain);
4173 } else {
4174 // No endian swap. The lower half always goes into the even subreg, and the
4175 // higher half always into the odd subreg.
4176 SDNode *Pair = CurDAG->getMachineNode(
4177 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4178 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4179 MVT::i32),
4180 N->getOperand(2),
4181 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4182 N->getOperand(3),
4183 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4184
4185 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4186 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4187 SDValue(Pair, 0), InChain);
4188 }
4189
4190 return true;
4191}
4192
4193/// We've got special pseudo-instructions for these
4194bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4195 unsigned Opcode;
4196 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4197
4198 // Leave IR for LSE if subtarget supports it.
4199 if (Subtarget->hasLSE()) return false;
4200
4201 if (MemTy == MVT::i8)
4202 Opcode = AArch64::CMP_SWAP_8;
4203 else if (MemTy == MVT::i16)
4204 Opcode = AArch64::CMP_SWAP_16;
4205 else if (MemTy == MVT::i32)
4206 Opcode = AArch64::CMP_SWAP_32;
4207 else if (MemTy == MVT::i64)
4208 Opcode = AArch64::CMP_SWAP_64;
4209 else
4210 llvm_unreachable("Unknown AtomicCmpSwap type");
4211
4212 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4213 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4214 N->getOperand(0)};
4215 SDNode *CmpSwap = CurDAG->getMachineNode(
4216 Opcode, SDLoc(N),
4217 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4218
4219 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4220 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4221
4222 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4223 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4224 CurDAG->RemoveDeadNode(N);
4225
4226 return true;
4227}
4228
4229bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4230 SDValue &Shift) {
4231 if (!isa<ConstantSDNode>(N))
4232 return false;
4233
4234 SDLoc DL(N);
4235 uint64_t Val = cast<ConstantSDNode>(N)
4236 ->getAPIntValue()
4237 .trunc(VT.getFixedSizeInBits())
4238 .getZExtValue();
4239
4240 switch (VT.SimpleTy) {
4241 case MVT::i8:
4242 // All immediates are supported.
4243 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4244 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4245 return true;
4246 case MVT::i16:
4247 case MVT::i32:
4248 case MVT::i64:
4249 // Support 8bit unsigned immediates.
4250 if (Val <= 255) {
4251 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4252 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4253 return true;
4254 }
4255 // Support 16bit unsigned immediates that are a multiple of 256.
4256 if (Val <= 65280 && Val % 256 == 0) {
4257 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4258 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4259 return true;
4260 }
4261 break;
4262 default:
4263 break;
4264 }
4265
4266 return false;
4267}
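// A minimal sketch (not from this file; the helper is illustrative) of the
// immediate split above: SVE ADD/SUB (immediate) accept an unsigned 8-bit
// value with an optional LSL #8, so 0x3f00 encodes as (0x3f, shift 8) while
// 0x3f01 is rejected and must stay a materialized vector constant.
#include <cstdint>

inline bool encodeSVEAddSubImm(uint64_t Val, unsigned &Imm8, unsigned &Shift) {
  if (Val <= 255) { // fits the unshifted 8-bit form
    Imm8 = static_cast<unsigned>(Val);
    Shift = 0;
    return true;
  }
  if (Val <= 65280 && Val % 256 == 0) { // 8-bit value shifted left by 8
    Imm8 = static_cast<unsigned>(Val >> 8);
    Shift = 8;
    return true;
  }
  return false;
}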
4268
4269bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4270 SDValue &Imm, SDValue &Shift,
4271 bool Negate) {
4272 if (!isa<ConstantSDNode>(N))
4273 return false;
4274
4275 SDLoc DL(N);
4276 int64_t Val = cast<ConstantSDNode>(N)
4277 ->getAPIntValue()
4278 .trunc(VT.getFixedSizeInBits())
4279 .getSExtValue();
4280
4281 if (Negate)
4282 Val = -Val;
4283
4284 // Signed saturating instructions treat their immediate operand as unsigned,
4285 // whereas the related intrinsics define their operands to be signed. This
4286 // means we can only use the immediate form when the operand is non-negative.
4287 if (Val < 0)
4288 return false;
4289
4290 switch (VT.SimpleTy) {
4291 case MVT::i8:
4292 // All positive immediates are supported.
4293 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4294 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4295 return true;
4296 case MVT::i16:
4297 case MVT::i32:
4298 case MVT::i64:
4299 // Support 8bit positive immediates.
4300 if (Val <= 255) {
4301 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4302 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4303 return true;
4304 }
4305 // Support 16bit positive immediates that are a multiple of 256.
4306 if (Val <= 65280 && Val % 256 == 0) {
4307 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4308 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4309 return true;
4310 }
4311 break;
4312 default:
4313 break;
4314 }
4315
4316 return false;
4317}
4318
4319bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4320 SDValue &Shift) {
4321 if (!isa<ConstantSDNode>(N))
4322 return false;
4323
4324 SDLoc DL(N);
4325 int64_t Val = cast<ConstantSDNode>(N)
4326 ->getAPIntValue()
4327 .trunc(VT.getFixedSizeInBits())
4328 .getSExtValue();
4329
4330 switch (VT.SimpleTy) {
4331 case MVT::i8:
4332 // All immediates are supported.
4333 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4334 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4335 return true;
4336 case MVT::i16:
4337 case MVT::i32:
4338 case MVT::i64:
4339 // Support 8bit signed immediates.
4340 if (Val >= -128 && Val <= 127) {
4341 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4342 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4343 return true;
4344 }
4345 // Support 16bit signed immediates that are a multiple of 256.
4346 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4347 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4348 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4349 return true;
4350 }
4351 break;
4352 default:
4353 break;
4354 }
4355
4356 return false;
4357}
4358
4359bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4360 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4361 int64_t ImmVal = CNode->getSExtValue();
4362 SDLoc DL(N);
4363 if (ImmVal >= -128 && ImmVal < 128) {
4364 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4365 return true;
4366 }
4367 }
4368 return false;
4369}
4370
4371bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4372 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4373 uint64_t ImmVal = CNode->getZExtValue();
4374
4375 switch (VT.SimpleTy) {
4376 case MVT::i8:
4377 ImmVal &= 0xFF;
4378 break;
4379 case MVT::i16:
4380 ImmVal &= 0xFFFF;
4381 break;
4382 case MVT::i32:
4383 ImmVal &= 0xFFFFFFFF;
4384 break;
4385 case MVT::i64:
4386 break;
4387 default:
4388 llvm_unreachable("Unexpected type");
4389 }
4390
4391 if (ImmVal < 256) {
4392 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4393 return true;
4394 }
4395 }
4396 return false;
4397}
4398
4399bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4400 bool Invert) {
4401 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4402 uint64_t ImmVal = CNode->getZExtValue();
4403 SDLoc DL(N);
4404
4405 if (Invert)
4406 ImmVal = ~ImmVal;
4407
4408 // Shift mask depending on type size.
4409 switch (VT.SimpleTy) {
4410 case MVT::i8:
4411 ImmVal &= 0xFF;
4412 ImmVal |= ImmVal << 8;
4413 ImmVal |= ImmVal << 16;
4414 ImmVal |= ImmVal << 32;
4415 break;
4416 case MVT::i16:
4417 ImmVal &= 0xFFFF;
4418 ImmVal |= ImmVal << 16;
4419 ImmVal |= ImmVal << 32;
4420 break;
4421 case MVT::i32:
4422 ImmVal &= 0xFFFFFFFF;
4423 ImmVal |= ImmVal << 32;
4424 break;
4425 case MVT::i64:
4426 break;
4427 default:
4428 llvm_unreachable("Unexpected type");
4429 }
4430
4431 uint64_t encoding;
4432 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4433 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4434 return true;
4435 }
4436 }
4437 return false;
4438}
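// A standalone sketch (not part of this file) of the replication above: SVE
// logical immediates are encoded against a 64-bit pattern, so a 16-bit element
// immediate such as 0x00f0 is first replicated to 0x00f000f000f000f0 before
// the logical-immediate encoder is consulted.
#include <cstdint>

constexpr uint64_t replicateTo64(uint64_t Imm, unsigned EltBits) {
  uint64_t Val = Imm & (EltBits == 64 ? ~0ULL : (1ULL << EltBits) - 1);
  for (unsigned Shift = EltBits; Shift < 64; Shift *= 2)
    Val |= Val << Shift;
  return Val;
}

static_assert(replicateTo64(0x00f0, 16) == 0x00f000f000f000f0ULL, "");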
4439
4440// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4441 // Rather than attempt to normalise everything, we can sometimes saturate the
4442 // shift amount during selection. This function also allows for consistent
4443// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4444// required by the instructions.
4445bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4446 uint64_t High, bool AllowSaturation,
4447 SDValue &Imm) {
4448 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4449 uint64_t ImmVal = CN->getZExtValue();
4450
4451 // Reject shift amounts that are too small.
4452 if (ImmVal < Low)
4453 return false;
4454
4455 // Reject or saturate shift amounts that are too big.
4456 if (ImmVal > High) {
4457 if (!AllowSaturation)
4458 return false;
4459 ImmVal = High;
4460 }
4461
4462 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4463 return true;
4464 }
4465
4466 return false;
4467}
4468
4469bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4470 // tagp(FrameIndex, IRGstack, tag_offset):
4471 // since the offset between FrameIndex and IRGstack is a compile-time
4472 // constant, this can be lowered to a single ADDG instruction.
4473 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4474 return false;
4475 }
4476
4477 SDValue IRG_SP = N->getOperand(2);
4478 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4479 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4480 return false;
4481 }
4482
4483 const TargetLowering *TLI = getTargetLowering();
4484 SDLoc DL(N);
4485 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4486 SDValue FiOp = CurDAG->getTargetFrameIndex(
4487 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4488 int TagOffset = N->getConstantOperandVal(3);
4489
4490 SDNode *Out = CurDAG->getMachineNode(
4491 AArch64::TAGPstack, DL, MVT::i64,
4492 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4493 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4494 ReplaceNode(N, Out);
4495 return true;
4496}
4497
4498void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4499 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4500 "llvm.aarch64.tagp third argument must be an immediate");
4501 if (trySelectStackSlotTagP(N))
4502 return;
4503 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4504 // compile-time constant, not just for stack allocations.
4505
4506 // General case for unrelated pointers in Op1 and Op2.
4507 SDLoc DL(N);
4508 int TagOffset = N->getConstantOperandVal(3);
4509 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4510 {N->getOperand(1), N->getOperand(2)});
4511 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4512 {SDValue(N1, 0), N->getOperand(2)});
4513 SDNode *N3 = CurDAG->getMachineNode(
4514 AArch64::ADDG, DL, MVT::i64,
4515 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4516 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4517 ReplaceNode(N, N3);
4518}
4519
4520bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4521 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4522
4523 // Bail when not a "cast" like insert_subvector.
4524 if (N->getConstantOperandVal(2) != 0)
4525 return false;
4526 if (!N->getOperand(0).isUndef())
4527 return false;
4528
4529 // Bail when normal isel should do the job.
4530 EVT VT = N->getValueType(0);
4531 EVT InVT = N->getOperand(1).getValueType();
4532 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4533 return false;
4534 if (InVT.getSizeInBits() <= 128)
4535 return false;
4536
4537 // NOTE: We can only get here when doing fixed length SVE code generation.
4538 // We do manual selection because the types involved are not linked to real
4539 // registers (despite being legal) and must be coerced into SVE registers.
4540
4541 assert(isPackedVectorType(VT, CurDAG) &&
4542 "Expected to insert into a packed scalable vector!");
4543
4544 SDLoc DL(N);
4545 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4546 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4547 N->getOperand(1), RC));
4548 return true;
4549}
4550
4551bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4552 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4553
4554 // Bail when not a "cast" like extract_subvector.
4555 if (N->getConstantOperandVal(1) != 0)
4556 return false;
4557
4558 // Bail when normal isel can do the job.
4559 EVT VT = N->getValueType(0);
4560 EVT InVT = N->getOperand(0).getValueType();
4561 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4562 return false;
4563 if (VT.getSizeInBits() <= 128)
4564 return false;
4565
4566 // NOTE: We can only get here when doing fixed length SVE code generation.
4567 // We do manual selection because the types involved are not linked to real
4568 // registers (despite being legal) and must be coerced into SVE registers.
4569
4570 assert(isPackedVectorType(InVT, CurDAG) &&
4571 "Expected to extract from a packed scalable vector!");
4572
4573 SDLoc DL(N);
4574 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4575 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4576 N->getOperand(0), RC));
4577 return true;
4578}
4579
4580bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4581 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4582
4583 SDValue N0 = N->getOperand(0);
4584 SDValue N1 = N->getOperand(1);
4585
4586 EVT VT = N->getValueType(0);
4587 SDLoc DL(N);
4588
4589 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4590 // Rotate by a constant is a funnel shift in IR which is expanded to
4591 // an OR with shifted operands.
4592 // We do the following transform:
4593 // OR N0, N1 -> xar (x, y, imm)
4594 // Where:
4595 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4596 // N0 = SHL_PRED true, V, splat(bits-imm)
4597 // V = (xor x, y)
4598 if (VT.isScalableVector() &&
4599 (Subtarget->hasSVE2() ||
4600 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4601 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4602 N1.getOpcode() != AArch64ISD::SRL_PRED)
4603 std::swap(N0, N1);
4604 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4605 N1.getOpcode() != AArch64ISD::SRL_PRED)
4606 return false;
4607
4608 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4609 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4610 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4611 return false;
4612
4613 if (N0.getOperand(1) != N1.getOperand(1))
4614 return false;
4615
4616 SDValue R1, R2;
4617 bool IsXOROperand = true;
4618 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4619 IsXOROperand = false;
4620 } else {
4621 R1 = N0.getOperand(1).getOperand(0);
4622 R2 = N1.getOperand(1).getOperand(1);
4623 }
4624
4625 APInt ShlAmt, ShrAmt;
4626 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4627 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4628 return false;
4629
4630 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4631 return false;
4632
4633 if (!IsXOROperand) {
4634 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4635 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4636 SDValue MOVIV = SDValue(MOV, 0);
4637
4638 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4639 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4640 VT, Zero, MOVIV, ZSub);
4641
4642 R1 = N1->getOperand(1);
4643 R2 = SDValue(SubRegToReg, 0);
4644 }
4645
4646 SDValue Imm =
4647 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4648
4649 SDValue Ops[] = {R1, R2, Imm};
4650 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4651 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4652 AArch64::XAR_ZZZI_D})) {
4653 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4654 return true;
4655 }
4656 return false;
4657 }
4658
4659 // We have Neon SHA3 XAR operation for v2i64 but for types
4660 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4661 // is available.
4662 EVT SVT;
4663 switch (VT.getSimpleVT().SimpleTy) {
4664 case MVT::v4i32:
4665 case MVT::v2i32:
4666 SVT = MVT::nxv4i32;
4667 break;
4668 case MVT::v8i16:
4669 case MVT::v4i16:
4670 SVT = MVT::nxv8i16;
4671 break;
4672 case MVT::v16i8:
4673 case MVT::v8i8:
4674 SVT = MVT::nxv16i8;
4675 break;
4676 case MVT::v2i64:
4677 case MVT::v1i64:
4678 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4679 break;
4680 default:
4681 return false;
4682 }
4683
4684 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4685 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4686 return false;
4687
4688 if (N0->getOpcode() != AArch64ISD::VSHL ||
4689 N1->getOpcode() != AArch64ISD::VLSHR)
4690 return false;
4691
4692 if (N0->getOperand(0) != N1->getOperand(0))
4693 return false;
4694
4695 SDValue R1, R2;
4696 bool IsXOROperand = true;
4697 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4698 IsXOROperand = false;
4699 } else {
4700 SDValue XOR = N0.getOperand(0);
4701 R1 = XOR.getOperand(0);
4702 R2 = XOR.getOperand(1);
4703 }
4704
4705 unsigned HsAmt = N0.getConstantOperandVal(1);
4706 unsigned ShAmt = N1.getConstantOperandVal(1);
4707
4708 SDValue Imm = CurDAG->getTargetConstant(
4709 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4710
4711 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4712 if (ShAmt + HsAmt != VTSizeInBits)
4713 return false;
4714
4715 if (!IsXOROperand) {
4716 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4717 SDNode *MOV =
4718 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4719 SDValue MOVIV = SDValue(MOV, 0);
4720
4721 R1 = N1->getOperand(0);
4722 R2 = MOVIV;
4723 }
4724
4725 if (SVT != VT) {
4726 SDValue Undef =
4727 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4728
4729 if (SVT.isScalableVector() && VT.is64BitVector()) {
4730 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4731
4732 SDValue UndefQ = SDValue(
4733 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4734 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4735
4736 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4737 UndefQ, R1, DSub),
4738 0);
4739 if (R2.getValueType() == VT)
4740 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4741 UndefQ, R2, DSub),
4742 0);
4743 }
4744
4745 SDValue SubReg = CurDAG->getTargetConstant(
4746 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4747
4748 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4749 R1, SubReg),
4750 0);
4751
4752 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4753 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4754 Undef, R2, SubReg),
4755 0);
4756 }
4757
4758 SDValue Ops[] = {R1, R2, Imm};
4759 SDNode *XAR = nullptr;
4760
4761 if (SVT.isScalableVector()) {
4762 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4763 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4764 AArch64::XAR_ZZZI_D}))
4765 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4766 } else {
4767 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4768 }
4769
4770 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4771
4772 if (SVT != VT) {
4773 if (VT.is64BitVector() && SVT.isScalableVector()) {
4774 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4775
4776 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4777 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4778 SDValue(XAR, 0), ZSub);
4779
4780 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4781 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4782 SDValue(Q, 0), DSub);
4783 } else {
4784 SDValue SubReg = CurDAG->getTargetConstant(
4785 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4786 MVT::i32);
4787 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4788 SDValue(XAR, 0), SubReg);
4789 }
4790 }
4791 ReplaceNode(N, XAR);
4792 return true;
4793}
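// A small sketch (not part of this file) of the scalar pattern matched above:
// a rotate of an XOR, "rotr(x ^ y, imm)", which XAR computes in a single
// instruction, e.g. "xar z0.d, z0.d, z1.d, #imm" in the SVE2 form.
#include <cstdint>

constexpr uint64_t xar64(uint64_t X, uint64_t Y, unsigned Imm) {
  uint64_t V = X ^ Y;
  return (V >> Imm) | (V << (64 - Imm)); // rotate right by Imm, for 0 < Imm < 64
}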
4794
4795void AArch64DAGToDAGISel::Select(SDNode *Node) {
4796 // If we have a custom node, we already have selected!
4797 if (Node->isMachineOpcode()) {
4798 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4799 Node->setNodeId(-1);
4800 return;
4801 }
4802
4803 // A few cases need custom selection.
4804 EVT VT = Node->getValueType(0);
4805
4806 switch (Node->getOpcode()) {
4807 default:
4808 break;
4809
4810 case ISD::ATOMIC_CMP_SWAP:
4811 if (SelectCMP_SWAP(Node))
4812 return;
4813 break;
4814
4815 case ISD::READ_REGISTER:
4816 case AArch64ISD::MRRS:
4817 if (tryReadRegister(Node))
4818 return;
4819 break;
4820
4821 case ISD::WRITE_REGISTER:
4822 case AArch64ISD::MSRR:
4823 if (tryWriteRegister(Node))
4824 return;
4825 break;
4826
4827 case ISD::LOAD: {
4828 // Try to select as an indexed load. Fall through to normal processing
4829 // if we can't.
4830 if (tryIndexedLoad(Node))
4831 return;
4832 break;
4833 }
4834
4835 case ISD::SRL:
4836 case ISD::AND:
4837 case ISD::SRA:
4838 case ISD::SIGN_EXTEND_INREG:
4839 if (tryBitfieldExtractOp(Node))
4840 return;
4841 if (tryBitfieldInsertInZeroOp(Node))
4842 return;
4843 [[fallthrough]];
4844 case ISD::ROTR:
4845 case ISD::SHL:
4846 if (tryShiftAmountMod(Node))
4847 return;
4848 break;
4849
4850 case ISD::SIGN_EXTEND:
4851 if (tryBitfieldExtractOpFromSExt(Node))
4852 return;
4853 break;
4854
4855 case ISD::OR:
4856 if (tryBitfieldInsertOp(Node))
4857 return;
4858 if (trySelectXAR(Node))
4859 return;
4860 break;
4861
4862 case ISD::EXTRACT_SUBVECTOR: {
4863 if (trySelectCastScalableToFixedLengthVector(Node))
4864 return;
4865 break;
4866 }
4867
4868 case ISD::INSERT_SUBVECTOR: {
4869 if (trySelectCastFixedLengthToScalableVector(Node))
4870 return;
4871 break;
4872 }
4873
4874 case ISD::Constant: {
4875 // Materialize zero constants as copies from WZR/XZR. This allows
4876 // the coalescer to propagate these into other instructions.
4877 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4878 if (ConstNode->isZero()) {
4879 if (VT == MVT::i32) {
4880 SDValue New = CurDAG->getCopyFromReg(
4881 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4882 ReplaceNode(Node, New.getNode());
4883 return;
4884 } else if (VT == MVT::i64) {
4885 SDValue New = CurDAG->getCopyFromReg(
4886 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4887 ReplaceNode(Node, New.getNode());
4888 return;
4889 }
4890 }
4891 break;
4892 }
4893
4894 case ISD::FrameIndex: {
4895 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4896 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4897 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4898 const TargetLowering *TLI = getTargetLowering();
4899 SDValue TFI = CurDAG->getTargetFrameIndex(
4900 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4901 SDLoc DL(Node);
4902 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4903 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4904 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4905 return;
4906 }
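// Chained intrinsics: operand 0 is the chain, so the intrinsic ID lives in
// operand 1.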
4907 case ISD::INTRINSIC_W_CHAIN: {
4908 unsigned IntNo = Node->getConstantOperandVal(1);
4909 switch (IntNo) {
4910 default:
4911 break;
4912 case Intrinsic::aarch64_gcsss: {
4913 SDLoc DL(Node);
4914 SDValue Chain = Node->getOperand(0);
4915 SDValue Val = Node->getOperand(2);
4916 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4917 SDNode *SS1 =
4918 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4919 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4920 MVT::Other, Zero, SDValue(SS1, 0));
4921 ReplaceNode(Node, SS2);
4922 return;
4923 }
4924 case Intrinsic::aarch64_ldaxp:
4925 case Intrinsic::aarch64_ldxp: {
4926 unsigned Op =
4927 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4928 SDValue MemAddr = Node->getOperand(2);
4929 SDLoc DL(Node);
4930 SDValue Chain = Node->getOperand(0);
4931
4932 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4933 MVT::Other, MemAddr, Chain);
4934
4935 // Transfer memoperands.
4936 MachineMemOperand *MemOp =
4937 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4938 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4939 ReplaceNode(Node, Ld);
4940 return;
4941 }
4942 case Intrinsic::aarch64_stlxp:
4943 case Intrinsic::aarch64_stxp: {
4944 unsigned Op =
4945 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4946 SDLoc DL(Node);
4947 SDValue Chain = Node->getOperand(0);
4948 SDValue ValLo = Node->getOperand(2);
4949 SDValue ValHi = Node->getOperand(3);
4950 SDValue MemAddr = Node->getOperand(4);
4951
4952 // Place arguments in the right order.
4953 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4954
4955 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4956 // Transfer memoperands.
4957 MachineMemOperand *MemOp =
4958 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4959 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4960
4961 ReplaceNode(Node, St);
4962 return;
4963 }
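// The NEON structured loads below dispatch on the result vector type to pick
// the matching instruction and the subregister of the first tuple element
// (dsub0 for 64-bit results, qsub0 for 128-bit results).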
4964 case Intrinsic::aarch64_neon_ld1x2:
4965 if (VT == MVT::v8i8) {
4966 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4967 return;
4968 } else if (VT == MVT::v16i8) {
4969 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4970 return;
4971 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4972 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4973 return;
4974 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4975 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4976 return;
4977 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4978 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4979 return;
4980 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4981 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4982 return;
4983 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4984 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4985 return;
4986 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4987 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4988 return;
4989 }
4990 break;
4991 case Intrinsic::aarch64_neon_ld1x3:
4992 if (VT == MVT::v8i8) {
4993 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4994 return;
4995 } else if (VT == MVT::v16i8) {
4996 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4997 return;
4998 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4999 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5000 return;
5001 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5002 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5003 return;
5004 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5005 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5006 return;
5007 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5008 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5009 return;
5010 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5011 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5012 return;
5013 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5014 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5015 return;
5016 }
5017 break;
5018 case Intrinsic::aarch64_neon_ld1x4:
5019 if (VT == MVT::v8i8) {
5020 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5021 return;
5022 } else if (VT == MVT::v16i8) {
5023 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5024 return;
5025 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5026 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5027 return;
5028 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5029 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5030 return;
5031 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5032 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5033 return;
5034 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5035 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5036 return;
5037 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5038 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5039 return;
5040 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5041 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5042 return;
5043 }
5044 break;
5045 case Intrinsic::aarch64_neon_ld2:
5046 if (VT == MVT::v8i8) {
5047 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5048 return;
5049 } else if (VT == MVT::v16i8) {
5050 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5051 return;
5052 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5053 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5054 return;
5055 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5056 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5057 return;
5058 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5059 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5060 return;
5061 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5062 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5063 return;
5064 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5065 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5066 return;
5067 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5068 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5069 return;
5070 }
5071 break;
5072 case Intrinsic::aarch64_neon_ld3:
5073 if (VT == MVT::v8i8) {
5074 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5075 return;
5076 } else if (VT == MVT::v16i8) {
5077 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5078 return;
5079 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5080 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5081 return;
5082 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5083 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5084 return;
5085 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5086 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5087 return;
5088 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5089 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5090 return;
5091 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5092 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5093 return;
5094 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5095 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5096 return;
5097 }
5098 break;
5099 case Intrinsic::aarch64_neon_ld4:
5100 if (VT == MVT::v8i8) {
5101 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5102 return;
5103 } else if (VT == MVT::v16i8) {
5104 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5105 return;
5106 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5107 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5108 return;
5109 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5110 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5111 return;
5112 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5113 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5114 return;
5115 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5116 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5117 return;
5118 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5119 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5120 return;
5121 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5122 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5123 return;
5124 }
5125 break;
5126 case Intrinsic::aarch64_neon_ld2r:
5127 if (VT == MVT::v8i8) {
5128 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5129 return;
5130 } else if (VT == MVT::v16i8) {
5131 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5132 return;
5133 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5134 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5135 return;
5136 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5137 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5138 return;
5139 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5140 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5141 return;
5142 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5143 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5144 return;
5145 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5146 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5147 return;
5148 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5149 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5150 return;
5151 }
5152 break;
5153 case Intrinsic::aarch64_neon_ld3r:
5154 if (VT == MVT::v8i8) {
5155 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5156 return;
5157 } else if (VT == MVT::v16i8) {
5158 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5159 return;
5160 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5161 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5162 return;
5163 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5164 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5165 return;
5166 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5167 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5168 return;
5169 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5170 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5171 return;
5172 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5173 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5174 return;
5175 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5176 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5177 return;
5178 }
5179 break;
5180 case Intrinsic::aarch64_neon_ld4r:
5181 if (VT == MVT::v8i8) {
5182 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5183 return;
5184 } else if (VT == MVT::v16i8) {
5185 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5186 return;
5187 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5188 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5189 return;
5190 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5191 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5192 return;
5193 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5194 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5195 return;
5196 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5197 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5198 return;
5199 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5200 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5201 return;
5202 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5203 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5204 return;
5205 }
5206 break;
5207 case Intrinsic::aarch64_neon_ld2lane:
5208 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5209 SelectLoadLane(Node, 2, AArch64::LD2i8);
5210 return;
5211 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5212 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5213 SelectLoadLane(Node, 2, AArch64::LD2i16);
5214 return;
5215 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5216 VT == MVT::v2f32) {
5217 SelectLoadLane(Node, 2, AArch64::LD2i32);
5218 return;
5219 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5220 VT == MVT::v1f64) {
5221 SelectLoadLane(Node, 2, AArch64::LD2i64);
5222 return;
5223 }
5224 break;
5225 case Intrinsic::aarch64_neon_ld3lane:
5226 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5227 SelectLoadLane(Node, 3, AArch64::LD3i8);
5228 return;
5229 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5230 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5231 SelectLoadLane(Node, 3, AArch64::LD3i16);
5232 return;
5233 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5234 VT == MVT::v2f32) {
5235 SelectLoadLane(Node, 3, AArch64::LD3i32);
5236 return;
5237 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5238 VT == MVT::v1f64) {
5239 SelectLoadLane(Node, 3, AArch64::LD3i64);
5240 return;
5241 }
5242 break;
5243 case Intrinsic::aarch64_neon_ld4lane:
5244 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5245 SelectLoadLane(Node, 4, AArch64::LD4i8);
5246 return;
5247 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5248 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5249 SelectLoadLane(Node, 4, AArch64::LD4i16);
5250 return;
5251 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5252 VT == MVT::v2f32) {
5253 SelectLoadLane(Node, 4, AArch64::LD4i32);
5254 return;
5255 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5256 VT == MVT::v1f64) {
5257 SelectLoadLane(Node, 4, AArch64::LD4i64);
5258 return;
5259 }
5260 break;
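// LD64B loads 64 bytes into eight consecutive X registers, returned here as
// an eight-register tuple starting at x8sub_0.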
5261 case Intrinsic::aarch64_ld64b:
5262 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5263 return;
5264 case Intrinsic::aarch64_sve_ld2q_sret: {
5265 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5266 return;
5267 }
5268 case Intrinsic::aarch64_sve_ld3q_sret: {
5269 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5270 return;
5271 }
5272 case Intrinsic::aarch64_sve_ld4q_sret: {
5273 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5274 return;
5275 }
5276 case Intrinsic::aarch64_sve_ld2_sret: {
5277 if (VT == MVT::nxv16i8) {
5278 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5279 true);
5280 return;
5281 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5282 VT == MVT::nxv8bf16) {
5283 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5284 true);
5285 return;
5286 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5287 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5288 true);
5289 return;
5290 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5291 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5292 true);
5293 return;
5294 }
5295 break;
5296 }
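// Predicate-as-counter multi-vector loads: use the SME2 pseudos when
// compiling for streaming mode, otherwise fall back to the SVE2p1 forms.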
5297 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5298 if (VT == MVT::nxv16i8) {
5299 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5300 SelectContiguousMultiVectorLoad(
5301 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5302 else if (Subtarget->hasSVE2p1())
5303 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5304 AArch64::LD1B_2Z);
5305 else
5306 break;
5307 return;
5308 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5309 VT == MVT::nxv8bf16) {
5310 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5311 SelectContiguousMultiVectorLoad(
5312 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5313 else if (Subtarget->hasSVE2p1())
5314 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5315 AArch64::LD1H_2Z);
5316 else
5317 break;
5318 return;
5319 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5320 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5321 SelectContiguousMultiVectorLoad(
5322 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5323 else if (Subtarget->hasSVE2p1())
5324 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5325 AArch64::LD1W_2Z);
5326 else
5327 break;
5328 return;
5329 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5330 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5331 SelectContiguousMultiVectorLoad(
5332 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5333 else if (Subtarget->hasSVE2p1())
5334 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5335 AArch64::LD1D_2Z);
5336 else
5337 break;
5338 return;
5339 }
5340 break;
5341 }
5342 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5343 if (VT == MVT::nxv16i8) {
5344 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5345 SelectContiguousMultiVectorLoad(
5346 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5347 else if (Subtarget->hasSVE2p1())
5348 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5349 AArch64::LD1B_4Z);
5350 else
5351 break;
5352 return;
5353 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5354 VT == MVT::nxv8bf16) {
5355 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5356 SelectContiguousMultiVectorLoad(
5357 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5358 else if (Subtarget->hasSVE2p1())
5359 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5360 AArch64::LD1H_4Z);
5361 else
5362 break;
5363 return;
5364 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5365 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5366 SelectContiguousMultiVectorLoad(
5367 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5368 else if (Subtarget->hasSVE2p1())
5369 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5370 AArch64::LD1W_4Z);
5371 else
5372 break;
5373 return;
5374 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5375 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5376 SelectContiguousMultiVectorLoad(
5377 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5378 else if (Subtarget->hasSVE2p1())
5379 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5380 AArch64::LD1D_4Z);
5381 else
5382 break;
5383 return;
5384 }
5385 break;
5386 }
5387 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5388 if (VT == MVT::nxv16i8) {
5389 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5390 SelectContiguousMultiVectorLoad(Node, 2, 0,
5391 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5392 AArch64::LDNT1B_2Z_PSEUDO);
5393 else if (Subtarget->hasSVE2p1())
5394 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5395 AArch64::LDNT1B_2Z);
5396 else
5397 break;
5398 return;
5399 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5400 VT == MVT::nxv8bf16) {
5401 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5402 SelectContiguousMultiVectorLoad(Node, 2, 1,
5403 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5404 AArch64::LDNT1H_2Z_PSEUDO);
5405 else if (Subtarget->hasSVE2p1())
5406 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5407 AArch64::LDNT1H_2Z);
5408 else
5409 break;
5410 return;
5411 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5412 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5413 SelectContiguousMultiVectorLoad(Node, 2, 2,
5414 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5415 AArch64::LDNT1W_2Z_PSEUDO);
5416 else if (Subtarget->hasSVE2p1())
5417 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5418 AArch64::LDNT1W_2Z);
5419 else
5420 break;
5421 return;
5422 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5423 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5424 SelectContiguousMultiVectorLoad(Node, 2, 3,
5425 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5426 AArch64::LDNT1D_2Z_PSEUDO);
5427 else if (Subtarget->hasSVE2p1())
5428 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5429 AArch64::LDNT1D_2Z);
5430 else
5431 break;
5432 return;
5433 }
5434 break;
5435 }
5436 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5437 if (VT == MVT::nxv16i8) {
5438 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5439 SelectContiguousMultiVectorLoad(Node, 4, 0,
5440 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5441 AArch64::LDNT1B_4Z_PSEUDO);
5442 else if (Subtarget->hasSVE2p1())
5443 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5444 AArch64::LDNT1B_4Z);
5445 else
5446 break;
5447 return;
5448 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5449 VT == MVT::nxv8bf16) {
5450 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5451 SelectContiguousMultiVectorLoad(Node, 4, 1,
5452 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5453 AArch64::LDNT1H_4Z_PSEUDO);
5454 else if (Subtarget->hasSVE2p1())
5455 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5456 AArch64::LDNT1H_4Z);
5457 else
5458 break;
5459 return;
5460 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5461 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5462 SelectContiguousMultiVectorLoad(Node, 4, 2,
5463 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5464 AArch64::LDNT1W_4Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5467 AArch64::LDNT1W_4Z);
5468 else
5469 break;
5470 return;
5471 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5472 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5473 SelectContiguousMultiVectorLoad(Node, 4, 3,
5474 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5475 AArch64::LDNT1D_4Z_PSEUDO);
5476 else if (Subtarget->hasSVE2p1())
5477 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5478 AArch64::LDNT1D_4Z);
5479 else
5480 break;
5481 return;
5482 }
5483 break;
5484 }
5485 case Intrinsic::aarch64_sve_ld3_sret: {
5486 if (VT == MVT::nxv16i8) {
5487 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5488 true);
5489 return;
5490 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5491 VT == MVT::nxv8bf16) {
5492 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5493 true);
5494 return;
5495 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5496 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5497 true);
5498 return;
5499 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5500 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5501 true);
5502 return;
5503 }
5504 break;
5505 }
5506 case Intrinsic::aarch64_sve_ld4_sret: {
5507 if (VT == MVT::nxv16i8) {
5508 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5509 true);
5510 return;
5511 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5512 VT == MVT::nxv8bf16) {
5513 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5514 true);
5515 return;
5516 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5517 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5518 true);
5519 return;
5520 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5521 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5522 true);
5523 return;
5524 }
5525 break;
5526 }
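// SME ZA reads: the cases below move groups of two or four tile slices
// (horizontal or vertical) or ZA array vectors into SVE register tuples.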
5527 case Intrinsic::aarch64_sme_read_hor_vg2: {
5528 if (VT == MVT::nxv16i8) {
5529 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5530 AArch64::MOVA_2ZMXI_H_B);
5531 return;
5532 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5533 VT == MVT::nxv8bf16) {
5534 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5535 AArch64::MOVA_2ZMXI_H_H);
5536 return;
5537 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5538 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5539 AArch64::MOVA_2ZMXI_H_S);
5540 return;
5541 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5542 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5543 AArch64::MOVA_2ZMXI_H_D);
5544 return;
5545 }
5546 break;
5547 }
5548 case Intrinsic::aarch64_sme_read_ver_vg2: {
5549 if (VT == MVT::nxv16i8) {
5550 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5551 AArch64::MOVA_2ZMXI_V_B);
5552 return;
5553 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5554 VT == MVT::nxv8bf16) {
5555 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5556 AArch64::MOVA_2ZMXI_V_H);
5557 return;
5558 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5559 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5560 AArch64::MOVA_2ZMXI_V_S);
5561 return;
5562 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5563 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5564 AArch64::MOVA_2ZMXI_V_D);
5565 return;
5566 }
5567 break;
5568 }
5569 case Intrinsic::aarch64_sme_read_hor_vg4: {
5570 if (VT == MVT::nxv16i8) {
5571 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5572 AArch64::MOVA_4ZMXI_H_B);
5573 return;
5574 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5575 VT == MVT::nxv8bf16) {
5576 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5577 AArch64::MOVA_4ZMXI_H_H);
5578 return;
5579 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5580 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5581 AArch64::MOVA_4ZMXI_H_S);
5582 return;
5583 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5584 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5585 AArch64::MOVA_4ZMXI_H_D);
5586 return;
5587 }
5588 break;
5589 }
5590 case Intrinsic::aarch64_sme_read_ver_vg4: {
5591 if (VT == MVT::nxv16i8) {
5592 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5593 AArch64::MOVA_4ZMXI_V_B);
5594 return;
5595 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5596 VT == MVT::nxv8bf16) {
5597 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5598 AArch64::MOVA_4ZMXI_V_H);
5599 return;
5600 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5601 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5602 AArch64::MOVA_4ZMXI_V_S);
5603 return;
5604 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5605 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5606 AArch64::MOVA_4ZMXI_V_D);
5607 return;
5608 }
5609 break;
5610 }
5611 case Intrinsic::aarch64_sme_read_vg1x2: {
5612 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5613 AArch64::MOVA_VG2_2ZMXI);
5614 return;
5615 }
5616 case Intrinsic::aarch64_sme_read_vg1x4: {
5617 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5618 AArch64::MOVA_VG4_4ZMXI);
5619 return;
5620 }
5621 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5622 if (VT == MVT::nxv16i8) {
5623 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5624 return;
5625 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5626 VT == MVT::nxv8bf16) {
5627 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5628 return;
5629 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5630 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5631 return;
5632 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5633 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5634 return;
5635 }
5636 break;
5637 }
5638 case Intrinsic::aarch64_sme_readz_vert_x2: {
5639 if (VT == MVT::nxv16i8) {
5640 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5641 return;
5642 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5643 VT == MVT::nxv8bf16) {
5644 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5645 return;
5646 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5647 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5648 return;
5649 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5650 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5651 return;
5652 }
5653 break;
5654 }
5655 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5656 if (VT == MVT::nxv16i8) {
5657 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5658 return;
5659 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5660 VT == MVT::nxv8bf16) {
5661 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5662 return;
5663 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5664 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5665 return;
5666 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5667 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5668 return;
5669 }
5670 break;
5671 }
5672 case Intrinsic::aarch64_sme_readz_vert_x4: {
5673 if (VT == MVT::nxv16i8) {
5674 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5675 return;
5676 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5677 VT == MVT::nxv8bf16) {
5678 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5679 return;
5680 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5681 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5682 return;
5683 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5684 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5685 return;
5686 }
5687 break;
5688 }
5689 case Intrinsic::aarch64_sme_readz_x2: {
5690 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5691 AArch64::ZA);
5692 return;
5693 }
5694 case Intrinsic::aarch64_sme_readz_x4: {
5695 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5696 AArch64::ZA);
5697 return;
5698 }
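// The Swift async context slot sits at FP - 8; return that address and mark
// the function as using it.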
5699 case Intrinsic::swift_async_context_addr: {
5700 SDLoc DL(Node);
5701 SDValue Chain = Node->getOperand(0);
5702 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5703 SDValue Res = SDValue(
5704 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5705 CurDAG->getTargetConstant(8, DL, MVT::i32),
5706 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5707 0);
5708 ReplaceUses(SDValue(Node, 0), Res);
5709 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5710 CurDAG->RemoveDeadNode(Node);
5711
5712 auto &MF = CurDAG->getMachineFunction();
5713 MF.getFrameInfo().setFrameAddressIsTaken(true);
5714 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5715 return;
5716 }
5717 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5718 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5719 Node->getValueType(0),
5720 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5721 AArch64::LUTI2_4ZTZI_S}))
5722 // Second Immediate must be <= 3:
5723 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5724 return;
5725 }
5726 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5727 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5728 Node->getValueType(0),
5729 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5730 // Second Immediate must be <= 1:
5731 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5732 return;
5733 }
5734 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5735 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5736 Node->getValueType(0),
5737 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5738 AArch64::LUTI2_2ZTZI_S}))
5739 // Second Immediate must be <= 7:
5740 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5741 return;
5742 }
5743 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5744 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5745 Node->getValueType(0),
5746 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5747 AArch64::LUTI4_2ZTZI_S}))
5748 // Second Immediate must be <= 3:
5749 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5750 return;
5751 }
5752 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5753 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5754 return;
5755 }
5756 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5757 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5758 Node->getValueType(0),
5759 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5760 SelectCVTIntrinsicFP8(Node, 2, Opc);
5761 return;
5762 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5763 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5764 Node->getValueType(0),
5765 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5766 SelectCVTIntrinsicFP8(Node, 2, Opc);
5767 return;
5768 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5769 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5770 Node->getValueType(0),
5771 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5772 SelectCVTIntrinsicFP8(Node, 2, Opc);
5773 return;
5774 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5775 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5776 Node->getValueType(0),
5777 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5778 SelectCVTIntrinsicFP8(Node, 2, Opc);
5779 return;
5780 }
5781 } break;
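// Chainless intrinsics carry their ID in operand 0.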
5782 case ISD::INTRINSIC_WO_CHAIN: {
5783 unsigned IntNo = Node->getConstantOperandVal(0);
5784 switch (IntNo) {
5785 default:
5786 break;
5787 case Intrinsic::aarch64_tagp:
5788 SelectTagP(Node);
5789 return;
5790
5791 case Intrinsic::ptrauth_auth:
5792 SelectPtrauthAuth(Node);
5793 return;
5794
5795 case Intrinsic::ptrauth_resign:
5796 SelectPtrauthResign(Node);
5797 return;
5798
5799 case Intrinsic::aarch64_neon_tbl2:
5800 SelectTable(Node, 2,
5801 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5802 false);
5803 return;
5804 case Intrinsic::aarch64_neon_tbl3:
5805 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5806 : AArch64::TBLv16i8Three,
5807 false);
5808 return;
5809 case Intrinsic::aarch64_neon_tbl4:
5810 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5811 : AArch64::TBLv16i8Four,
5812 false);
5813 return;
5814 case Intrinsic::aarch64_neon_tbx2:
5815 SelectTable(Node, 2,
5816 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5817 true);
5818 return;
5819 case Intrinsic::aarch64_neon_tbx3:
5820 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5821 : AArch64::TBXv16i8Three,
5822 true);
5823 return;
5824 case Intrinsic::aarch64_neon_tbx4:
5825 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5826 : AArch64::TBXv16i8Four,
5827 true);
5828 return;
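// For the SME2/SVE2p1 multi-vector intrinsics below, SelectOpcodeFromVT maps
// the result element type to the matching opcode (a zero entry marks an
// unsupported element type) before the shared selection helpers are invoked.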
5829 case Intrinsic::aarch64_sve_srshl_single_x2:
5830 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5831 Node->getValueType(0),
5832 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5833 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5834 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5835 return;
5836 case Intrinsic::aarch64_sve_srshl_single_x4:
5837 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5838 Node->getValueType(0),
5839 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5840 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5841 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5842 return;
5843 case Intrinsic::aarch64_sve_urshl_single_x2:
5844 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5845 Node->getValueType(0),
5846 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5847 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5848 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5849 return;
5850 case Intrinsic::aarch64_sve_urshl_single_x4:
5851 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5852 Node->getValueType(0),
5853 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5854 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5855 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5856 return;
5857 case Intrinsic::aarch64_sve_srshl_x2:
5858 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5859 Node->getValueType(0),
5860 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5861 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5862 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5863 return;
5864 case Intrinsic::aarch64_sve_srshl_x4:
5865 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5866 Node->getValueType(0),
5867 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5868 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5869 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5870 return;
5871 case Intrinsic::aarch64_sve_urshl_x2:
5872 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5873 Node->getValueType(0),
5874 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5875 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5876 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5877 return;
5878 case Intrinsic::aarch64_sve_urshl_x4:
5879 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5880 Node->getValueType(0),
5881 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5882 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5883 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5884 return;
5885 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5886 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5887 Node->getValueType(0),
5888 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5889 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5890 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5891 return;
5892 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5893 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5894 Node->getValueType(0),
5895 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5896 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5897 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5898 return;
5899 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5900 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5901 Node->getValueType(0),
5902 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5903 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5904 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5905 return;
5906 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5907 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5908 Node->getValueType(0),
5909 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5910 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5911 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5912 return;
5913 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5914 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5915 Node->getValueType(0),
5916 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5917 AArch64::FSCALE_2ZZ_D}))
5918 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5919 return;
5920 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5921 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5922 Node->getValueType(0),
5923 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5924 AArch64::FSCALE_4ZZ_D}))
5925 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5926 return;
5927 case Intrinsic::aarch64_sme_fp8_scale_x2:
5928 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5929 Node->getValueType(0),
5930 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5931 AArch64::FSCALE_2Z2Z_D}))
5932 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5933 return;
5934 case Intrinsic::aarch64_sme_fp8_scale_x4:
5935 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5936 Node->getValueType(0),
5937 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5938 AArch64::FSCALE_4Z4Z_D}))
5939 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5940 return;
5941 case Intrinsic::aarch64_sve_whilege_x2:
5942 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5943 Node->getValueType(0),
5944 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5945 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5946 SelectWhilePair(Node, Op);
5947 return;
5948 case Intrinsic::aarch64_sve_whilegt_x2:
5949 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5950 Node->getValueType(0),
5951 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5952 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5953 SelectWhilePair(Node, Op);
5954 return;
5955 case Intrinsic::aarch64_sve_whilehi_x2:
5956 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5957 Node->getValueType(0),
5958 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5959 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5960 SelectWhilePair(Node, Op);
5961 return;
5962 case Intrinsic::aarch64_sve_whilehs_x2:
5963 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5964 Node->getValueType(0),
5965 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5966 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5967 SelectWhilePair(Node, Op);
5968 return;
5969 case Intrinsic::aarch64_sve_whilele_x2:
5970 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5971 Node->getValueType(0),
5972 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5973 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5974 SelectWhilePair(Node, Op);
5975 return;
5976 case Intrinsic::aarch64_sve_whilelo_x2:
5977 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5978 Node->getValueType(0),
5979 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5980 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5981 SelectWhilePair(Node, Op);
5982 return;
5983 case Intrinsic::aarch64_sve_whilels_x2:
5984 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5985 Node->getValueType(0),
5986 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5987 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5988 SelectWhilePair(Node, Op);
5989 return;
5990 case Intrinsic::aarch64_sve_whilelt_x2:
5991 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5992 Node->getValueType(0),
5993 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5994 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5995 SelectWhilePair(Node, Op);
5996 return;
5997 case Intrinsic::aarch64_sve_smax_single_x2:
5998 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5999 Node->getValueType(0),
6000 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6001 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6002 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6003 return;
6004 case Intrinsic::aarch64_sve_umax_single_x2:
6005 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6006 Node->getValueType(0),
6007 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6008 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6009 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6010 return;
6011 case Intrinsic::aarch64_sve_fmax_single_x2:
6012 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6013 Node->getValueType(0),
6014 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6015 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6016 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6017 return;
6018 case Intrinsic::aarch64_sve_smax_single_x4:
6019 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6020 Node->getValueType(0),
6021 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6022 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6023 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6024 return;
6025 case Intrinsic::aarch64_sve_umax_single_x4:
6026 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6027 Node->getValueType(0),
6028 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6029 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6030 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6031 return;
6032 case Intrinsic::aarch64_sve_fmax_single_x4:
6033 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6034 Node->getValueType(0),
6035 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6036 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6037 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6038 return;
6039 case Intrinsic::aarch64_sve_smin_single_x2:
6040 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6041 Node->getValueType(0),
6042 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6043 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6044 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6045 return;
6046 case Intrinsic::aarch64_sve_umin_single_x2:
6047 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6048 Node->getValueType(0),
6049 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6050 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6051 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6052 return;
6053 case Intrinsic::aarch64_sve_fmin_single_x2:
6054 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6055 Node->getValueType(0),
6056 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6057 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6058 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6059 return;
6060 case Intrinsic::aarch64_sve_smin_single_x4:
6061 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6062 Node->getValueType(0),
6063 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6064 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6065 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6066 return;
6067 case Intrinsic::aarch64_sve_umin_single_x4:
6068 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6069 Node->getValueType(0),
6070 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6071 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6072 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6073 return;
6074 case Intrinsic::aarch64_sve_fmin_single_x4:
6075 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6076 Node->getValueType(0),
6077 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6078 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6079 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6080 return;
6081 case Intrinsic::aarch64_sve_smax_x2:
6082 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6083 Node->getValueType(0),
6084 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6085 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6086 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6087 return;
6088 case Intrinsic::aarch64_sve_umax_x2:
6089 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6090 Node->getValueType(0),
6091 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6092 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6093 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6094 return;
6095 case Intrinsic::aarch64_sve_fmax_x2:
6096 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6097 Node->getValueType(0),
6098 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6099 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6100 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6101 return;
6102 case Intrinsic::aarch64_sve_smax_x4:
6103 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6104 Node->getValueType(0),
6105 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6106 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6107 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6108 return;
6109 case Intrinsic::aarch64_sve_umax_x4:
6110 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6111 Node->getValueType(0),
6112 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6113 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6114 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6115 return;
6116 case Intrinsic::aarch64_sve_fmax_x4:
6117 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6118 Node->getValueType(0),
6119 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6120 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6121 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6122 return;
6123 case Intrinsic::aarch64_sme_famax_x2:
6124 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6125 Node->getValueType(0),
6126 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6127 AArch64::FAMAX_2Z2Z_D}))
6128 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6129 return;
6130 case Intrinsic::aarch64_sme_famax_x4:
6131 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6132 Node->getValueType(0),
6133 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6134 AArch64::FAMAX_4Z4Z_D}))
6135 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6136 return;
6137 case Intrinsic::aarch64_sme_famin_x2:
6138 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6139 Node->getValueType(0),
6140 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6141 AArch64::FAMIN_2Z2Z_D}))
6142 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6143 return;
6144 case Intrinsic::aarch64_sme_famin_x4:
6145 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6146 Node->getValueType(0),
6147 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6148 AArch64::FAMIN_4Z4Z_D}))
6149 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6150 return;
6151 case Intrinsic::aarch64_sve_smin_x2:
6152 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6153 Node->getValueType(0),
6154 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6155 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6156 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6157 return;
6158 case Intrinsic::aarch64_sve_umin_x2:
6159 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6160 Node->getValueType(0),
6161 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6162 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6163 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6164 return;
6165 case Intrinsic::aarch64_sve_fmin_x2:
6166 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6167 Node->getValueType(0),
6168 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6169 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6170 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6171 return;
6172 case Intrinsic::aarch64_sve_smin_x4:
6173 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6174 Node->getValueType(0),
6175 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6176 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6177 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6178 return;
6179 case Intrinsic::aarch64_sve_umin_x4:
6180 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6181 Node->getValueType(0),
6182 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6183 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6184 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6185 return;
6186 case Intrinsic::aarch64_sve_fmin_x4:
6187 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6188 Node->getValueType(0),
6189 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6190 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6191 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6192 return;
6193 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
6194 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6195 Node->getValueType(0),
6196 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6197 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6198 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6199 return;
6200 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
6201 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6202 Node->getValueType(0),
6203 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6204 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6205 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6206 return;
6207 case Intrinsic::aarch64_sve_fminnm_single_x2:
6208 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6209 Node->getValueType(0),
6210 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6211 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6212 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6213 return;
6214 case Intrinsic::aarch64_sve_fminnm_single_x4:
6215 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6216 Node->getValueType(0),
6217 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6218 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6219 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6220 return;
6221 case Intrinsic::aarch64_sve_fmaxnm_x2:
6222 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6223 Node->getValueType(0),
6224 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6225 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6226 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6227 return;
6228 case Intrinsic::aarch64_sve_fmaxnm_x4:
6229 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6230 Node->getValueType(0),
6231 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6232 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6233 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6234 return;
6235 case Intrinsic::aarch64_sve_fminnm_x2:
6236 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6237 Node->getValueType(0),
6238 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6239 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6240 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6241 return;
6242 case Intrinsic::aarch64_sve_fminnm_x4:
6243 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6244 Node->getValueType(0),
6245 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6246 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6247 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6248 return;
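// The fcvtzs/fcvtzu/scvtf/ucvtf multi-vector conversions below only come in
// 32-bit element (StoS) forms here.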
6249 case Intrinsic::aarch64_sve_fcvtzs_x2:
6250 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6251 return;
6252 case Intrinsic::aarch64_sve_scvtf_x2:
6253 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6254 return;
6255 case Intrinsic::aarch64_sve_fcvtzu_x2:
6256 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6257 return;
6258 case Intrinsic::aarch64_sve_ucvtf_x2:
6259 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6260 return;
6261 case Intrinsic::aarch64_sve_fcvtzs_x4:
6262 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6263 return;
6264 case Intrinsic::aarch64_sve_scvtf_x4:
6265 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6266 return;
6267 case Intrinsic::aarch64_sve_fcvtzu_x4:
6268 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6269 return;
6270 case Intrinsic::aarch64_sve_ucvtf_x4:
6271 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6272 return;
6273 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6274 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6275 return;
6276 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6277 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6278 return;
6279 case Intrinsic::aarch64_sve_sclamp_single_x2:
6280 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6281 Node->getValueType(0),
6282 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6283 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6284 SelectClamp(Node, 2, Op);
6285 return;
6286 case Intrinsic::aarch64_sve_uclamp_single_x2:
6287 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6288 Node->getValueType(0),
6289 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6290 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6291 SelectClamp(Node, 2, Op);
6292 return;
6293 case Intrinsic::aarch64_sve_fclamp_single_x2:
6294 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6295 Node->getValueType(0),
6296 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6297 AArch64::FCLAMP_VG2_2Z2Z_D}))
6298 SelectClamp(Node, 2, Op);
6299 return;
6300 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6301 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6302 return;
6303 case Intrinsic::aarch64_sve_sclamp_single_x4:
6304 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6305 Node->getValueType(0),
6306 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6307 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6308 SelectClamp(Node, 4, Op);
6309 return;
6310 case Intrinsic::aarch64_sve_uclamp_single_x4:
6311 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6312 Node->getValueType(0),
6313 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6314 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6315 SelectClamp(Node, 4, Op);
6316 return;
6317 case Intrinsic::aarch64_sve_fclamp_single_x4:
6318 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6319 Node->getValueType(0),
6320 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6321 AArch64::FCLAMP_VG4_4Z4Z_D}))
6322 SelectClamp(Node, 4, Op);
6323 return;
6324 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6325 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6326 return;
6327 case Intrinsic::aarch64_sve_add_single_x2:
6328 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6329 Node->getValueType(0),
6330 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6331 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6332 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6333 return;
6334 case Intrinsic::aarch64_sve_add_single_x4:
6335 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6336 Node->getValueType(0),
6337 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6338 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6339 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6340 return;
6341 case Intrinsic::aarch64_sve_zip_x2:
6342 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6343 Node->getValueType(0),
6344 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6345 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6346 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6347 return;
6348 case Intrinsic::aarch64_sve_zipq_x2:
6349 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6350 AArch64::ZIP_VG2_2ZZZ_Q);
6351 return;
6352 case Intrinsic::aarch64_sve_zip_x4:
6353 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6354 Node->getValueType(0),
6355 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6356 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6357 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6358 return;
6359 case Intrinsic::aarch64_sve_zipq_x4:
6360 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6361 AArch64::ZIP_VG4_4Z4Z_Q);
6362 return;
6363 case Intrinsic::aarch64_sve_uzp_x2:
6364 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6365 Node->getValueType(0),
6366 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6367 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6368 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6369 return;
6370 case Intrinsic::aarch64_sve_uzpq_x2:
6371 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6372 AArch64::UZP_VG2_2ZZZ_Q);
6373 return;
6374 case Intrinsic::aarch64_sve_uzp_x4:
6375 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6376 Node->getValueType(0),
6377 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6378 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6379 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6380 return;
6381 case Intrinsic::aarch64_sve_uzpq_x4:
6382 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6383 AArch64::UZP_VG4_4Z4Z_Q);
6384 return;
6385 case Intrinsic::aarch64_sve_sel_x2:
6386 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6387 Node->getValueType(0),
6388 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6389 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6390 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6391 return;
6392 case Intrinsic::aarch64_sve_sel_x4:
6393 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6394 Node->getValueType(0),
6395 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6396 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6397 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6398 return;
6399 case Intrinsic::aarch64_sve_frinta_x2:
6400 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6401 return;
6402 case Intrinsic::aarch64_sve_frinta_x4:
6403 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6404 return;
6405 case Intrinsic::aarch64_sve_frintm_x2:
6406 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6407 return;
6408 case Intrinsic::aarch64_sve_frintm_x4:
6409 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6410 return;
6411 case Intrinsic::aarch64_sve_frintn_x2:
6412 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6413 return;
6414 case Intrinsic::aarch64_sve_frintn_x4:
6415 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6416 return;
6417 case Intrinsic::aarch64_sve_frintp_x2:
6418 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6419 return;
6420 case Intrinsic::aarch64_sve_frintp_x4:
6421 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6422 return;
6423 case Intrinsic::aarch64_sve_sunpk_x2:
6424 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6425 Node->getValueType(0),
6426 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6427 AArch64::SUNPK_VG2_2ZZ_D}))
6428 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6429 return;
6430 case Intrinsic::aarch64_sve_uunpk_x2:
6431 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6432 Node->getValueType(0),
6433 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6434 AArch64::UUNPK_VG2_2ZZ_D}))
6435 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6436 return;
6437 case Intrinsic::aarch64_sve_sunpk_x4:
6438 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6439 Node->getValueType(0),
6440 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6441 AArch64::SUNPK_VG4_4Z2Z_D}))
6442 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6443 return;
6444 case Intrinsic::aarch64_sve_uunpk_x4:
6445 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6446 Node->getValueType(0),
6447 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6448 AArch64::UUNPK_VG4_4Z2Z_D}))
6449 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6450 return;
6451 case Intrinsic::aarch64_sve_pext_x2: {
6452 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6453 Node->getValueType(0),
6454 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6455 AArch64::PEXT_2PCI_D}))
6456 SelectPExtPair(Node, Op);
6457 return;
6458 }
6459 }
6460 break;
6461 }
6462 case ISD::INTRINSIC_VOID: {
6463 unsigned IntNo = Node->getConstantOperandVal(1);
6464 if (Node->getNumOperands() >= 3)
6465 VT = Node->getOperand(2)->getValueType(0);
6466 switch (IntNo) {
6467 default:
6468 break;
6469 case Intrinsic::aarch64_neon_st1x2: {
6470 if (VT == MVT::v8i8) {
6471 SelectStore(Node, 2, AArch64::ST1Twov8b);
6472 return;
6473 } else if (VT == MVT::v16i8) {
6474 SelectStore(Node, 2, AArch64::ST1Twov16b);
6475 return;
6476 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6477 VT == MVT::v4bf16) {
6478 SelectStore(Node, 2, AArch64::ST1Twov4h);
6479 return;
6480 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6481 VT == MVT::v8bf16) {
6482 SelectStore(Node, 2, AArch64::ST1Twov8h);
6483 return;
6484 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6485 SelectStore(Node, 2, AArch64::ST1Twov2s);
6486 return;
6487 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6488 SelectStore(Node, 2, AArch64::ST1Twov4s);
6489 return;
6490 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6491 SelectStore(Node, 2, AArch64::ST1Twov2d);
6492 return;
6493 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6494 SelectStore(Node, 2, AArch64::ST1Twov1d);
6495 return;
6496 }
6497 break;
6498 }
6499 case Intrinsic::aarch64_neon_st1x3: {
6500 if (VT == MVT::v8i8) {
6501 SelectStore(Node, 3, AArch64::ST1Threev8b);
6502 return;
6503 } else if (VT == MVT::v16i8) {
6504 SelectStore(Node, 3, AArch64::ST1Threev16b);
6505 return;
6506 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6507 VT == MVT::v4bf16) {
6508 SelectStore(Node, 3, AArch64::ST1Threev4h);
6509 return;
6510 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6511 VT == MVT::v8bf16) {
6512 SelectStore(Node, 3, AArch64::ST1Threev8h);
6513 return;
6514 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6515 SelectStore(Node, 3, AArch64::ST1Threev2s);
6516 return;
6517 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6518 SelectStore(Node, 3, AArch64::ST1Threev4s);
6519 return;
6520 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6521 SelectStore(Node, 3, AArch64::ST1Threev2d);
6522 return;
6523 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6524 SelectStore(Node, 3, AArch64::ST1Threev1d);
6525 return;
6526 }
6527 break;
6528 }
6529 case Intrinsic::aarch64_neon_st1x4: {
6530 if (VT == MVT::v8i8) {
6531 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6532 return;
6533 } else if (VT == MVT::v16i8) {
6534 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6535 return;
6536 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6537 VT == MVT::v4bf16) {
6538 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6539 return;
6540 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6541 VT == MVT::v8bf16) {
6542 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6543 return;
6544 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6545 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6546 return;
6547 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6548 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6549 return;
6550 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6551 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6552 return;
6553 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6554 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6555 return;
6556 }
6557 break;
6558 }
6559 case Intrinsic::aarch64_neon_st2: {
6560 if (VT == MVT::v8i8) {
6561 SelectStore(Node, 2, AArch64::ST2Twov8b);
6562 return;
6563 } else if (VT == MVT::v16i8) {
6564 SelectStore(Node, 2, AArch64::ST2Twov16b);
6565 return;
6566 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6567 VT == MVT::v4bf16) {
6568 SelectStore(Node, 2, AArch64::ST2Twov4h);
6569 return;
6570 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6571 VT == MVT::v8bf16) {
6572 SelectStore(Node, 2, AArch64::ST2Twov8h);
6573 return;
6574 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6575 SelectStore(Node, 2, AArch64::ST2Twov2s);
6576 return;
6577 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6578 SelectStore(Node, 2, AArch64::ST2Twov4s);
6579 return;
6580 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6581 SelectStore(Node, 2, AArch64::ST2Twov2d);
6582 return;
6583 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6584 SelectStore(Node, 2, AArch64::ST1Twov1d);
6585 return;
6586 }
6587 break;
6588 }
6589 case Intrinsic::aarch64_neon_st3: {
6590 if (VT == MVT::v8i8) {
6591 SelectStore(Node, 3, AArch64::ST3Threev8b);
6592 return;
6593 } else if (VT == MVT::v16i8) {
6594 SelectStore(Node, 3, AArch64::ST3Threev16b);
6595 return;
6596 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6597 VT == MVT::v4bf16) {
6598 SelectStore(Node, 3, AArch64::ST3Threev4h);
6599 return;
6600 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6601 VT == MVT::v8bf16) {
6602 SelectStore(Node, 3, AArch64::ST3Threev8h);
6603 return;
6604 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6605 SelectStore(Node, 3, AArch64::ST3Threev2s);
6606 return;
6607 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6608 SelectStore(Node, 3, AArch64::ST3Threev4s);
6609 return;
6610 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6611 SelectStore(Node, 3, AArch64::ST3Threev2d);
6612 return;
6613 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6614 SelectStore(Node, 3, AArch64::ST1Threev1d);
6615 return;
6616 }
6617 break;
6618 }
6619 case Intrinsic::aarch64_neon_st4: {
6620 if (VT == MVT::v8i8) {
6621 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6622 return;
6623 } else if (VT == MVT::v16i8) {
6624 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6625 return;
6626 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6627 VT == MVT::v4bf16) {
6628 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6629 return;
6630 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6631 VT == MVT::v8bf16) {
6632 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6633 return;
6634 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6635 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6636 return;
6637 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6638 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6639 return;
6640 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6641 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6642 return;
6643 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6644 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6645 return;
6646 }
6647 break;
6648 }
6649 case Intrinsic::aarch64_neon_st2lane: {
6650 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6651 SelectStoreLane(Node, 2, AArch64::ST2i8);
6652 return;
6653 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6654 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6655 SelectStoreLane(Node, 2, AArch64::ST2i16);
6656 return;
6657 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6658 VT == MVT::v2f32) {
6659 SelectStoreLane(Node, 2, AArch64::ST2i32);
6660 return;
6661 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6662 VT == MVT::v1f64) {
6663 SelectStoreLane(Node, 2, AArch64::ST2i64);
6664 return;
6665 }
6666 break;
6667 }
6668 case Intrinsic::aarch64_neon_st3lane: {
6669 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6670 SelectStoreLane(Node, 3, AArch64::ST3i8);
6671 return;
6672 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6673 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6674 SelectStoreLane(Node, 3, AArch64::ST3i16);
6675 return;
6676 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6677 VT == MVT::v2f32) {
6678 SelectStoreLane(Node, 3, AArch64::ST3i32);
6679 return;
6680 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6681 VT == MVT::v1f64) {
6682 SelectStoreLane(Node, 3, AArch64::ST3i64);
6683 return;
6684 }
6685 break;
6686 }
6687 case Intrinsic::aarch64_neon_st4lane: {
6688 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6689 SelectStoreLane(Node, 4, AArch64::ST4i8);
6690 return;
6691 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6692 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6693 SelectStoreLane(Node, 4, AArch64::ST4i16);
6694 return;
6695 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6696 VT == MVT::v2f32) {
6697 SelectStoreLane(Node, 4, AArch64::ST4i32);
6698 return;
6699 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6700 VT == MVT::v1f64) {
6701 SelectStoreLane(Node, 4, AArch64::ST4i64);
6702 return;
6703 }
6704 break;
6705 }
6706 case Intrinsic::aarch64_sve_st2q: {
6707 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6708 return;
6709 }
6710 case Intrinsic::aarch64_sve_st3q: {
6711 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6712 return;
6713 }
6714 case Intrinsic::aarch64_sve_st4q: {
6715 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6716 return;
6717 }
6718 case Intrinsic::aarch64_sve_st2: {
6719 if (VT == MVT::nxv16i8) {
6720 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6721 return;
6722 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6723 VT == MVT::nxv8bf16) {
6724 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6725 return;
6726 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6727 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6728 return;
6729 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6730 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6731 return;
6732 }
6733 break;
6734 }
6735 case Intrinsic::aarch64_sve_st3: {
6736 if (VT == MVT::nxv16i8) {
6737 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6738 return;
6739 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6740 VT == MVT::nxv8bf16) {
6741 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6742 return;
6743 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6744 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6745 return;
6746 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6747 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6748 return;
6749 }
6750 break;
6751 }
6752 case Intrinsic::aarch64_sve_st4: {
6753 if (VT == MVT::nxv16i8) {
6754 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6755 return;
6756 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6757 VT == MVT::nxv8bf16) {
6758 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6759 return;
6760 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6761 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6762 return;
6763 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6764 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6765 return;
6766 }
6767 break;
6768 }
6769 }
6770 break;
6771 }
6772 case AArch64ISD::LD2post: {
6773 if (VT == MVT::v8i8) {
6774 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6775 return;
6776 } else if (VT == MVT::v16i8) {
6777 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6778 return;
6779 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6780 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6781 return;
6782 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6783 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6784 return;
6785 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6786 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6787 return;
6788 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6789 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6790 return;
6791 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6792 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6793 return;
6794 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6795 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6796 return;
6797 }
6798 break;
6799 }
6800 case AArch64ISD::LD3post: {
6801 if (VT == MVT::v8i8) {
6802 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6803 return;
6804 } else if (VT == MVT::v16i8) {
6805 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6806 return;
6807 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6808 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6809 return;
6810 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6811 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6812 return;
6813 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6814 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6815 return;
6816 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6817 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6818 return;
6819 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6820 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6821 return;
6822 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6823 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6824 return;
6825 }
6826 break;
6827 }
6828 case AArch64ISD::LD4post: {
6829 if (VT == MVT::v8i8) {
6830 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6831 return;
6832 } else if (VT == MVT::v16i8) {
6833 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6834 return;
6835 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6836 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6837 return;
6838 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6839 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6840 return;
6841 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6842 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6843 return;
6844 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6845 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6846 return;
6847 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6848 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6849 return;
6850 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6851 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6852 return;
6853 }
6854 break;
6855 }
6856 case AArch64ISD::LD1x2post: {
6857 if (VT == MVT::v8i8) {
6858 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6859 return;
6860 } else if (VT == MVT::v16i8) {
6861 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6862 return;
6863 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6864 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6865 return;
6866 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6867 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6868 return;
6869 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6870 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6871 return;
6872 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6873 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6874 return;
6875 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6876 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6877 return;
6878 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6879 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6880 return;
6881 }
6882 break;
6883 }
6884 case AArch64ISD::LD1x3post: {
6885 if (VT == MVT::v8i8) {
6886 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6887 return;
6888 } else if (VT == MVT::v16i8) {
6889 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6890 return;
6891 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6892 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6893 return;
6894 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6895 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6896 return;
6897 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6898 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6899 return;
6900 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6901 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6902 return;
6903 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6904 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6905 return;
6906 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6907 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6908 return;
6909 }
6910 break;
6911 }
6912 case AArch64ISD::LD1x4post: {
6913 if (VT == MVT::v8i8) {
6914 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6915 return;
6916 } else if (VT == MVT::v16i8) {
6917 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6918 return;
6919 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6920 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6921 return;
6922 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6923 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6924 return;
6925 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6926 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6927 return;
6928 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6929 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6930 return;
6931 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6932 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6933 return;
6934 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6935 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6936 return;
6937 }
6938 break;
6939 }
6940 case AArch64ISD::LD1DUPpost: {
6941 if (VT == MVT::v8i8) {
6942 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6943 return;
6944 } else if (VT == MVT::v16i8) {
6945 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6946 return;
6947 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6948 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6949 return;
6950 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6951 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6952 return;
6953 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6954 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6955 return;
6956 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6957 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6958 return;
6959 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6960 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6961 return;
6962 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6963 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6964 return;
6965 }
6966 break;
6967 }
6968 case AArch64ISD::LD2DUPpost: {
6969 if (VT == MVT::v8i8) {
6970 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6971 return;
6972 } else if (VT == MVT::v16i8) {
6973 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6974 return;
6975 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6976 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6977 return;
6978 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6979 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6980 return;
6981 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6982 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6983 return;
6984 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6985 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6986 return;
6987 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6988 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6989 return;
6990 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6991 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6992 return;
6993 }
6994 break;
6995 }
6996 case AArch64ISD::LD3DUPpost: {
6997 if (VT == MVT::v8i8) {
6998 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6999 return;
7000 } else if (VT == MVT::v16i8) {
7001 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7002 return;
7003 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7004 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7005 return;
7006 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7007 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7008 return;
7009 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7010 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7011 return;
7012 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7013 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7014 return;
7015 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7016 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7017 return;
7018 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7019 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7020 return;
7021 }
7022 break;
7023 }
7024 case AArch64ISD::LD4DUPpost: {
7025 if (VT == MVT::v8i8) {
7026 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7027 return;
7028 } else if (VT == MVT::v16i8) {
7029 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7030 return;
7031 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7032 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7033 return;
7034 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7035 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7036 return;
7037 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7038 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7039 return;
7040 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7041 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7042 return;
7043 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7044 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7045 return;
7046 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7047 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7048 return;
7049 }
7050 break;
7051 }
7052 case AArch64ISD::LD1LANEpost: {
7053 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7054 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7055 return;
7056 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7057 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7058 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7059 return;
7060 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7061 VT == MVT::v2f32) {
7062 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7063 return;
7064 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7065 VT == MVT::v1f64) {
7066 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7067 return;
7068 }
7069 break;
7070 }
7071 case AArch64ISD::LD2LANEpost: {
7072 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7073 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7074 return;
7075 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7076 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7077 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7078 return;
7079 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7080 VT == MVT::v2f32) {
7081 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7082 return;
7083 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7084 VT == MVT::v1f64) {
7085 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7086 return;
7087 }
7088 break;
7089 }
7090 case AArch64ISD::LD3LANEpost: {
7091 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7092 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7093 return;
7094 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7095 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7096 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7097 return;
7098 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7099 VT == MVT::v2f32) {
7100 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7101 return;
7102 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7103 VT == MVT::v1f64) {
7104 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7105 return;
7106 }
7107 break;
7108 }
7109 case AArch64ISD::LD4LANEpost: {
7110 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7111 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7112 return;
7113 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7114 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7115 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7116 return;
7117 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7118 VT == MVT::v2f32) {
7119 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7120 return;
7121 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7122 VT == MVT::v1f64) {
7123 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7124 return;
7125 }
7126 break;
7127 }
7128 case AArch64ISD::ST2post: {
7129 VT = Node->getOperand(1).getValueType();
7130 if (VT == MVT::v8i8) {
7131 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7132 return;
7133 } else if (VT == MVT::v16i8) {
7134 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7135 return;
7136 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7137 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7138 return;
7139 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7140 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7141 return;
7142 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7143 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7144 return;
7145 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7146 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7147 return;
7148 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7149 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7150 return;
7151 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7152 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7153 return;
7154 }
7155 break;
7156 }
7157 case AArch64ISD::ST3post: {
7158 VT = Node->getOperand(1).getValueType();
7159 if (VT == MVT::v8i8) {
7160 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7161 return;
7162 } else if (VT == MVT::v16i8) {
7163 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7164 return;
7165 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7166 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7167 return;
7168 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7169 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7170 return;
7171 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7172 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7173 return;
7174 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7175 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7176 return;
7177 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7178 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7179 return;
7180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7181 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7182 return;
7183 }
7184 break;
7185 }
7186 case AArch64ISD::ST4post: {
7187 VT = Node->getOperand(1).getValueType();
7188 if (VT == MVT::v8i8) {
7189 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7190 return;
7191 } else if (VT == MVT::v16i8) {
7192 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7193 return;
7194 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7195 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7196 return;
7197 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7198 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7199 return;
7200 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7201 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7202 return;
7203 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7204 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7205 return;
7206 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7207 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7208 return;
7209 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7210 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7211 return;
7212 }
7213 break;
7214 }
7215 case AArch64ISD::ST1x2post: {
7216 VT = Node->getOperand(1).getValueType();
7217 if (VT == MVT::v8i8) {
7218 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7219 return;
7220 } else if (VT == MVT::v16i8) {
7221 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7222 return;
7223 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7224 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7225 return;
7226 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7227 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7228 return;
7229 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7230 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7231 return;
7232 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7233 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7234 return;
7235 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7236 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7237 return;
7238 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7239 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7240 return;
7241 }
7242 break;
7243 }
7244 case AArch64ISD::ST1x3post: {
7245 VT = Node->getOperand(1).getValueType();
7246 if (VT == MVT::v8i8) {
7247 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7248 return;
7249 } else if (VT == MVT::v16i8) {
7250 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7251 return;
7252 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7253 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7254 return;
7255 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7256 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7257 return;
7258 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7259 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7260 return;
7261 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7262 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7263 return;
7264 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7265 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7266 return;
7267 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7268 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7269 return;
7270 }
7271 break;
7272 }
7273 case AArch64ISD::ST1x4post: {
7274 VT = Node->getOperand(1).getValueType();
7275 if (VT == MVT::v8i8) {
7276 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7277 return;
7278 } else if (VT == MVT::v16i8) {
7279 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7280 return;
7281 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7282 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7283 return;
7284 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7285 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7286 return;
7287 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7288 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7289 return;
7290 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7291 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7292 return;
7293 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7294 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7295 return;
7296 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7297 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7298 return;
7299 }
7300 break;
7301 }
7302 case AArch64ISD::ST2LANEpost: {
7303 VT = Node->getOperand(1).getValueType();
7304 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7305 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7306 return;
7307 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7308 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7309 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7310 return;
7311 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7312 VT == MVT::v2f32) {
7313 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7314 return;
7315 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7316 VT == MVT::v1f64) {
7317 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7318 return;
7319 }
7320 break;
7321 }
7322 case AArch64ISD::ST3LANEpost: {
7323 VT = Node->getOperand(1).getValueType();
7324 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7325 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7326 return;
7327 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7328 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7329 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7330 return;
7331 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7332 VT == MVT::v2f32) {
7333 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7334 return;
7335 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7336 VT == MVT::v1f64) {
7337 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::ST4LANEpost: {
7343 VT = Node->getOperand(1).getValueType();
7344 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7345 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7346 return;
7347 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7348 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7349 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7350 return;
7351 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7352 VT == MVT::v2f32) {
7353 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7354 return;
7355 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7356 VT == MVT::v1f64) {
7357 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7358 return;
7359 }
7360 break;
7361 }
7362 }
7363
7364 // Select the default instruction
7365 SelectCode(Node);
7366}
7367
7368/// createAArch64ISelDag - This pass converts a legalized DAG into an
7369/// AArch64-specific DAG, ready for instruction scheduling.
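/// Typically this is registered from the target's pass configuration; a
/// minimal sketch (the exact hook is an assumption here) looks like:
///   addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));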
7370FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7371 CodeGenOptLevel OptLevel) {
7372 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7373}
7374
7375/// When \p PredVT is a scalable vector predicate in the form
7376/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7377/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7378/// structured vectors (NumVec > 1), the output data type is
7379/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7380/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7381/// EVT.
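/// For example, since AArch64::SVEBitsPerBlock is 128, MVT::nxv4i1 maps to
/// MVT::nxv4i32 for NumVec == 1 and to MVT::nxv8i32 for NumVec == 2.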
7382static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7383 unsigned NumVec) {
7384 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7385 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7386 return EVT();
7387
7388 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7389 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7390 return EVT();
7391
7392 ElementCount EC = PredVT.getVectorElementCount();
7393 EVT ScalarVT =
7394 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7395 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7396
7397 return MemVT;
7398}
7399
7400/// Return the EVT of the data associated to a memory operation in \p
7401/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
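/// As illustrative examples: a plain LoadSDNode of nxv4i32 yields nxv4i32,
/// while an aarch64_sve_ld2_sret intrinsic governed by an nxv8i1 predicate
/// yields nxv16i16 (two registers' worth of 16-bit elements).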
7402static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7403 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7404 return MemIntr->getMemoryVT();
7405
7406 if (isa<MemSDNode>(Root)) {
7407 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7408
7409 EVT DataVT;
7410 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7411 DataVT = Load->getValueType(0);
7412 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7413 DataVT = Load->getValueType(0);
7414 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7415 DataVT = Store->getValue().getValueType();
7416 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7417 DataVT = Store->getValue().getValueType();
7418 else
7419 llvm_unreachable("Unexpected MemSDNode!");
7420
7421 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7422 }
7423
7424 const unsigned Opcode = Root->getOpcode();
7425 // For custom ISD nodes, we have to look at them individually to extract the
7426 // type of the data moved to/from memory.
7427 switch (Opcode) {
7428 case AArch64ISD::LD1_MERGE_ZERO:
7429 case AArch64ISD::LD1S_MERGE_ZERO:
7430 case AArch64ISD::LDNF1_MERGE_ZERO:
7431 case AArch64ISD::LDNF1S_MERGE_ZERO:
7432 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7433 case AArch64ISD::ST1_PRED:
7434 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7435 default:
7436 break;
7437 }
7438
7439 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7440 return EVT();
7441
7442 switch (Root->getConstantOperandVal(1)) {
7443 default:
7444 return EVT();
7445 case Intrinsic::aarch64_sme_ldr:
7446 case Intrinsic::aarch64_sme_str:
7447 return MVT::nxv16i8;
7448 case Intrinsic::aarch64_sve_prf:
7449 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7450 // width of the predicate.
7451 return getPackedVectorTypeFromPredicateType(
7452 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7453 case Intrinsic::aarch64_sve_ld2_sret:
7454 case Intrinsic::aarch64_sve_ld2q_sret:
7455 return getPackedVectorTypeFromPredicateType(
7456 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7457 case Intrinsic::aarch64_sve_st2q:
7458 return getPackedVectorTypeFromPredicateType(
7459 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7460 case Intrinsic::aarch64_sve_ld3_sret:
7461 case Intrinsic::aarch64_sve_ld3q_sret:
7462 return getPackedVectorTypeFromPredicateType(
7463 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7464 case Intrinsic::aarch64_sve_st3q:
7465 return getPackedVectorTypeFromPredicateType(
7466 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7467 case Intrinsic::aarch64_sve_ld4_sret:
7468 case Intrinsic::aarch64_sve_ld4q_sret:
7469 return getPackedVectorTypeFromPredicateType(
7470 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7471 case Intrinsic::aarch64_sve_st4q:
7472 return getPackedVectorTypeFromPredicateType(
7473 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7474 case Intrinsic::aarch64_sve_ld1udq:
7475 case Intrinsic::aarch64_sve_st1dq:
7476 return EVT(MVT::nxv1i64);
7477 case Intrinsic::aarch64_sve_ld1uwq:
7478 case Intrinsic::aarch64_sve_st1wq:
7479 return EVT(MVT::nxv1i32);
7480 }
7481}
7482
7483/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7484/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7485/// where Root is the memory access using N for its address.
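/// For example, with MemVT == nxv4i32 (16 bytes per vscale increment), an
/// address of the form (add x0, (vscale * 32)) folds to Base = x0 and
/// OffImm = #2, provided 2 lies within [Min, Max].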
7486template <int64_t Min, int64_t Max>
7487bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7488 SDValue &Base,
7489 SDValue &OffImm) {
7490 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7491 const DataLayout &DL = CurDAG->getDataLayout();
7492 const MachineFrameInfo &MFI = MF->getFrameInfo();
7493
7494 if (N.getOpcode() == ISD::FrameIndex) {
7495 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7496 // We can only encode VL scaled offsets, so only fold in frame indexes
7497 // referencing SVE objects.
7499 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7500 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7501 return true;
7502 }
7503
7504 return false;
7505 }
7506
7507 if (MemVT == EVT())
7508 return false;
7509
7510 if (N.getOpcode() != ISD::ADD)
7511 return false;
7512
7513 SDValue VScale = N.getOperand(1);
7514 int64_t MulImm = std::numeric_limits<int64_t>::max();
7515 if (VScale.getOpcode() == ISD::VSCALE) {
7516 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7517 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7518 int64_t ByteOffset = C->getSExtValue();
7519 const auto KnownVScale =
7520 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7521
7522 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7523 return false;
7524
7525 MulImm = ByteOffset / KnownVScale;
7526 } else
7527 return false;
7528
7529 TypeSize TS = MemVT.getSizeInBits();
7530 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7531
7532 if ((MulImm % MemWidthBytes) != 0)
7533 return false;
7534
7535 int64_t Offset = MulImm / MemWidthBytes;
7536 if (Offset < Min || Offset > Max)
7537 return false;
7538
7539 Base = N.getOperand(0);
7540 if (Base.getOpcode() == ISD::FrameIndex) {
7541 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7542 // We can only encode VL scaled offsets, so only fold in frame indexes
7543 // referencing SVE objects.
7545 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7546 }
7547
7548 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7549 return true;
7550}
7551
7552/// Select register plus register addressing mode for SVE, with scaled
7553/// offset.
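/// For example, with Scale == 2 (32-bit elements), (add x0, (shl x1, 2))
/// selects Base = x0 and Offset = x1; a constant addend that is a multiple of
/// 4 is instead scaled and materialised into a register with MOVi64imm.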
7554bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7555 SDValue &Base,
7556 SDValue &Offset) {
7557 if (N.getOpcode() != ISD::ADD)
7558 return false;
7559
7560 // Process an ADD node.
7561 const SDValue LHS = N.getOperand(0);
7562 const SDValue RHS = N.getOperand(1);
7563
7564 // 8-bit data does not come with the SHL node, so it is treated
7565 // separately.
7566 if (Scale == 0) {
7567 Base = LHS;
7568 Offset = RHS;
7569 return true;
7570 }
7571
7572 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7573 int64_t ImmOff = C->getSExtValue();
7574 unsigned Size = 1 << Scale;
7575
7576 // To use the reg+reg addressing mode, the immediate must be a multiple of
7577 // the vector element's byte size.
7578 if (ImmOff % Size)
7579 return false;
7580
7581 SDLoc DL(N);
7582 Base = LHS;
7583 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7584 SDValue Ops[] = {Offset};
7585 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7586 Offset = SDValue(MI, 0);
7587 return true;
7588 }
7589
7590 // Check if the RHS is a shift node with a constant.
7591 if (RHS.getOpcode() != ISD::SHL)
7592 return false;
7593
7594 const SDValue ShiftRHS = RHS.getOperand(1);
7595 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7596 if (C->getZExtValue() == Scale) {
7597 Base = LHS;
7598 Offset = RHS.getOperand(0);
7599 return true;
7600 }
7601
7602 return false;
7603}
7604
7605bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7606 const AArch64TargetLowering *TLI =
7607 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7608
7609 return TLI->isAllActivePredicate(*CurDAG, N);
7610}
7611
7612bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7613 EVT VT = N.getValueType();
7614 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7615}
7616
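// SelectSMETileSlice splits a tile-slice index into a base plus an immediate
// offset, where the immediate must be a positive multiple of Scale no greater
// than MaxSize (a bare constant index uses a zero base); anything else is
// matched as Base = N with a zero offset.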
7617bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7618 SDValue &Base, SDValue &Offset,
7619 unsigned Scale) {
7620 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7621 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7622 int64_t ImmOff = C->getSExtValue();
7623 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7624 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7625 }
7626 return SDValue();
7627 };
7628
7629 if (SDValue C = MatchConstantOffset(N)) {
7630 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7631 Offset = C;
7632 return true;
7633 }
7634
7635 // Try to untangle an ADD node into a 'reg + offset'
7636 if (CurDAG->isBaseWithConstantOffset(N)) {
7637 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7638 Base = N.getOperand(0);
7639 Offset = C;
7640 return true;
7641 }
7642 }
7643
7644 // By default, just match reg + 0.
7645 Base = N;
7646 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7647 return true;
7648}
7649
7650bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7651 SDValue &Imm) {
7652 AArch64CC::CondCode CC =
7653 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7654 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7655 // Check conservatively if the immediate fits the valid range [0, 64).
7656 // Immediate variants for GE and HS definitely need to be decremented
7657 // when lowering the pseudos later, so an immediate of 1 would become 0.
7658 // For the inverse conditions LT and LO we don't know for sure if they
7659 // will need a decrement but should the decision be made to reverse the
7660 // branch condition, we again end up with the need to decrement.
7661 // The same argument holds for LE, LS, GT and HI and possibly
7662 // incremented immediates. This can lead to slightly less optimal
7663 // codegen, e.g. we never codegen the legal case
7664 // cblt w0, #63, A
7665 // because we could end up with the illegal case
7666 // cbge w0, #64, B
7667 // should the decision to reverse the branch direction be made. For the
7668 // lower bound cases this is no problem since we can express comparisons
7669 // against 0 with either tbz/tbnz or using wzr/xzr.
7670 uint64_t LowerBound = 0, UpperBound = 64;
7671 switch (CC) {
7672 case AArch64CC::GE:
7673 case AArch64CC::HS:
7674 case AArch64CC::LT:
7675 case AArch64CC::LO:
7676 LowerBound = 1;
7677 break;
7678 case AArch64CC::LE:
7679 case AArch64CC::LS:
7680 case AArch64CC::GT:
7681 case AArch64CC::HI:
7682 UpperBound = 63;
7683 break;
7684 default:
7685 break;
7686 }
7687
7688 if (CN->getAPIntValue().uge(LowerBound) &&
7689 CN->getAPIntValue().ult(UpperBound)) {
7690 SDLoc DL(N);
7691 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7692 return true;
7693 }
7694 }
7695
7696 return false;
7697}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
This file implements the APSInt class, which is a simple class that represents an arbitrary sized integer that knows its signedness.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
#define LLVM_DEBUG(...)
Definition: Debug.h:119
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition: APFloat.cpp:5999
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1670
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1598
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1452
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
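The APInt queries above are the usual way bitfield matchers recover a field's position and width from a shifted-mask constant; a small illustrative helper (hypothetical name):

#include "llvm/ADT/APInt.h"
using namespace llvm;

static bool getMaskLSBAndWidth(const APInt &Mask, unsigned &LSB,
                               unsigned &Width) {
  if (!Mask.isShiftedMask()) // Require one contiguous run of ones.
    return false;
  LSB = Mask.countr_zero();  // Trailing zeros give the low bit index.
  Width = Mask.popcount();   // The set-bit count gives the field width.
  return true;
}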
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruction selectors.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representation.
Definition: SelectionDAG.h:229
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
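A small sketch of a typical use of computeKnownBits in a selector, e.g. proving that the upper bits of a value are zero before narrowing it; the names here are illustrative.

static bool topBitsKnownZero(SelectionDAG &DAG, SDValue V, unsigned NumBits) {
  KnownBits Known = DAG.computeKnownBits(V);
  // Known.Zero has a set bit for every position proven to be zero.
  return Known.Zero.countl_one() >= NumBits;
}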
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
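A sketch combining the node-creation helpers above to widen a 32-bit value to i64 via SUBREG_TO_REG and the sub_32 subregister index, a pattern this selector relies on; the wrapper name is illustrative.

static SDValue widenToI64Sketch(SelectionDAG *CurDAG, const SDLoc &DL,
                                SDValue Narrow) {
  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
  SDValue SubIdx = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
  MachineSDNode *Wide = CurDAG->getMachineNode(
      TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, Zero, Narrow, SubIdx);
  return SDValue(Wide, 0);
}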
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target ...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:953
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) ...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
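An illustrative round trip through the logical-immediate helpers above; the value below is a repeating 16-bit pattern that is representable in the N:immr:imms form, and the wrapper function name is illustrative.

#include <cassert>

static void logicalImmediateRoundTrip() {
  uint64_t Imm = 0x00FF00FF00FF00FFULL; // 8 contiguous ones per 16-bit chunk.
  uint64_t Encoding = 0;
  if (AArch64_AM::processLogicalImmediate(Imm, /*RegSize=*/64, Encoding)) {
    assert(AArch64_AM::isLogicalImmediate(Imm, 64));
    assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Imm);
  }
}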
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount. imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> ...
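An illustrative round trip through the shifter-immediate helpers above, encoding LSL #12 and recovering both fields; the wrapper name is illustrative.

#include <cassert>

static void shifterImmRoundTrip() {
  unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  assert(AArch64_AM::getShiftType(ShiftImm) == AArch64_AM::LSL);
  assert(AArch64_AM::getShiftValue(ShiftImm) == 12);
}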
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1351
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.
Definition: ISDOpcodes.h:215
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ WRITE_REGISTER
Definition: ISDOpcodes.h:135
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the named register global variables extension.
Definition: ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a scalable vector.
Definition: ISDOpcodes.h:1448
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ AssertZext
Definition: ISDOpcodes.h:63
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:208
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1634
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1665
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:260
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit version).
Definition: MathExtras.h:276
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:157
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:282
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1987
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version).
Definition: MathExtras.h:270
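These MathExtras predicates are the scalar counterparts of APInt::isShiftedMask used by the bitfield matchers; two illustrative compile-time checks:

#include "llvm/Support/MathExtras.h"

static_assert(llvm::isShiftedMask_64(0x0000FF0000000000ULL),
              "a contiguous run of ones, shifted up");
static_assert(llvm::isMask_64(0xFFULL),
              "a contiguous run of ones starting at bit 0");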
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:82
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:858
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:458
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
Matching combinators.