1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
76 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
78 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
79 return SelectShiftedRegister(N, false, Reg, Shift);
80 }
81 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, true, Reg, Shift);
83 }
84 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
85 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
86 }
87 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
98 }
99 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexed(N, 1, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 2, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 4, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 8, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 16, Base, OffImm);
119 }
120 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
134 }
135 template <unsigned Size, unsigned Max>
136 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
137 // Test if there is an appropriate addressing mode and check if the
138 // immediate fits.
139 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
140 if (Found) {
141 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
142 int64_t C = CI->getSExtValue();
143 if (C <= Max)
144 return true;
145 }
146 }
147
148 // Otherwise, base only, materialize address in register.
149 Base = N;
150 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
151 return true;
152 }
153
154 template<int Width>
155 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
156 SDValue &SignExtend, SDValue &DoShift) {
157 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
158 }
159
160 template<int Width>
161 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
162 SDValue &SignExtend, SDValue &DoShift) {
163 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
164 }
165
166 bool SelectExtractHigh(SDValue N, SDValue &Res) {
167 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
168 N = N->getOperand(0);
169 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
170 !isa<ConstantSDNode>(N->getOperand(1)))
171 return false;
172 EVT VT = N->getValueType(0);
173 EVT LVT = N->getOperand(0).getValueType();
174 unsigned Index = N->getConstantOperandVal(1);
175 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
176 Index != VT.getVectorNumElements())
177 return false;
178 Res = N->getOperand(0);
179 return true;
180 }
181
182 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
183 if (N.getOpcode() != AArch64ISD::VLSHR)
184 return false;
185 SDValue Op = N->getOperand(0);
186 EVT VT = Op.getValueType();
187 unsigned ShtAmt = N->getConstantOperandVal(1);
188 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
189 return false;
190
191 APInt Imm;
192 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
193 Imm = APInt(VT.getScalarSizeInBits(),
194 Op.getOperand(1).getConstantOperandVal(0)
195 << Op.getOperand(1).getConstantOperandVal(1));
196 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
197 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0));
200 else
201 return false;
202
203 if (Imm != 1ULL << (ShtAmt - 1))
204 return false;
205
206 Res1 = Op.getOperand(0);
207 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
208 return true;
209 }
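 // Informally (illustrative summary): the check above matches
 // (srl (add X, splat(1 << (ShtAmt - 1))), ShtAmt), i.e. a right shift of X
 // with rounding, which is the shape the rounding-shift patterns expect.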
210
211 bool SelectDupZeroOrUndef(SDValue N) {
212 switch(N->getOpcode()) {
213 case ISD::UNDEF:
214 return true;
215 case AArch64ISD::DUP:
216 case ISD::SPLAT_VECTOR: {
217 auto Opnd0 = N->getOperand(0);
218 if (isNullConstant(Opnd0))
219 return true;
220 if (isNullFPConstant(Opnd0))
221 return true;
222 break;
223 }
224 default:
225 break;
226 }
227
228 return false;
229 }
230
231 bool SelectAny(SDValue) { return true; }
232
233 bool SelectDupZero(SDValue N) {
234 switch(N->getOpcode()) {
235 case AArch64ISD::DUP:
236 case ISD::SPLAT_VECTOR: {
237 auto Opnd0 = N->getOperand(0);
238 if (isNullConstant(Opnd0))
239 return true;
240 if (isNullFPConstant(Opnd0))
241 return true;
242 break;
243 }
244 }
245
246 return false;
247 }
248
249 template <MVT::SimpleValueType VT, bool Negate>
250 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
251 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT>
260 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVECpyDupImm(N, VT, Imm, Shift);
262 }
263
264 template <MVT::SimpleValueType VT, bool Invert = false>
265 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
266 return SelectSVELogicalImm(N, VT, Imm, Invert);
267 }
268
269 template <MVT::SimpleValueType VT>
270 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
271 return SelectSVEArithImm(N, VT, Imm);
272 }
273
274 template <unsigned Low, unsigned High, bool AllowSaturation = false>
275 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
276 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
277 }
278
279 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
280 if (N->getOpcode() != ISD::SPLAT_VECTOR)
281 return false;
282
283 EVT EltVT = N->getValueType(0).getVectorElementType();
284 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
285 /* High */ EltVT.getFixedSizeInBits(),
286 /* AllowSaturation */ true, Imm);
287 }
288
289 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
290 template<signed Min, signed Max, signed Scale, bool Shift>
291 bool SelectCntImm(SDValue N, SDValue &Imm) {
292 if (!isa<ConstantSDNode>(N))
293 return false;
294
295 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
296 if (Shift)
297 MulImm = 1LL << MulImm;
298
299 if ((MulImm % std::abs(Scale)) != 0)
300 return false;
301
302 MulImm /= Scale;
303 if ((MulImm >= Min) && (MulImm <= Max)) {
304 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
305 return true;
306 }
307
308 return false;
309 }
310
311 template <signed Max, signed Scale>
312 bool SelectEXTImm(SDValue N, SDValue &Imm) {
313 if (!isa<ConstantSDNode>(N))
314 return false;
315
316 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
317
318 if (MulImm >= 0 && MulImm <= Max) {
319 MulImm *= Scale;
320 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
321 return true;
322 }
323
324 return false;
325 }
326
327 template <unsigned BaseReg, unsigned Max>
328 bool ImmToReg(SDValue N, SDValue &Imm) {
329 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
330 uint64_t C = CI->getZExtValue();
331
332 if (C > Max)
333 return false;
334
335 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
336 return true;
337 }
338 return false;
339 }
340
341 /// Form sequences of consecutive 64/128-bit registers for use in NEON
342 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
343 /// between 1 and 4 elements. If it contains a single element, that element
344 /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
345 SDValue createDTuple(ArrayRef<SDValue> Vecs);
346 SDValue createQTuple(ArrayRef<SDValue> Vecs);
347 // Form a sequence of SVE registers for instructions using a list of vectors,
348 // e.g. structured loads and stores (ldN, stN).
349 SDValue createZTuple(ArrayRef<SDValue> Vecs);
350
351 // Similar to above, except the register must start at a multiple of the
352 // tuple size, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
353 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
354
355 /// Generic helper for the createDTuple/createQTuple
356 /// functions. Those should almost always be called instead.
357 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
358 const unsigned SubRegs[]);
359
360 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
361
362 bool tryIndexedLoad(SDNode *N);
363
364 void SelectPtrauthAuth(SDNode *N);
365 void SelectPtrauthResign(SDNode *N);
366
367 bool trySelectStackSlotTagP(SDNode *N);
368 void SelectTagP(SDNode *N);
369
370 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
371 unsigned SubRegIdx);
372 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
373 unsigned SubRegIdx);
374 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
375 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
376 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
377 unsigned Opc_rr, unsigned Opc_ri,
378 bool IsIntr = false);
379 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
380 unsigned Scale, unsigned Opc_ri,
381 unsigned Opc_rr);
382 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
383 bool IsZmMulti, unsigned Opcode,
384 bool HasPred = false);
385 void SelectPExtPair(SDNode *N, unsigned Opc);
386 void SelectWhilePair(SDNode *N, unsigned Opc);
387 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
388 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
389 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
390 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
391 bool IsTupleInput, unsigned Opc);
392 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
393
394 template <unsigned MaxIdx, unsigned Scale>
395 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
396 unsigned Op);
397 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
398 unsigned Op, unsigned MaxIdx, unsigned Scale,
399 unsigned BaseReg = 0);
400 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
401 /// SVE Reg+Imm addressing mode.
402 template <int64_t Min, int64_t Max>
403 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
404 SDValue &OffImm);
405 /// SVE Reg+Reg address mode.
406 template <unsigned Scale>
407 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
408 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
409 }
410
411 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
412 unsigned Opc, uint32_t MaxImm);
413
414 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
415
416 template <unsigned MaxIdx, unsigned Scale>
417 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
418 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
419 }
420
421 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
422 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
423 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
424 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
426 unsigned Opc_rr, unsigned Opc_ri);
427 std::tuple<unsigned, SDValue, SDValue>
428 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
429 const SDValue &OldBase, const SDValue &OldOffset,
430 unsigned Scale);
431
432 bool tryBitfieldExtractOp(SDNode *N);
433 bool tryBitfieldExtractOpFromSExt(SDNode *N);
434 bool tryBitfieldInsertOp(SDNode *N);
435 bool tryBitfieldInsertInZeroOp(SDNode *N);
436 bool tryShiftAmountMod(SDNode *N);
437
438 bool tryReadRegister(SDNode *N);
439 bool tryWriteRegister(SDNode *N);
440
441 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
442 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
443
444 bool trySelectXAR(SDNode *N);
445
446// Include the pieces autogenerated from the target description.
447#include "AArch64GenDAGISel.inc"
448
449private:
450 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
451 SDValue &Shift);
452 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
453 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
454 SDValue &OffImm) {
455 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
456 }
457 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
458 unsigned Size, SDValue &Base,
459 SDValue &OffImm);
460 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
461 SDValue &OffImm);
462 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
463 SDValue &OffImm);
464 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
465 SDValue &Offset, SDValue &SignExtend,
466 SDValue &DoShift);
467 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
471 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
472 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
473 SDValue &Offset, SDValue &SignExtend);
474
475 template<unsigned RegWidth>
476 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
477 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
478 }
479
480 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
481
482 template<unsigned RegWidth>
483 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
484 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
485 }
486
487 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
488 unsigned Width);
489
490 bool SelectCMP_SWAP(SDNode *N);
491
492 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493 bool Negate);
494 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
495 bool Negate);
496 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
497 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
498
499 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
500 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
501 bool AllowSaturation, SDValue &Imm);
502
503 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
504 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
505 SDValue &Offset);
506 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
507 SDValue &Offset, unsigned Scale = 1);
508
509 bool SelectAllActivePredicate(SDValue N);
510 bool SelectAnyPredicate(SDValue N);
511
512 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
513};
514
515class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
516public:
517 static char ID;
518 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
519 CodeGenOptLevel OptLevel)
520 : SelectionDAGISelLegacy(
521 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
522};
523} // end anonymous namespace
524
525char AArch64DAGToDAGISelLegacy::ID = 0;
526
527INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
528
529/// isIntImmediate - This method tests to see if the node is a constant
530/// operand. If so, Imm will receive the zero-extended value.
531static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
532 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
533 Imm = C->getZExtValue();
534 return true;
535 }
536 return false;
537}
538
539// isIntImmediate - This method tests to see if N is a constant operand.
540// If so, Imm will receive the value.
541static bool isIntImmediate(SDValue N, uint64_t &Imm) {
542 return isIntImmediate(N.getNode(), Imm);
543}
544
545// isOpcWithIntImmediate - This method tests to see if the node is a specific
546// opcode and that it has an immediate integer right operand.
547// If so, Imm will receive the value.
548static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
549 uint64_t &Imm) {
550 return N->getOpcode() == Opc &&
551 isIntImmediate(N->getOperand(1).getNode(), Imm);
552}
553
554// isIntImmediateEq - This method tests to see if N is a constant operand that
555// is equivalent to 'ImmExpected'.
556#ifndef NDEBUG
557static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
558 uint64_t Imm;
559 if (!isIntImmediate(N.getNode(), Imm))
560 return false;
561 return Imm == ImmExpected;
562}
563#endif
564
565bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
566 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
567 std::vector<SDValue> &OutOps) {
568 switch(ConstraintID) {
569 default:
570 llvm_unreachable("Unexpected asm memory constraint");
571 case InlineAsm::ConstraintCode::m:
572 case InlineAsm::ConstraintCode::o:
573 case InlineAsm::ConstraintCode::Q:
574 // We need to make sure that this one operand does not end up in XZR, thus
575 // require the address to be in a PointerRegClass register.
576 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
577 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
578 SDLoc dl(Op);
579 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
580 SDValue NewOp =
581 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
582 dl, Op.getValueType(),
583 Op, RC), 0);
584 OutOps.push_back(NewOp);
585 return false;
586 }
587 return true;
588}
589
590/// SelectArithImmed - Select an immediate value that can be represented as
591/// a 12-bit value shifted left by either 0 or 12. If so, return true with
592/// Val set to the 12-bit value and Shift set to the shifter operand.
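/// For example (illustrative): an immediate of 0x456000 is selected as
/// Val = 0x456 with Shift = LSL #12, whereas 0x1001 is rejected because it
/// fits neither the unshifted nor the LSL #12 form.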
593bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
594 SDValue &Shift) {
595 // This function is called from the addsub_shifted_imm ComplexPattern,
596 // which lists [imm] as the list of opcodes it's interested in; however,
597 // we still need to check whether the operand is actually an immediate
598 // here because the ComplexPattern opcode list is only used in
599 // root-level opcode matching.
600 if (!isa<ConstantSDNode>(N.getNode()))
601 return false;
602
603 uint64_t Immed = N.getNode()->getAsZExtVal();
604 unsigned ShiftAmt;
605
606 if (Immed >> 12 == 0) {
607 ShiftAmt = 0;
608 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
609 ShiftAmt = 12;
610 Immed = Immed >> 12;
611 } else
612 return false;
613
614 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
615 SDLoc dl(N);
616 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
617 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
618 return true;
619}
620
621/// SelectNegArithImmed - As above, but negates the value before trying to
622/// select it.
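/// Illustrative example: for an i32 constant of -5 the negated value is 5, so
/// this returns Val = 5 with Shift = LSL #0, letting the negated-immediate
/// form of the add/sub instruction be used.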
623bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
624 SDValue &Shift) {
625 // This function is called from the addsub_shifted_imm ComplexPattern,
626 // which lists [imm] as the list of opcodes it's interested in; however,
627 // we still need to check whether the operand is actually an immediate
628 // here because the ComplexPattern opcode list is only used in
629 // root-level opcode matching.
630 if (!isa<ConstantSDNode>(N.getNode()))
631 return false;
632
633 // The immediate operand must be a 24-bit zero-extended immediate.
634 uint64_t Immed = N.getNode()->getAsZExtVal();
635
636 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
637 // have the opposite effect on the C flag, so this pattern mustn't match under
638 // those circumstances.
639 if (Immed == 0)
640 return false;
641
642 if (N.getValueType() == MVT::i32)
643 Immed = ~((uint32_t)Immed) + 1;
644 else
645 Immed = ~Immed + 1ULL;
646 if (Immed & 0xFFFFFFFFFF000000ULL)
647 return false;
648
649 Immed &= 0xFFFFFFULL;
650 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
651 Shift);
652}
653
654/// getShiftTypeForNode - Translate a shift node to the corresponding
655/// ShiftType value.
656static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
657 switch (N.getOpcode()) {
658 default:
659 return AArch64_AM::InvalidShiftExtend;
660 case ISD::SHL:
661 return AArch64_AM::LSL;
662 case ISD::SRL:
663 return AArch64_AM::LSR;
664 case ISD::SRA:
665 return AArch64_AM::ASR;
666 case ISD::ROTR:
667 return AArch64_AM::ROR;
668 }
669}
670
671static bool isMemOpOrPrefetch(SDNode *N) {
672 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
673}
674
675/// Determine whether it is worth it to fold SHL into the addressing
676/// mode.
677static bool isWorthFoldingSHL(SDValue V) {
678 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
679 // It is worth folding logical shift of up to three places.
680 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
681 if (!CSD)
682 return false;
683 unsigned ShiftVal = CSD->getZExtValue();
684 if (ShiftVal > 3)
685 return false;
686
687 // Check if this particular node is reused in any non-memory related
688 // operation. If yes, do not try to fold this node into the address
689 // computation, since the computation will be kept.
690 const SDNode *Node = V.getNode();
691 for (SDNode *UI : Node->users())
692 if (!isMemOpOrPrefetch(UI))
693 for (SDNode *UII : UI->users())
694 if (!isMemOpOrPrefetch(UII))
695 return false;
696 return true;
697}
698
699/// Determine whether it is worth folding V into an extended register
700/// addressing mode.
701bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
702 // Trivial if we are optimizing for code size or if there is only
703 // one use of the value.
704 if (CurDAG->shouldOptForSize() || V.hasOneUse())
705 return true;
706
707 // If a subtarget has a slow shift, folding a shift into multiple loads
708 // costs additional micro-ops.
709 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
710 return false;
711
712 // Check whether we're going to emit the address arithmetic anyway because
713 // it's used by a non-address operation.
714 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
715 return true;
716 if (V.getOpcode() == ISD::ADD) {
717 const SDValue LHS = V.getOperand(0);
718 const SDValue RHS = V.getOperand(1);
719 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
720 return true;
721 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
722 return true;
723 }
724
725 // It hurts otherwise, since the value will be reused.
726 return false;
727}
728
729/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
730/// in order to select more shifted-register operands.
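/// Illustrative example (hypothetical values): for i32, (and (shl x, 2),
/// 0xFFFFFF00) has LowZBits = 8 and MaskLen = 24, so it can be rewritten as a
/// UBFM (lsr x, #6) used as a shifted register with LSL #8.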
731bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
732 SDValue &Shift) {
733 EVT VT = N.getValueType();
734 if (VT != MVT::i32 && VT != MVT::i64)
735 return false;
736
737 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
738 return false;
739 SDValue LHS = N.getOperand(0);
740 if (!LHS->hasOneUse())
741 return false;
742
743 unsigned LHSOpcode = LHS->getOpcode();
744 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
745 return false;
746
747 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
748 if (!ShiftAmtNode)
749 return false;
750
751 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
752 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
753 if (!RHSC)
754 return false;
755
756 APInt AndMask = RHSC->getAPIntValue();
757 unsigned LowZBits, MaskLen;
758 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
759 return false;
760
761 unsigned BitWidth = N.getValueSizeInBits();
762 SDLoc DL(LHS);
763 uint64_t NewShiftC;
764 unsigned NewShiftOp;
765 if (LHSOpcode == ISD::SHL) {
766 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
767 // BitWidth != LowZBits + MaskLen doesn't match the pattern
768 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
769 return false;
770
771 NewShiftC = LowZBits - ShiftAmtC;
772 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
773 } else {
774 if (LowZBits == 0)
775 return false;
776
777 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
778 NewShiftC = LowZBits + ShiftAmtC;
779 if (NewShiftC >= BitWidth)
780 return false;
781
782 // SRA needs all high bits
783 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
784 return false;
785
786 // SRL high bits can be 0 or 1
787 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
788 return false;
789
790 if (LHSOpcode == ISD::SRL)
791 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
792 else
793 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
794 }
795
796 assert(NewShiftC < BitWidth && "Invalid shift amount");
797 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
798 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
799 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
800 NewShiftAmt, BitWidthMinus1),
801 0);
802 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
803 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
804 return true;
805}
806
807/// getExtendTypeForNode - Translate an extend node to the corresponding
808/// ExtendType value.
809static AArch64_AM::ShiftExtendType
810getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
811 if (N.getOpcode() == ISD::SIGN_EXTEND ||
812 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
813 EVT SrcVT;
814 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
815 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
816 else
817 SrcVT = N.getOperand(0).getValueType();
818
819 if (!IsLoadStore && SrcVT == MVT::i8)
820 return AArch64_AM::SXTB;
821 else if (!IsLoadStore && SrcVT == MVT::i16)
822 return AArch64_AM::SXTH;
823 else if (SrcVT == MVT::i32)
824 return AArch64_AM::SXTW;
825 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
826
827 return AArch64_AM::InvalidShiftExtend;
828 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
829 N.getOpcode() == ISD::ANY_EXTEND) {
830 EVT SrcVT = N.getOperand(0).getValueType();
831 if (!IsLoadStore && SrcVT == MVT::i8)
832 return AArch64_AM::UXTB;
833 else if (!IsLoadStore && SrcVT == MVT::i16)
834 return AArch64_AM::UXTH;
835 else if (SrcVT == MVT::i32)
836 return AArch64_AM::UXTW;
837 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
838
839 return AArch64_AM::InvalidShiftExtend;
840 } else if (N.getOpcode() == ISD::AND) {
841 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
842 if (!CSD)
843 return AArch64_AM::InvalidShiftExtend;
844 uint64_t AndMask = CSD->getZExtValue();
845
846 switch (AndMask) {
847 default:
848 return AArch64_AM::InvalidShiftExtend;
849 case 0xFF:
850 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
851 case 0xFFFF:
852 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
853 case 0xFFFFFFFF:
854 return AArch64_AM::UXTW;
855 }
856 }
857
858 return AArch64_AM::InvalidShiftExtend;
859}
860
861/// Determine whether it is worth folding V into an extended register of an
862/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
863/// instruction, and the shift should be treated as worth folding even if it
864/// has multiple uses.
865bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
866 // Trivial if we are optimizing for code size or if there is only
867 // one use of the value.
868 if (CurDAG->shouldOptForSize() || V.hasOneUse())
869 return true;
870
871 // If a subtarget has a fastpath LSL we can fold a logical shift into
872 // the add/sub and save a cycle.
873 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
874 V.getConstantOperandVal(1) <= 4 &&
875 getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
876 return true;
877
878 // It hurts otherwise, since the value will be reused.
879 return false;
880}
881
882/// SelectShiftedRegister - Select a "shifted register" operand. If the value
883/// is not shifted, set the Shift operand to default of "LSL 0". The logical
884/// instructions allow the shifted register to be rotated, but the arithmetic
885/// instructions do not. The AllowROR parameter specifies whether ROR is
886/// supported.
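/// For example (illustrative): (or x0, (shl x1, 3)) can be selected as
/// "orr x0, x0, x1, lsl #3"; the arithmetic variants pass AllowROR = false
/// and therefore reject rotates.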
887bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
888 SDValue &Reg, SDValue &Shift) {
889 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
890 return true;
891
892 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
893 if (ShType == AArch64_AM::InvalidShiftExtend)
894 return false;
895 if (!AllowROR && ShType == AArch64_AM::ROR)
896 return false;
897
898 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
899 unsigned BitSize = N.getValueSizeInBits();
900 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
901 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
902
903 Reg = N.getOperand(0);
904 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
905 return isWorthFoldingALU(N, true);
906 }
907
908 return false;
909}
910
911/// Instructions that accept extend modifiers like UXTW expect the register
912/// being extended to be a GPR32, but the incoming DAG might be acting on a
913/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
914/// this is the case.
915static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
916 if (N.getValueType() == MVT::i32)
917 return N;
918
919 SDLoc dl(N);
920 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
921}
922
923// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
924template<signed Low, signed High, signed Scale>
925bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
926 if (!isa<ConstantSDNode>(N))
927 return false;
928
929 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
930 if ((MulImm % std::abs(Scale)) == 0) {
931 int64_t RDVLImm = MulImm / Scale;
932 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
933 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
934 return true;
935 }
936 }
937
938 return false;
939}
940
941/// SelectArithExtendedRegister - Select an "extended register" operand. This
942/// operand folds in an extend followed by an optional left shift.
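/// Illustrative example: (add x0, (shl (sext_inreg x1, i8), 2)) can be
/// selected as "add x0, x0, w1, sxtb #2", with the 64-bit source narrowed to
/// a GPR32 via narrowIfNeeded.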
943bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
944 SDValue &Shift) {
945 unsigned ShiftVal = 0;
946 AArch64_AM::ShiftExtendType Ext;
947
948 if (N.getOpcode() == ISD::SHL) {
949 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
950 if (!CSD)
951 return false;
952 ShiftVal = CSD->getZExtValue();
953 if (ShiftVal > 4)
954 return false;
955
956 Ext = getExtendTypeForNode(N.getOperand(0));
957 if (Ext == AArch64_AM::InvalidShiftExtend)
958 return false;
959
960 Reg = N.getOperand(0).getOperand(0);
961 } else {
962 Ext = getExtendTypeForNode(N);
963 if (Ext == AArch64_AM::InvalidShiftExtend)
964 return false;
965
966 Reg = N.getOperand(0);
967
968 // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
969 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
970 auto isDef32 = [](SDValue N) {
971 unsigned Opc = N.getOpcode();
972 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
973 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
974 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
975 Opc != ISD::FREEZE;
976 };
977 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
978 isDef32(Reg))
979 return false;
980 }
981
982 // AArch64 mandates that the RHS of the operation must use the smallest
983 // register class that could contain the size being extended from. Thus,
984 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
985 // there might not be an actual 32-bit value in the program. We can
986 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
987 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
988 Reg = narrowIfNeeded(CurDAG, Reg);
989 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
990 MVT::i32);
991 return isWorthFoldingALU(N);
992}
993
994/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
995/// operand is used by instructions that take an SP operand.
996bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
997 SDValue &Shift) {
998 unsigned ShiftVal = 0;
999 AArch64_AM::ShiftExtendType Ext;
1000
1001 if (N.getOpcode() != ISD::SHL)
1002 return false;
1003
1004 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1005 if (!CSD)
1006 return false;
1007 ShiftVal = CSD->getZExtValue();
1008 if (ShiftVal > 4)
1009 return false;
1010
1011 Ext = AArch64_AM::UXTX;
1012 Reg = N.getOperand(0);
1013 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1014 MVT::i32);
1015 return isWorthFoldingALU(N);
1016}
1017
1018/// If there's a use of this ADDlow that's not itself a load/store then we'll
1019/// need to create a real ADD instruction from it anyway and there's no point in
1020/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1021/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1022/// leads to duplicated ADRP instructions.
1023static bool isWorthFoldingADDlow(SDValue N) {
1024 for (auto *User : N->users()) {
1025 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1026 User->getOpcode() != ISD::ATOMIC_LOAD &&
1027 User->getOpcode() != ISD::ATOMIC_STORE)
1028 return false;
1029
1030 // ldar and stlr have much more restrictive addressing modes (just a
1031 // register).
1032 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1033 return false;
1034 }
1035
1036 return true;
1037}
1038
1039/// Check if the immediate offset is valid as a scaled immediate.
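/// For example (illustrative): with Size = 8 and Range = 0x1000, offsets that
/// are non-negative multiples of 8 up to 32760 are accepted.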
1040static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1041 unsigned Size) {
1042 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1043 Offset < (Range << Log2_32(Size)))
1044 return true;
1045 return false;
1046}
1047
1048/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1049/// immediate" address. The "Size" argument is the size in bytes of the memory
1050/// reference, which determines the scale.
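/// Illustrative example: for a signed 7-bit immediate with Size = 8 (an
/// LDP/STP-style form), byte offsets that are multiples of 8 in [-512, 504]
/// are accepted, and OffImm holds the offset divided by 8.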
1051bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1052 unsigned BW, unsigned Size,
1053 SDValue &Base,
1054 SDValue &OffImm) {
1055 SDLoc dl(N);
1056 const DataLayout &DL = CurDAG->getDataLayout();
1057 const TargetLowering *TLI = getTargetLowering();
1058 if (N.getOpcode() == ISD::FrameIndex) {
1059 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1060 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1061 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1062 return true;
1063 }
1064
1065 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
1066 // signed mode selected here doesn't support labels/immediates, only base+offset.
1067 if (CurDAG->isBaseWithConstantOffset(N)) {
1068 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1069 if (IsSignedImm) {
1070 int64_t RHSC = RHS->getSExtValue();
1071 unsigned Scale = Log2_32(Size);
1072 int64_t Range = 0x1LL << (BW - 1);
1073
1074 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1075 RHSC < (Range << Scale)) {
1076 Base = N.getOperand(0);
1077 if (Base.getOpcode() == ISD::FrameIndex) {
1078 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1079 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1080 }
1081 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1082 return true;
1083 }
1084 } else {
1085 // unsigned Immediate
1086 uint64_t RHSC = RHS->getZExtValue();
1087 unsigned Scale = Log2_32(Size);
1088 uint64_t Range = 0x1ULL << BW;
1089
1090 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1091 Base = N.getOperand(0);
1092 if (Base.getOpcode() == ISD::FrameIndex) {
1093 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1094 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1095 }
1096 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1097 return true;
1098 }
1099 }
1100 }
1101 }
1102 // Base only. The address will be materialized into a register before
1103 // the memory is accessed.
1104 // add x0, Xbase, #offset
1105 // stp x1, x2, [x0]
1106 Base = N;
1107 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1108 return true;
1109}
1110
1111/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1112/// immediate" address. The "Size" argument is the size in bytes of the memory
1113/// reference, which determines the scale.
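/// For example (illustrative): with Size = 8 this matches "ldr x0, [x1, #off]"
/// forms where off is a multiple of 8 in [0, 32760]; OffImm holds off / 8.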
1114bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1115 SDValue &Base, SDValue &OffImm) {
1116 SDLoc dl(N);
1117 const DataLayout &DL = CurDAG->getDataLayout();
1118 const TargetLowering *TLI = getTargetLowering();
1119 if (N.getOpcode() == ISD::FrameIndex) {
1120 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1121 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1122 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1123 return true;
1124 }
1125
1126 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1127 GlobalAddressSDNode *GAN =
1128 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1129 Base = N.getOperand(0);
1130 OffImm = N.getOperand(1);
1131 if (!GAN)
1132 return true;
1133
1134 if (GAN->getOffset() % Size == 0 &&
1135 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1136 return true;
1137 }
1138
1139 if (CurDAG->isBaseWithConstantOffset(N)) {
1140 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1141 int64_t RHSC = (int64_t)RHS->getZExtValue();
1142 unsigned Scale = Log2_32(Size);
1143 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1144 Base = N.getOperand(0);
1145 if (Base.getOpcode() == ISD::FrameIndex) {
1146 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1147 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1148 }
1149 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1150 return true;
1151 }
1152 }
1153 }
1154
1155 // Before falling back to our general case, check if the unscaled
1156 // instructions can handle this. If so, that's preferable.
1157 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1158 return false;
1159
1160 // Base only. The address will be materialized into a register before
1161 // the memory is accessed.
1162 // add x0, Xbase, #offset
1163 // ldr x0, [x0]
1164 Base = N;
1165 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1166 return true;
1167}
1168
1169/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1170/// immediate" address. This should only match when there is an offset that
1171/// is not valid for a scaled immediate addressing mode. The "Size" argument
1172/// is the size in bytes of the memory reference, which is needed here to know
1173/// what is valid for a scaled immediate.
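/// Illustrative example: a byte offset of -16 with Size = 8 is not valid as a
/// scaled immediate, but it lies in [-256, 256) and therefore selects the
/// unscaled (LDUR/STUR-style) form.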
1174bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1175 SDValue &Base,
1176 SDValue &OffImm) {
1177 if (!CurDAG->isBaseWithConstantOffset(N))
1178 return false;
1179 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1180 int64_t RHSC = RHS->getSExtValue();
1181 if (RHSC >= -256 && RHSC < 256) {
1182 Base = N.getOperand(0);
1183 if (Base.getOpcode() == ISD::FrameIndex) {
1184 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1185 const TargetLowering *TLI = getTargetLowering();
1186 Base = CurDAG->getTargetFrameIndex(
1187 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1188 }
1189 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1190 return true;
1191 }
1192 }
1193 return false;
1194}
1195
1196static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1197 SDLoc dl(N);
1198 SDValue ImpDef = SDValue(
1199 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1200 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1201 N);
1202}
1203
1204/// Check if the given SHL node (\p N), can be used to form an
1205/// extended register for an addressing mode.
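/// Illustrative example: for Size = 4, (shl (sext i32 reg), 2) can be folded
/// into the address as [base, wN, sxtw #2].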
1206bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1207 bool WantExtend, SDValue &Offset,
1208 SDValue &SignExtend) {
1209 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1210 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1211 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1212 return false;
1213
1214 SDLoc dl(N);
1215 if (WantExtend) {
1216 AArch64_AM::ShiftExtendType Ext =
1217 getExtendTypeForNode(N.getOperand(0), true);
1218 if (Ext == AArch64_AM::InvalidShiftExtend)
1219 return false;
1220
1221 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1222 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1223 MVT::i32);
1224 } else {
1225 Offset = N.getOperand(0);
1226 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1227 }
1228
1229 unsigned LegalShiftVal = Log2_32(Size);
1230 unsigned ShiftVal = CSD->getZExtValue();
1231
1232 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1233 return false;
1234
1235 return isWorthFoldingAddr(N, Size);
1236}
1237
1238bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1239 SDValue &Base, SDValue &Offset,
1240 SDValue &SignExtend,
1241 SDValue &DoShift) {
1242 if (N.getOpcode() != ISD::ADD)
1243 return false;
1244 SDValue LHS = N.getOperand(0);
1245 SDValue RHS = N.getOperand(1);
1246 SDLoc dl(N);
1247
1248 // We don't want to match immediate adds here, because they are better lowered
1249 // to the register-immediate addressing modes.
1250 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1251 return false;
1252
1253 // Check if this particular node is reused in any non-memory related
1254 // operation. If yes, do not try to fold this node into the address
1255 // computation, since the computation will be kept.
1256 const SDNode *Node = N.getNode();
1257 for (SDNode *UI : Node->users()) {
1258 if (!isMemOpOrPrefetch(UI))
1259 return false;
1260 }
1261
1262 // Remember if it is worth folding N when it produces an extended register.
1263 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1264
1265 // Try to match a shifted extend on the RHS.
1266 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1267 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1268 Base = LHS;
1269 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1270 return true;
1271 }
1272
1273 // Try to match a shifted extend on the LHS.
1274 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1275 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1276 Base = RHS;
1277 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1278 return true;
1279 }
1280
1281 // There was no shift, whatever else we find.
1282 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1283
1284 AArch64_AM::ShiftExtendType Ext;
1285 // Try to match an unshifted extend on the LHS.
1286 if (IsExtendedRegisterWorthFolding &&
1287 (Ext = getExtendTypeForNode(LHS, true)) !=
1288 AArch64_AM::InvalidShiftExtend) {
1289 Base = RHS;
1290 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1291 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1292 MVT::i32);
1293 if (isWorthFoldingAddr(LHS, Size))
1294 return true;
1295 }
1296
1297 // Try to match an unshifted extend on the RHS.
1298 if (IsExtendedRegisterWorthFolding &&
1299 (Ext = getExtendTypeForNode(RHS, true)) !=
1300 AArch64_AM::InvalidShiftExtend) {
1301 Base = LHS;
1302 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1303 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1304 MVT::i32);
1305 if (isWorthFoldingAddr(RHS, Size))
1306 return true;
1307 }
1308
1309 return false;
1310}
1311
1312// Check if the given immediate is preferred by ADD. If an immediate can be
1313// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1314// encoded by a single MOVZ, return true.
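// For example (illustrative): 0xabc000 is preferred (it needs "ADD ..., LSL
// #12" and no single MOVZ covers it), whereas 0x780000 is not, since
// "MOVZ ..., #0x78, LSL #16" already materializes it.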
1315static bool isPreferredADD(int64_t ImmOff) {
1316 // Constant in [0x0, 0xfff] can be encoded in ADD.
1317 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1318 return true;
1319 // Check if it can be encoded in an "ADD LSL #12".
1320 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1321 // As a single MOVZ is faster than an "ADD ..., LSL #12", ignore such constants.
1322 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1323 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1324 return false;
1325}
1326
1327bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1328 SDValue &Base, SDValue &Offset,
1329 SDValue &SignExtend,
1330 SDValue &DoShift) {
1331 if (N.getOpcode() != ISD::ADD)
1332 return false;
1333 SDValue LHS = N.getOperand(0);
1334 SDValue RHS = N.getOperand(1);
1335 SDLoc DL(N);
1336
1337 // Check if this particular node is reused in any non-memory related
1338 // operation. If yes, do not try to fold this node into the address
1339 // computation, since the computation will be kept.
1340 const SDNode *Node = N.getNode();
1341 for (SDNode *UI : Node->users()) {
1342 if (!isMemOpOrPrefetch(UI))
1343 return false;
1344 }
1345
1346 // Watch out if RHS is a wide immediate: it cannot be selected into the
1347 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
1348 // Instead the [BaseReg + 0] address mode would be used, generating
1349 // instructions like:
1350 // MOV X0, WideImmediate
1351 // ADD X1, BaseReg, X0
1352 // LDR X2, [X1, 0]
1353 // For such situation, using [BaseReg, XReg] addressing mode can save one
1354 // ADD/SUB:
1355 // MOV X0, WideImmediate
1356 // LDR X2, [BaseReg, X0]
1357 if (isa<ConstantSDNode>(RHS)) {
1358 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1359 // Skip immediates that can be selected in the load/store addressing
1360 // mode, and immediates that can be encoded by a single ADD (SUB is also
1361 // checked by using -ImmOff).
1362 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1363 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1364 return false;
1365
1366 SDValue Ops[] = { RHS };
1367 SDNode *MOVI =
1368 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1369 SDValue MOVIV = SDValue(MOVI, 0);
1370 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1371 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1372 }
1373
1374 // Remember if it is worth folding N when it produces an extended register.
1375 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1376
1377 // Try to match a shifted extend on the RHS.
1378 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1379 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1380 Base = LHS;
1381 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1382 return true;
1383 }
1384
1385 // Try to match a shifted extend on the LHS.
1386 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1387 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1388 Base = RHS;
1389 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1390 return true;
1391 }
1392
1393 // Match any non-shifted, non-extend, non-immediate add expression.
1394 Base = LHS;
1395 Offset = RHS;
1396 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1397 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1398 // Reg1 + Reg2 is free: no check needed.
1399 return true;
1400}
1401
1402SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1403 static const unsigned RegClassIDs[] = {
1404 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1405 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1406 AArch64::dsub2, AArch64::dsub3};
1407
1408 return createTuple(Regs, RegClassIDs, SubRegs);
1409}
1410
1411SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1412 static const unsigned RegClassIDs[] = {
1413 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1414 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1415 AArch64::qsub2, AArch64::qsub3};
1416
1417 return createTuple(Regs, RegClassIDs, SubRegs);
1418}
1419
1420SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1421 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1422 AArch64::ZPR3RegClassID,
1423 AArch64::ZPR4RegClassID};
1424 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1425 AArch64::zsub2, AArch64::zsub3};
1426
1427 return createTuple(Regs, RegClassIDs, SubRegs);
1428}
1429
1430SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1431 assert(Regs.size() == 2 || Regs.size() == 4);
1432
1433 // The createTuple interface requires 3 RegClassIDs for each possible
1434 // tuple type even though we only have them for ZPR2 and ZPR4.
1435 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1436 AArch64::ZPR4Mul4RegClassID};
1437 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1438 AArch64::zsub2, AArch64::zsub3};
1439 return createTuple(Regs, RegClassIDs, SubRegs);
1440}
1441
1442SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1443 const unsigned RegClassIDs[],
1444 const unsigned SubRegs[]) {
1445 // There's no special register-class for a vector-list of 1 element: it's just
1446 // a vector.
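 // For example (illustrative): createQTuple with two vectors produces
 // REG_SEQUENCE(QQRegClassID, V0, qsub0, V1, qsub1).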
1447 if (Regs.size() == 1)
1448 return Regs[0];
1449
1450 assert(Regs.size() >= 2 && Regs.size() <= 4);
1451
1452 SDLoc DL(Regs[0]);
1453
1454 SmallVector<SDValue, 4> Ops;
1455
1456 // First operand of REG_SEQUENCE is the desired RegClass.
1457 Ops.push_back(
1458 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1459
1460 // Then we get pairs of source & subregister-position for the components.
1461 for (unsigned i = 0; i < Regs.size(); ++i) {
1462 Ops.push_back(Regs[i]);
1463 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1464 }
1465
1466 SDNode *N =
1467 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1468 return SDValue(N, 0);
1469}
1470
1471void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1472 bool isExt) {
1473 SDLoc dl(N);
1474 EVT VT = N->getValueType(0);
1475
1476 unsigned ExtOff = isExt;
1477
1478 // Form a REG_SEQUENCE to force register allocation.
1479 unsigned Vec0Off = ExtOff + 1;
1480 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1481 SDValue RegSeq = createQTuple(Regs);
1482
1483 SmallVector<SDValue, 6> Ops;
1484 if (isExt)
1485 Ops.push_back(N->getOperand(1));
1486 Ops.push_back(RegSeq);
1487 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1488 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1489}
1490
1491static std::tuple<SDValue, SDValue>
1492extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1493 SDLoc DL(Disc);
1494 SDValue AddrDisc;
1495 SDValue ConstDisc;
1496
1497 // If this is a blend, remember the constant and address discriminators.
1498 // Otherwise, it's either a constant discriminator, or a non-blended
1499 // address discriminator.
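 // Illustrative examples: blend(addr, 1234) yields (1234, addr); a plain
 // constant 42 yields (42, XZR); any other value V yields (0, V) so the
 // discriminator is computed separately.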
1500 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1501 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1502 AddrDisc = Disc->getOperand(1);
1503 ConstDisc = Disc->getOperand(2);
1504 } else {
1505 ConstDisc = Disc;
1506 }
1507
1508 // If the constant discriminator (either the blend RHS, or the entire
1509 // discriminator value) isn't a 16-bit constant, bail out, and let the
1510 // discriminator be computed separately.
1511 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1512 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1513 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1514
1515 // If there's no address discriminator, use XZR directly.
1516 if (!AddrDisc)
1517 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1518
1519 return std::make_tuple(
1520 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1521 AddrDisc);
1522}
1523
1524void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1525 SDLoc DL(N);
1526 // IntrinsicID is operand #0
1527 SDValue Val = N->getOperand(1);
1528 SDValue AUTKey = N->getOperand(2);
1529 SDValue AUTDisc = N->getOperand(3);
1530
1531 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1532 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1533
1534 SDValue AUTAddrDisc, AUTConstDisc;
1535 std::tie(AUTConstDisc, AUTAddrDisc) =
1536 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1537
1538 if (!Subtarget->isX16X17Safer()) {
1539 SDValue Ops[] = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1540
1541 SDNode *AUT =
1542 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1543 ReplaceNode(N, AUT);
1544 } else {
1545 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1546 AArch64::X16, Val, SDValue());
1547 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1548
1549 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1550 ReplaceNode(N, AUT);
1551 }
1552}
1553
1554void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1555 SDLoc DL(N);
1556 // IntrinsicID is operand #0
1557 SDValue Val = N->getOperand(1);
1558 SDValue AUTKey = N->getOperand(2);
1559 SDValue AUTDisc = N->getOperand(3);
1560 SDValue PACKey = N->getOperand(4);
1561 SDValue PACDisc = N->getOperand(5);
1562
1563 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1564 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1565
1566 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1567 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1568
1569 SDValue AUTAddrDisc, AUTConstDisc;
1570 std::tie(AUTConstDisc, AUTAddrDisc) =
1571 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1572
1573 SDValue PACAddrDisc, PACConstDisc;
1574 std::tie(PACConstDisc, PACAddrDisc) =
1575 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1576
1577 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1578 AArch64::X16, Val, SDValue());
1579
1580 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1581 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1582
1583 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1584 ReplaceNode(N, AUTPAC);
1585}
1586
1587bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1588 LoadSDNode *LD = cast<LoadSDNode>(N);
1589 if (LD->isUnindexed())
1590 return false;
1591 EVT VT = LD->getMemoryVT();
1592 EVT DstVT = N->getValueType(0);
1593 ISD::MemIndexedMode AM = LD->getAddressingMode();
1594 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1595 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1596 int OffsetVal = (int)OffsetOp->getZExtValue();
1597
1598 // We're not doing validity checking here. That was done when checking
1599 // if we should mark the load as indexed or not. We're just selecting
1600 // the right instruction.
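// E.g., a post-incremented, non-extending i32 load selects LDRWpost, with the
// pointer increment becoming the instruction's immediate offset operand.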
1601 unsigned Opcode = 0;
1602
1603 ISD::LoadExtType ExtType = LD->getExtensionType();
1604 bool InsertTo64 = false;
1605 if (VT == MVT::i64)
1606 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1607 else if (VT == MVT::i32) {
1608 if (ExtType == ISD::NON_EXTLOAD)
1609 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1610 else if (ExtType == ISD::SEXTLOAD)
1611 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1612 else {
1613 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1614 InsertTo64 = true;
1615 // The result of the load is only i32. It's the subreg_to_reg that makes
1616 // it into an i64.
1617 DstVT = MVT::i32;
1618 }
1619 } else if (VT == MVT::i16) {
1620 if (ExtType == ISD::SEXTLOAD) {
1621 if (DstVT == MVT::i64)
1622 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1623 else
1624 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1625 } else {
1626 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1627 InsertTo64 = DstVT == MVT::i64;
1628 // The result of the load is only i32. It's the subreg_to_reg that makes
1629 // it into an i64.
1630 DstVT = MVT::i32;
1631 }
1632 } else if (VT == MVT::i8) {
1633 if (ExtType == ISD::SEXTLOAD) {
1634 if (DstVT == MVT::i64)
1635 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1636 else
1637 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1638 } else {
1639 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1640 InsertTo64 = DstVT == MVT::i64;
1641 // The result of the load is only i32. It's the subreg_to_reg that makes
1642 // it into an i64.
1643 DstVT = MVT::i32;
1644 }
1645 } else if (VT == MVT::f16) {
1646 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1647 } else if (VT == MVT::bf16) {
1648 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1649 } else if (VT == MVT::f32) {
1650 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1651 } else if (VT == MVT::f64 ||
1652 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1653 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1654 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1655 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1656 } else if (VT.is64BitVector()) {
1657 if (IsPre || OffsetVal != 8)
1658 return false;
1659 switch (VT.getScalarSizeInBits()) {
1660 case 8:
1661 Opcode = AArch64::LD1Onev8b_POST;
1662 break;
1663 case 16:
1664 Opcode = AArch64::LD1Onev4h_POST;
1665 break;
1666 case 32:
1667 Opcode = AArch64::LD1Onev2s_POST;
1668 break;
1669 case 64:
1670 Opcode = AArch64::LD1Onev1d_POST;
1671 break;
1672 default:
1673 llvm_unreachable("Expected vector element to be a power of 2");
1674 }
1675 } else if (VT.is128BitVector()) {
1676 if (IsPre || OffsetVal != 16)
1677 return false;
1678 switch (VT.getScalarSizeInBits()) {
1679 case 8:
1680 Opcode = AArch64::LD1Onev16b_POST;
1681 break;
1682 case 16:
1683 Opcode = AArch64::LD1Onev8h_POST;
1684 break;
1685 case 32:
1686 Opcode = AArch64::LD1Onev4s_POST;
1687 break;
1688 case 64:
1689 Opcode = AArch64::LD1Onev2d_POST;
1690 break;
1691 default:
1692 llvm_unreachable("Expected vector element to be a power of 2");
1693 }
1694 } else
1695 return false;
1696 SDValue Chain = LD->getChain();
1697 SDValue Base = LD->getBasePtr();
1698 SDLoc dl(N);
1699 // LD1 encodes an immediate offset by using XZR as the offset register.
1700 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1701 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1702 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1703 SDValue Ops[] = { Base, Offset, Chain };
1704 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1705 MVT::Other, Ops);
1706
1707 // Transfer memoperands.
1708 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1709 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1710
1711 // Either way, we're replacing the node, so tell the caller that.
1712 SDValue LoadedVal = SDValue(Res, 1);
1713 if (InsertTo64) {
1714 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1715 LoadedVal =
1716 SDValue(CurDAG->getMachineNode(
1717 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1718 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1719 SubReg),
1720 0);
1721 }
1722
1723 ReplaceUses(SDValue(N, 0), LoadedVal);
1724 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1725 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1726 CurDAG->RemoveDeadNode(N);
1727 return true;
1728}
1729
1730void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1731 unsigned SubRegIdx) {
1732 SDLoc dl(N);
1733 EVT VT = N->getValueType(0);
1734 SDValue Chain = N->getOperand(0);
1735
1736 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1737 Chain};
1738
1739 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1740
1741 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1742 SDValue SuperReg = SDValue(Ld, 0);
1743 for (unsigned i = 0; i < NumVecs; ++i)
1744 ReplaceUses(SDValue(N, i),
1745 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1746
1747 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1748
1749 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1750 // because it's too simple to have needed special treatment during lowering.
1751 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1752 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1753 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1754 }
1755
1756 CurDAG->RemoveDeadNode(N);
1757}
1758
1759void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1760 unsigned Opc, unsigned SubRegIdx) {
1761 SDLoc dl(N);
1762 EVT VT = N->getValueType(0);
1763 SDValue Chain = N->getOperand(0);
1764
1765 SDValue Ops[] = {N->getOperand(1), // Mem operand
1766 N->getOperand(2), // Incremental
1767 Chain};
1768
1769 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1770 MVT::Untyped, MVT::Other};
1771
1772 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1773
1774 // Update uses of write back register
1775 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1776
1777 // Update uses of vector list
1778 SDValue SuperReg = SDValue(Ld, 1);
1779 if (NumVecs == 1)
1780 ReplaceUses(SDValue(N, 0), SuperReg);
1781 else
1782 for (unsigned i = 0; i < NumVecs; ++i)
1783 ReplaceUses(SDValue(N, i),
1784 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1785
1786 // Update the chain
1787 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1788 CurDAG->RemoveDeadNode(N);
1789}
1790
1791/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1792/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1793/// new Base and an SDValue representing the new offset.
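/// If neither a reg+imm nor a reg+reg addressing mode can be selected, the
/// _ri opcode is returned by default.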
1794std::tuple<unsigned, SDValue, SDValue>
1795AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1796 unsigned Opc_ri,
1797 const SDValue &OldBase,
1798 const SDValue &OldOffset,
1799 unsigned Scale) {
1800 SDValue NewBase = OldBase;
1801 SDValue NewOffset = OldOffset;
1802 // Detect a possible Reg+Imm addressing mode.
1803 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1804 N, OldBase, NewBase, NewOffset);
1805
1806 // Detect a possible reg+reg addressing mode, but only if we haven't already
1807 // detected a Reg+Imm one.
1808 const bool IsRegReg =
1809 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1810
1811 // Select the instruction.
1812 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1813}
1814
1815enum class SelectTypeKind {
1816 Int1 = 0,
1817 Int = 1,
1818 FP = 2,
1819 AnyType = 3,
1820};
1821
1822/// This function selects an opcode from a list of opcodes, which is
1823/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1824/// element types, in this order.
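/// E.g., nxv8i16 has a minimum element count of 8, so the 16-bit opcode
/// (index 1) is returned.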
1825template <SelectTypeKind Kind>
1826static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1827 // Only match scalable vector VTs
1828 if (!VT.isScalableVector())
1829 return 0;
1830
1831 EVT EltVT = VT.getVectorElementType();
1832 unsigned Key = VT.getVectorMinNumElements();
1833 switch (Kind) {
1834 case SelectTypeKind::AnyType:
1835 break;
1836 case SelectTypeKind::Int:
1837 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1838 EltVT != MVT::i64)
1839 return 0;
1840 break;
1841 case SelectTypeKind::Int1:
1842 if (EltVT != MVT::i1)
1843 return 0;
1844 break;
1845 case SelectTypeKind::FP:
1846 if (EltVT == MVT::bf16)
1847 Key = 16;
1848 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1849 EltVT != MVT::f64)
1850 return 0;
1851 break;
1852 }
1853
1854 unsigned Offset;
1855 switch (Key) {
1856 case 16: // 8-bit or bf16
1857 Offset = 0;
1858 break;
1859 case 8: // 16-bit
1860 Offset = 1;
1861 break;
1862 case 4: // 32-bit
1863 Offset = 2;
1864 break;
1865 case 2: // 64-bit
1866 Offset = 3;
1867 break;
1868 default:
1869 return 0;
1870 }
1871
1872 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1873}
1874
1875// This function is almost identical to SelectWhilePair, but has an
1876// extra check on the range of the immediate operand.
1877// TODO: Merge these two functions together at some point?
1878void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1879 // Immediate can be either 0 or 1.
1880 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1881 if (Imm->getZExtValue() > 1)
1882 return;
1883
1884 SDLoc DL(N);
1885 EVT VT = N->getValueType(0);
1886 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1887 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1888 SDValue SuperReg = SDValue(WhilePair, 0);
1889
1890 for (unsigned I = 0; I < 2; ++I)
1891 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1892 AArch64::psub0 + I, DL, VT, SuperReg));
1893
1894 CurDAG->RemoveDeadNode(N);
1895}
1896
1897void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1898 SDLoc DL(N);
1899 EVT VT = N->getValueType(0);
1900
1901 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1902
1903 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1904 SDValue SuperReg = SDValue(WhilePair, 0);
1905
1906 for (unsigned I = 0; I < 2; ++I)
1907 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1908 AArch64::psub0 + I, DL, VT, SuperReg));
1909
1910 CurDAG->RemoveDeadNode(N);
1911}
1912
1913void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1914 unsigned Opcode) {
1915 EVT VT = N->getValueType(0);
1916 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1917 SDValue Ops = createZTuple(Regs);
1918 SDLoc DL(N);
1919 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1920 SDValue SuperReg = SDValue(Intrinsic, 0);
1921 for (unsigned i = 0; i < NumVecs; ++i)
1922 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1923 AArch64::zsub0 + i, DL, VT, SuperReg));
1924
1925 CurDAG->RemoveDeadNode(N);
1926}
1927
1928void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1929 unsigned Opcode) {
1930 SDLoc DL(N);
1931 EVT VT = N->getValueType(0);
1932 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1933 Ops.push_back(/*Chain*/ N->getOperand(0));
1934
1935 SDNode *Instruction =
1936 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1937 SDValue SuperReg = SDValue(Instruction, 0);
1938
1939 for (unsigned i = 0; i < NumVecs; ++i)
1940 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1941 AArch64::zsub0 + i, DL, VT, SuperReg));
1942
1943 // Copy chain
1944 unsigned ChainIdx = NumVecs;
1945 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1946 CurDAG->RemoveDeadNode(N);
1947}
1948
1949void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1950 unsigned NumVecs,
1951 bool IsZmMulti,
1952 unsigned Opcode,
1953 bool HasPred) {
1954 assert(Opcode != 0 && "Unexpected opcode");
1955
1956 SDLoc DL(N);
1957 EVT VT = N->getValueType(0);
1958 unsigned FirstVecIdx = HasPred ? 2 : 1;
1959
1960 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1961 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1962 return createZMulTuple(Regs);
1963 };
1964
1965 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1966
1967 SDValue Zm;
1968 if (IsZmMulti)
1969 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1970 else
1971 Zm = N->getOperand(NumVecs + FirstVecIdx);
1972
1973 SDNode *Intrinsic;
1974 if (HasPred)
1975 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1976 N->getOperand(1), Zdn, Zm);
1977 else
1978 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1979 SDValue SuperReg = SDValue(Intrinsic, 0);
1980 for (unsigned i = 0; i < NumVecs; ++i)
1981 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1982 AArch64::zsub0 + i, DL, VT, SuperReg));
1983
1984 CurDAG->RemoveDeadNode(N);
1985}
1986
1987void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1988 unsigned Scale, unsigned Opc_ri,
1989 unsigned Opc_rr, bool IsIntr) {
1990 assert(Scale < 5 && "Invalid scaling value.");
1991 SDLoc DL(N);
1992 EVT VT = N->getValueType(0);
1993 SDValue Chain = N->getOperand(0);
1994
1995 // Optimize addressing mode.
1996 SDValue Base, Offset;
1997 unsigned Opc;
1998 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1999 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2000 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2001
2002 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2003 Base, // Memory operand
2004 Offset, Chain};
2005
2006 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2007
2008 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2009 SDValue SuperReg = SDValue(Load, 0);
2010 for (unsigned i = 0; i < NumVecs; ++i)
2011 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2012 AArch64::zsub0 + i, DL, VT, SuperReg));
2013
2014 // Copy chain
2015 unsigned ChainIdx = NumVecs;
2016 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2017 CurDAG->RemoveDeadNode(N);
2018}
2019
2020void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2021 unsigned NumVecs,
2022 unsigned Scale,
2023 unsigned Opc_ri,
2024 unsigned Opc_rr) {
2025 assert(Scale < 4 && "Invalid scaling value.");
2026 SDLoc DL(N);
2027 EVT VT = N->getValueType(0);
2028 SDValue Chain = N->getOperand(0);
2029
2030 SDValue PNg = N->getOperand(2);
2031 SDValue Base = N->getOperand(3);
2032 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2033 unsigned Opc;
2034 std::tie(Opc, Base, Offset) =
2035 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2036
2037 SDValue Ops[] = {PNg, // Predicate-as-counter
2038 Base, // Memory operand
2039 Offset, Chain};
2040
2041 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2042
2043 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2044 SDValue SuperReg = SDValue(Load, 0);
2045 for (unsigned i = 0; i < NumVecs; ++i)
2046 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2047 AArch64::zsub0 + i, DL, VT, SuperReg));
2048
2049 // Copy chain
2050 unsigned ChainIdx = NumVecs;
2051 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2052 CurDAG->RemoveDeadNode(N);
2053}
2054
2055void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2056 unsigned Opcode) {
2057 if (N->getValueType(0) != MVT::nxv4f32)
2058 return;
2059 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2060}
2061
2062void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2063 unsigned NumOutVecs,
2064 unsigned Opc,
2065 uint32_t MaxImm) {
2066 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2067 if (Imm->getZExtValue() > MaxImm)
2068 return;
2069
2070 SDValue ZtValue;
2071 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2072 return;
2073
2074 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2075 SDLoc DL(Node);
2076 EVT VT = Node->getValueType(0);
2077
2078 SDNode *Instruction =
2079 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2080 SDValue SuperReg = SDValue(Instruction, 0);
2081
2082 for (unsigned I = 0; I < NumOutVecs; ++I)
2083 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2084 AArch64::zsub0 + I, DL, VT, SuperReg));
2085
2086 // Copy chain
2087 unsigned ChainIdx = NumOutVecs;
2088 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2089 CurDAG->RemoveDeadNode(Node);
2090}
2091
2092void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2093 unsigned NumOutVecs,
2094 unsigned Opc) {
2095
2096 SDValue ZtValue;
2097 SmallVector<SDValue, 4> Ops;
2098 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2099 return;
2100
2101 Ops.push_back(ZtValue);
2102 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2103 SDLoc DL(Node);
2104 EVT VT = Node->getValueType(0);
2105
2106 SDNode *Instruction =
2107 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2108 SDValue SuperReg = SDValue(Instruction, 0);
2109
2110 for (unsigned I = 0; I < NumOutVecs; ++I)
2111 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2112 AArch64::zsub0 + I, DL, VT, SuperReg));
2113
2114 // Copy chain
2115 unsigned ChainIdx = NumOutVecs;
2116 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2117 CurDAG->RemoveDeadNode(Node);
2118}
2119
2120void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2121 unsigned Op) {
2122 SDLoc DL(N);
2123 EVT VT = N->getValueType(0);
2124
2125 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2126 SDValue Zd = createZMulTuple(Regs);
2127 SDValue Zn = N->getOperand(1 + NumVecs);
2128 SDValue Zm = N->getOperand(2 + NumVecs);
2129
2130 SDValue Ops[] = {Zd, Zn, Zm};
2131
2132 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2133 SDValue SuperReg = SDValue(Intrinsic, 0);
2134 for (unsigned i = 0; i < NumVecs; ++i)
2135 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2136 AArch64::zsub0 + i, DL, VT, SuperReg));
2137
2138 CurDAG->RemoveDeadNode(N);
2139}
2140
2141bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2142 switch (BaseReg) {
2143 default:
2144 return false;
2145 case AArch64::ZA:
2146 case AArch64::ZAB0:
2147 if (TileNum == 0)
2148 break;
2149 return false;
2150 case AArch64::ZAH0:
2151 if (TileNum <= 1)
2152 break;
2153 return false;
2154 case AArch64::ZAS0:
2155 if (TileNum <= 3)
2156 break;
2157 return false;
2158 case AArch64::ZAD0:
2159 if (TileNum <= 7)
2160 break;
2161 return false;
2162 }
2163
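// E.g., BaseReg == AArch64::ZAS0 with TileNum == 2 selects ZAS2; this relies
// on the tile registers being numbered consecutively in the register enum.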
2164 BaseReg += TileNum;
2165 return true;
2166}
2167
2168template <unsigned MaxIdx, unsigned Scale>
2169void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2170 unsigned BaseReg, unsigned Op) {
2171 unsigned TileNum = 0;
2172 if (BaseReg != AArch64::ZA)
2173 TileNum = N->getConstantOperandVal(2);
2174
2175 if (!SelectSMETile(BaseReg, TileNum))
2176 return;
2177
2178 SDValue SliceBase, Base, Offset;
2179 if (BaseReg == AArch64::ZA)
2180 SliceBase = N->getOperand(2);
2181 else
2182 SliceBase = N->getOperand(3);
2183
2184 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2185 return;
2186
2187 SDLoc DL(N);
2188 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2189 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2190 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2191
2192 EVT VT = N->getValueType(0);
2193 for (unsigned I = 0; I < NumVecs; ++I)
2194 ReplaceUses(SDValue(N, I),
2195 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2196 SDValue(Mov, 0)));
2197 // Copy chain
2198 unsigned ChainIdx = NumVecs;
2199 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2200 CurDAG->RemoveDeadNode(N);
2201}
2202
2203void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2204 unsigned Op, unsigned MaxIdx,
2205 unsigned Scale, unsigned BaseReg) {
2206 // The slice operand can appear in different positions:
2207 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2208 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2209 SDValue SliceBase = N->getOperand(2);
2210 if (BaseReg != AArch64::ZA)
2211 SliceBase = N->getOperand(3);
2212 SDValue Base, Offset;
2212
2214 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2215 return;
2216 // The correct ZA tile number is computed when the machine instruction is
2217 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2218 // register together with a ZReg.
2219 SDLoc DL(N);
2220 SmallVector<SDValue, 4> Ops;
2221 if (BaseReg != AArch64::ZA)
2222 Ops.push_back(N->getOperand(2));
2223 Ops.push_back(Base);
2224 Ops.push_back(Offset);
2225 Ops.push_back(N->getOperand(0)); // Chain
2226 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2227
2228 EVT VT = N->getValueType(0);
2229 for (unsigned I = 0; I < NumVecs; ++I)
2230 ReplaceUses(SDValue(N, I),
2231 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2232 SDValue(Mov, 0)));
2233
2234 // Copy chain
2235 unsigned ChainIdx = NumVecs;
2236 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2237 CurDAG->RemoveDeadNode(N);
2238}
2239
2240void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2241 unsigned NumOutVecs,
2242 bool IsTupleInput,
2243 unsigned Opc) {
2244 SDLoc DL(N);
2245 EVT VT = N->getValueType(0);
2246 unsigned NumInVecs = N->getNumOperands() - 1;
2247 SmallVector<SDValue, 4> Ops;
2247
2249 if (IsTupleInput) {
2250 assert((NumInVecs == 2 || NumInVecs == 4) &&
2251 "Don't know how to handle multi-register input!");
2252 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2253 Ops.push_back(createZMulTuple(Regs));
2254 } else {
2255 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2256 for (unsigned I = 0; I < NumInVecs; I++)
2257 Ops.push_back(N->getOperand(1 + I));
2258 }
2259
2260 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2261 SDValue SuperReg = SDValue(Res, 0);
2262
2263 for (unsigned I = 0; I < NumOutVecs; I++)
2264 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2265 AArch64::zsub0 + I, DL, VT, SuperReg));
2266 CurDAG->RemoveDeadNode(N);
2267}
2268
2269void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2270 unsigned Opc) {
2271 SDLoc dl(N);
2272 EVT VT = N->getOperand(2)->getValueType(0);
2273
2274 // Form a REG_SEQUENCE to force register allocation.
2275 bool Is128Bit = VT.getSizeInBits() == 128;
2276 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2277 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2278
2279 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2280 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2281
2282 // Transfer memoperands.
2283 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2284 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2285
2286 ReplaceNode(N, St);
2287}
2288
2289void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2290 unsigned Scale, unsigned Opc_rr,
2291 unsigned Opc_ri) {
2292 SDLoc dl(N);
2293
2294 // Form a REG_SEQUENCE to force register allocation.
2295 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2296 SDValue RegSeq = createZTuple(Regs);
2297
2298 // Optimize addressing mode.
2299 unsigned Opc;
2300 SDValue Base, Offset;
2301 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2302 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2303 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2304
2305 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2306 Base, // address
2307 Offset, // offset
2308 N->getOperand(0)}; // chain
2309 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2310
2311 ReplaceNode(N, St);
2312}
2313
2314bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2315 SDValue &OffImm) {
2316 SDLoc dl(N);
2317 const DataLayout &DL = CurDAG->getDataLayout();
2318 const TargetLowering *TLI = getTargetLowering();
2319
2320 // Try to match it for the frame address
2321 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2322 int FI = FINode->getIndex();
2323 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2324 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2325 return true;
2326 }
2327
2328 return false;
2329}
2330
2331void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2332 unsigned Opc) {
2333 SDLoc dl(N);
2334 EVT VT = N->getOperand(2)->getValueType(0);
2335 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2336 MVT::Other}; // Type for the Chain
2337
2338 // Form a REG_SEQUENCE to force register allocation.
2339 bool Is128Bit = VT.getSizeInBits() == 128;
2340 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2341 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2342
2343 SDValue Ops[] = {RegSeq,
2344 N->getOperand(NumVecs + 1), // base register
2345 N->getOperand(NumVecs + 2), // Incremental
2346 N->getOperand(0)}; // Chain
2347 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2348
2349 ReplaceNode(N, St);
2350}
2351
2352namespace {
2353/// WidenVector - Given a value in the V64 register class, produce the
2354/// equivalent value in the V128 register class.
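/// This is done by inserting the 64-bit value into the dsub subregister of an
/// IMPLICIT_DEF of the wider type.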
2355class WidenVector {
2356 SelectionDAG &DAG;
2357
2358public:
2359 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2360
2361 SDValue operator()(SDValue V64Reg) {
2362 EVT VT = V64Reg.getValueType();
2363 unsigned NarrowSize = VT.getVectorNumElements();
2364 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2365 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2366 SDLoc DL(V64Reg);
2367
2368 SDValue Undef =
2369 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2370 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2371 }
2372};
2373} // namespace
2374
2375/// NarrowVector - Given a value in the V128 register class, produce the
2376/// equivalent value in the V64 register class.
2377 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2378 EVT VT = V128Reg.getValueType();
2379 unsigned WideSize = VT.getVectorNumElements();
2380 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2381 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2382
2383 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2384 V128Reg);
2385}
2386
2387void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2388 unsigned Opc) {
2389 SDLoc dl(N);
2390 EVT VT = N->getValueType(0);
2391 bool Narrow = VT.getSizeInBits() == 64;
2392
2393 // Form a REG_SEQUENCE to force register allocation.
2394 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2395
2396 if (Narrow)
2397 transform(Regs, Regs.begin(),
2398 WidenVector(*CurDAG));
2399
2400 SDValue RegSeq = createQTuple(Regs);
2401
2402 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2403
2404 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2405
2406 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2407 N->getOperand(NumVecs + 3), N->getOperand(0)};
2408 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2409 SDValue SuperReg = SDValue(Ld, 0);
2410
2411 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2412 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2413 AArch64::qsub2, AArch64::qsub3 };
2414 for (unsigned i = 0; i < NumVecs; ++i) {
2415 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2416 if (Narrow)
2417 NV = NarrowVector(NV, *CurDAG);
2418 ReplaceUses(SDValue(N, i), NV);
2419 }
2420
2421 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2422 CurDAG->RemoveDeadNode(N);
2423}
2424
2425void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2426 unsigned Opc) {
2427 SDLoc dl(N);
2428 EVT VT = N->getValueType(0);
2429 bool Narrow = VT.getSizeInBits() == 64;
2430
2431 // Form a REG_SEQUENCE to force register allocation.
2432 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2433
2434 if (Narrow)
2435 transform(Regs, Regs.begin(),
2436 WidenVector(*CurDAG));
2437
2438 SDValue RegSeq = createQTuple(Regs);
2439
2440 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2441 RegSeq->getValueType(0), MVT::Other};
2442
2443 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2444
2445 SDValue Ops[] = {RegSeq,
2446 CurDAG->getTargetConstant(LaneNo, dl,
2447 MVT::i64), // Lane Number
2448 N->getOperand(NumVecs + 2), // Base register
2449 N->getOperand(NumVecs + 3), // Incremental
2450 N->getOperand(0)};
2451 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2452
2453 // Update uses of the write back register
2454 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2455
2456 // Update uses of the vector list
2457 SDValue SuperReg = SDValue(Ld, 1);
2458 if (NumVecs == 1) {
2459 ReplaceUses(SDValue(N, 0),
2460 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2461 } else {
2462 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2463 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2464 AArch64::qsub2, AArch64::qsub3 };
2465 for (unsigned i = 0; i < NumVecs; ++i) {
2466 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2467 SuperReg);
2468 if (Narrow)
2469 NV = NarrowVector(NV, *CurDAG);
2470 ReplaceUses(SDValue(N, i), NV);
2471 }
2472 }
2473
2474 // Update the Chain
2475 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2476 CurDAG->RemoveDeadNode(N);
2477}
2478
2479void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2480 unsigned Opc) {
2481 SDLoc dl(N);
2482 EVT VT = N->getOperand(2)->getValueType(0);
2483 bool Narrow = VT.getSizeInBits() == 64;
2484
2485 // Form a REG_SEQUENCE to force register allocation.
2486 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2487
2488 if (Narrow)
2489 transform(Regs, Regs.begin(),
2490 WidenVector(*CurDAG));
2491
2492 SDValue RegSeq = createQTuple(Regs);
2493
2494 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2495
2496 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2497 N->getOperand(NumVecs + 3), N->getOperand(0)};
2498 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2499
2500 // Transfer memoperands.
2501 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2502 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2503
2504 ReplaceNode(N, St);
2505}
2506
2507void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2508 unsigned Opc) {
2509 SDLoc dl(N);
2510 EVT VT = N->getOperand(2)->getValueType(0);
2511 bool Narrow = VT.getSizeInBits() == 64;
2512
2513 // Form a REG_SEQUENCE to force register allocation.
2514 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2515
2516 if (Narrow)
2517 transform(Regs, Regs.begin(),
2518 WidenVector(*CurDAG));
2519
2520 SDValue RegSeq = createQTuple(Regs);
2521
2522 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2523 MVT::Other};
2524
2525 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2526
2527 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2528 N->getOperand(NumVecs + 2), // Base Register
2529 N->getOperand(NumVecs + 3), // Incremental
2530 N->getOperand(0)};
2531 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2532
2533 // Transfer memoperands.
2534 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2535 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2536
2537 ReplaceNode(N, St);
2538}
2539
2540 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2541 unsigned &Opc, SDValue &Opd0,
2542 unsigned &LSB, unsigned &MSB,
2543 unsigned NumberOfIgnoredLowBits,
2544 bool BiggerPattern) {
2545 assert(N->getOpcode() == ISD::AND &&
2546 "N must be a AND operation to call this function");
2547
2548 EVT VT = N->getValueType(0);
2549
2550 // Here we can test the type of VT and return false when the type does not
2551 // match, but since it is done prior to that call in the current context
2552 // we turned that into an assert to avoid redundant code.
2553 assert((VT == MVT::i32 || VT == MVT::i64) &&
2554 "Type checking must have been done before calling this function");
2555
2556 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2557 // changed the AND node to a 32-bit mask operation. We'll have to
2558 // undo that as part of the transform here if we want to catch all
2559 // the opportunities.
2560 // Currently the NumberOfIgnoredLowBits argument helps to recover
2561 // from these situations when matching bigger pattern (bitfield insert).
2562
2563 // For unsigned extracts, check for a shift right and mask
2564 uint64_t AndImm = 0;
2565 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2566 return false;
2567
2568 const SDNode *Op0 = N->getOperand(0).getNode();
2569
2570 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2571 // simplified. Try to undo that
2572 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2573
2574 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
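// E.g., AndImm == 0x00ff is a low-bit mask (0xff & 0x100 == 0), while
// AndImm == 0x0ff0 is not (0xff0 & 0xff1 != 0) and causes a bail-out here.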
2575 if (AndImm & (AndImm + 1))
2576 return false;
2577
2578 bool ClampMSB = false;
2579 uint64_t SrlImm = 0;
2580 // Handle the SRL + ANY_EXTEND case.
2581 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2582 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2583 // Extend the incoming operand of the SRL to 64-bit.
2584 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2585 // Make sure to clamp the MSB so that we preserve the semantics of the
2586 // original operations.
2587 ClampMSB = true;
2588 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2589 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2590 SrlImm)) {
2591 // If the shift result was truncated, we can still combine them.
2592 Opd0 = Op0->getOperand(0).getOperand(0);
2593
2594 // Use the type of SRL node.
2595 VT = Opd0->getValueType(0);
2596 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2597 Opd0 = Op0->getOperand(0);
2598 ClampMSB = (VT == MVT::i32);
2599 } else if (BiggerPattern) {
2600 // Let's pretend a 0 shift right has been performed.
2601 // The resulting code will be at least as good as the original one
2602 // plus it may expose more opportunities for bitfield insert pattern.
2603 // FIXME: Currently we limit this to the bigger pattern, because
2604 // some optimizations expect AND and not UBFM.
2605 Opd0 = N->getOperand(0);
2606 } else
2607 return false;
2608
2609 // Bail out on large immediates. This happens when no proper
2610 // combining/constant folding was performed.
2611 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2612 LLVM_DEBUG(
2613 (dbgs() << N
2614 << ": Found large shift immediate, this should not happen\n"));
2615 return false;
2616 }
2617
2618 LSB = SrlImm;
2619 MSB = SrlImm +
2620 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2621 : llvm::countr_one<uint64_t>(AndImm)) -
2622 1;
2623 if (ClampMSB)
2624 // Since we're moving the extend before the right shift operation, we need
2625 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2626 // the zeros which would get shifted in with the original right shift
2627 // operation.
2628 MSB = MSB > 31 ? 31 : MSB;
2629
2630 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2631 return true;
2632}
2633
2634 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2635 SDValue &Opd0, unsigned &Immr,
2636 unsigned &Imms) {
2637 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2638
2639 EVT VT = N->getValueType(0);
2640 unsigned BitWidth = VT.getSizeInBits();
2641 assert((VT == MVT::i32 || VT == MVT::i64) &&
2642 "Type checking must have been done before calling this function");
2643
2644 SDValue Op = N->getOperand(0);
2645 if (Op->getOpcode() == ISD::TRUNCATE) {
2646 Op = Op->getOperand(0);
2647 VT = Op->getValueType(0);
2648 BitWidth = VT.getSizeInBits();
2649 }
2650
2651 uint64_t ShiftImm;
2652 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2653 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2654 return false;
2655
2656 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2657 if (ShiftImm + Width > BitWidth)
2658 return false;
2659
2660 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2661 Opd0 = Op.getOperand(0);
2662 Immr = ShiftImm;
2663 Imms = ShiftImm + Width - 1;
2664 return true;
2665}
2666
2667 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2668 SDValue &Opd0, unsigned &LSB,
2669 unsigned &MSB) {
2670 // We are looking for the following pattern which basically extracts several
2671 // contiguous bits from the source value and places them at the LSB of the
2672 // destination value; all other bits of the destination value are set to zero:
2673 //
2674 // Value2 = AND Value, MaskImm
2675 // SRL Value2, ShiftImm
2676 //
2677 // where MaskImm >> ShiftImm determines the width of the extracted field.
2678 //
2679 // This gets selected into a single UBFM:
2680 //
2681 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2682 //
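// E.g., (srl (and x, 0xff0), 4) extracts bits [11:4] of x and becomes
// UBFM x, #4, #11.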
2683
2684 if (N->getOpcode() != ISD::SRL)
2685 return false;
2686
2687 uint64_t AndMask = 0;
2688 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2689 return false;
2690
2691 Opd0 = N->getOperand(0).getOperand(0);
2692
2693 uint64_t SrlImm = 0;
2694 if (!isIntImmediate(N->getOperand(1), SrlImm))
2695 return false;
2696
2697 // Check whether we really have several bits extract here.
2698 if (!isMask_64(AndMask >> SrlImm))
2699 return false;
2700
2701 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2702 LSB = SrlImm;
2703 MSB = llvm::Log2_64(AndMask);
2704 return true;
2705}
2706
2707static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2708 unsigned &Immr, unsigned &Imms,
2709 bool BiggerPattern) {
2710 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2711 "N must be a SHR/SRA operation to call this function");
2712
2713 EVT VT = N->getValueType(0);
2714
2715 // Here we can test the type of VT and return false when the type does not
2716 // match, but since it is done prior to that call in the current context
2717 // we turned that into an assert to avoid redundant code.
2718 assert((VT == MVT::i32 || VT == MVT::i64) &&
2719 "Type checking must have been done before calling this function");
2720
2721 // Check for AND + SRL doing several bits extract.
2722 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2723 return true;
2724
2725 // We're looking for a shift of a shift.
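// E.g., (sra (shl x, 24), 28) on i32 becomes SBFMWri x, #4, #7, a
// sign-extending extract of bits [7:4] of x.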
2726 uint64_t ShlImm = 0;
2727 uint64_t TruncBits = 0;
2728 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2729 Opd0 = N->getOperand(0).getOperand(0);
2730 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2731 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2732 // We are looking for a shift of a truncate. Truncating from i64 to i32 can
2733 // be considered as setting the high 32 bits to zero. Our strategy here is to
2734 // always generate a 64-bit UBFM. This consistency will help the CSE pass
2735 // later find more redundancy.
2736 Opd0 = N->getOperand(0).getOperand(0);
2737 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2738 VT = Opd0.getValueType();
2739 assert(VT == MVT::i64 && "the promoted type should be i64");
2740 } else if (BiggerPattern) {
2741 // Let's pretend a 0 shift left has been performed.
2742 // FIXME: Currently we limit this to the bigger pattern case,
2743 // because some optimizations expect AND and not UBFM
2744 Opd0 = N->getOperand(0);
2745 } else
2746 return false;
2747
2748 // Missing combines/constant folding may have left us with strange
2749 // constants.
2750 if (ShlImm >= VT.getSizeInBits()) {
2751 LLVM_DEBUG(
2752 (dbgs() << N
2753 << ": Found large shift immediate, this should not happen\n"));
2754 return false;
2755 }
2756
2757 uint64_t SrlImm = 0;
2758 if (!isIntImmediate(N->getOperand(1), SrlImm))
2759 return false;
2760
2761 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2762 "bad amount in shift node!");
2763 int immr = SrlImm - ShlImm;
2764 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2765 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2766 // SRA requires a signed extraction
2767 if (VT == MVT::i32)
2768 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2769 else
2770 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2771 return true;
2772}
2773
2774bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2775 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2776
2777 EVT VT = N->getValueType(0);
2778 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2779 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2780 return false;
2781
2782 uint64_t ShiftImm;
2783 SDValue Op = N->getOperand(0);
2784 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2785 return false;
2786
2787 SDLoc dl(N);
2788 // Extend the incoming operand of the shift to 64-bits.
2789 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2790 unsigned Immr = ShiftImm;
2791 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2792 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2793 CurDAG->getTargetConstant(Imms, dl, VT)};
2794 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2795 return true;
2796}
2797
2798static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2799 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2800 unsigned NumberOfIgnoredLowBits = 0,
2801 bool BiggerPattern = false) {
2802 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2803 return false;
2804
2805 switch (N->getOpcode()) {
2806 default:
2807 if (!N->isMachineOpcode())
2808 return false;
2809 break;
2810 case ISD::AND:
2811 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2812 NumberOfIgnoredLowBits, BiggerPattern);
2813 case ISD::SRL:
2814 case ISD::SRA:
2815 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2816
2817 case ISD::SIGN_EXTEND_INREG:
2818 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2819 }
2820
2821 unsigned NOpc = N->getMachineOpcode();
2822 switch (NOpc) {
2823 default:
2824 return false;
2825 case AArch64::SBFMWri:
2826 case AArch64::UBFMWri:
2827 case AArch64::SBFMXri:
2828 case AArch64::UBFMXri:
2829 Opc = NOpc;
2830 Opd0 = N->getOperand(0);
2831 Immr = N->getConstantOperandVal(1);
2832 Imms = N->getConstantOperandVal(2);
2833 return true;
2834 }
2835 // Unreachable
2836 return false;
2837}
2838
2839bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2840 unsigned Opc, Immr, Imms;
2841 SDValue Opd0;
2842 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2843 return false;
2844
2845 EVT VT = N->getValueType(0);
2846 SDLoc dl(N);
2847
2848 // If the bit extract operation is 64bit but the original type is 32bit, we
2849 // need to add one EXTRACT_SUBREG.
2850 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2851 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2852 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2853
2854 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2855 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2856 MVT::i32, SDValue(BFM, 0));
2857 ReplaceNode(N, Inner.getNode());
2858 return true;
2859 }
2860
2861 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2862 CurDAG->getTargetConstant(Imms, dl, VT)};
2863 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2864 return true;
2865}
2866
2867/// Does DstMask form a complementary pair with the mask provided by
2868/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2869/// this asks whether DstMask zeroes precisely those bits that will be set by
2870/// the other half.
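/// E.g., for i32 with no ignored high bits, DstMask == 0xffff0000 and
/// BitsToBeInserted == 0x0000ffff form such a complementary pair.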
2871static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2872 unsigned NumberOfIgnoredHighBits, EVT VT) {
2873 assert((VT == MVT::i32 || VT == MVT::i64) &&
2874 "i32 or i64 mask type expected!");
2875 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2876
2877 // Enable implicitTrunc as we're intentionally ignoring high bits.
2878 APInt SignificantDstMask =
2879 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2880 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2881
2882 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2883 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2884}
2885
2886// Look for bits that will be useful for later uses.
2887 // A bit is considered useless as soon as it is dropped and never used
2888 // before it has been dropped.
2889// E.g., looking for useful bit of x
2890// 1. y = x & 0x7
2891// 2. z = y >> 2
2892 // After #1, the useful bits of x are 0x7; the useful bits of x then live
2893 // through y.
2894// After #2, the useful bits of x are 0x4.
2895// However, if x is used on an unpredictable instruction, then all its bits
2896// are useful.
2897// E.g.
2898// 1. y = x & 0x7
2899// 2. z = y >> 2
2900// 3. str x, [@x]
2901static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2902
2903 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2904 unsigned Depth) {
2905 uint64_t Imm =
2906 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2907 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2908 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2909 getUsefulBits(Op, UsefulBits, Depth + 1);
2910}
2911
2912 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2913 uint64_t Imm, uint64_t MSB,
2914 unsigned Depth) {
2915 // inherit the bitwidth value
2916 APInt OpUsefulBits(UsefulBits);
2917 OpUsefulBits = 1;
2918
2919 if (MSB >= Imm) {
2920 OpUsefulBits <<= MSB - Imm + 1;
2921 --OpUsefulBits;
2922 // The interesting part will be in the lower part of the result
2923 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2924 // The interesting part was starting at Imm in the argument
2925 OpUsefulBits <<= Imm;
2926 } else {
2927 OpUsefulBits <<= MSB + 1;
2928 --OpUsefulBits;
2929 // The interesting part will be shifted in the result
2930 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2931 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2932 // The interesting part was at zero in the argument
2933 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2934 }
2935
2936 UsefulBits &= OpUsefulBits;
2937}
2938
2939static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2940 unsigned Depth) {
2941 uint64_t Imm =
2942 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2943 uint64_t MSB =
2944 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2945
2946 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2947}
2948
2949 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2950 unsigned Depth) {
2951 uint64_t ShiftTypeAndValue =
2952 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2953 APInt Mask(UsefulBits);
2954 Mask.clearAllBits();
2955 Mask.flipAllBits();
2956
2957 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2958 // Shift Left
2959 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2960 Mask <<= ShiftAmt;
2961 getUsefulBits(Op, Mask, Depth + 1);
2962 Mask.lshrInPlace(ShiftAmt);
2963 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2964 // Shift Right
2965 // We do not handle AArch64_AM::ASR, because the sign will change the
2966 // number of useful bits
2967 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2968 Mask.lshrInPlace(ShiftAmt);
2969 getUsefulBits(Op, Mask, Depth + 1);
2970 Mask <<= ShiftAmt;
2971 } else
2972 return;
2973
2974 UsefulBits &= Mask;
2975}
2976
2977static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2978 unsigned Depth) {
2979 uint64_t Imm =
2980 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2981 uint64_t MSB =
2982 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2983
2984 APInt OpUsefulBits(UsefulBits);
2985 OpUsefulBits = 1;
2986
2987 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2988 ResultUsefulBits.flipAllBits();
2989 APInt Mask(UsefulBits.getBitWidth(), 0);
2990
2991 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2992
2993 if (MSB >= Imm) {
2994 // The instruction is a BFXIL.
2995 uint64_t Width = MSB - Imm + 1;
2996 uint64_t LSB = Imm;
2997
2998 OpUsefulBits <<= Width;
2999 --OpUsefulBits;
3000
3001 if (Op.getOperand(1) == Orig) {
3002 // Copy the low bits from the result to bits starting from LSB.
3003 Mask = ResultUsefulBits & OpUsefulBits;
3004 Mask <<= LSB;
3005 }
3006
3007 if (Op.getOperand(0) == Orig)
3008 // Bits starting from LSB in the input contribute to the result.
3009 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3010 } else {
3011 // The instruction is a BFI.
3012 uint64_t Width = MSB + 1;
3013 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3014
3015 OpUsefulBits <<= Width;
3016 --OpUsefulBits;
3017 OpUsefulBits <<= LSB;
3018
3019 if (Op.getOperand(1) == Orig) {
3020 // Copy the bits from the result to the zero bits.
3021 Mask = ResultUsefulBits & OpUsefulBits;
3022 Mask.lshrInPlace(LSB);
3023 }
3024
3025 if (Op.getOperand(0) == Orig)
3026 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3027 }
3028
3029 UsefulBits &= Mask;
3030}
3031
3032static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3033 SDValue Orig, unsigned Depth) {
3034
3035 // Users of this node should have already been instruction selected
3036 // FIXME: Can we turn that into an assert?
3037 if (!UserNode->isMachineOpcode())
3038 return;
3039
3040 switch (UserNode->getMachineOpcode()) {
3041 default:
3042 return;
3043 case AArch64::ANDSWri:
3044 case AArch64::ANDSXri:
3045 case AArch64::ANDWri:
3046 case AArch64::ANDXri:
3047 // We increment Depth only when we call the getUsefulBits
3048 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3049 Depth);
3050 case AArch64::UBFMWri:
3051 case AArch64::UBFMXri:
3052 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3053
3054 case AArch64::ORRWrs:
3055 case AArch64::ORRXrs:
3056 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3057 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3058 Depth);
3059 return;
3060 case AArch64::BFMWri:
3061 case AArch64::BFMXri:
3062 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3063
3064 case AArch64::STRBBui:
3065 case AArch64::STURBBi:
3066 if (UserNode->getOperand(0) != Orig)
3067 return;
3068 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3069 return;
3070
3071 case AArch64::STRHHui:
3072 case AArch64::STURHHi:
3073 if (UserNode->getOperand(0) != Orig)
3074 return;
3075 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3076 return;
3077 }
3078}
3079
3080static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3081 if (Depth >= SelectionDAG::MaxRecursionDepth)
3082 return;
3083 // Initialize UsefulBits
3084 if (!Depth) {
3085 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3086 // At the beginning, assume every produced bits is useful
3087 UsefulBits = APInt(Bitwidth, 0);
3088 UsefulBits.flipAllBits();
3089 }
3090 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3091
3092 for (SDNode *Node : Op.getNode()->users()) {
3093 // A use cannot produce useful bits
3094 APInt UsefulBitsForUse = APInt(UsefulBits);
3095 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3096 UsersUsefulBits |= UsefulBitsForUse;
3097 }
3098 // UsefulBits contains the produced bits that are meaningful for the
3099 // current definition, thus a user cannot make a bit meaningful at
3100 // this point
3101 UsefulBits &= UsersUsefulBits;
3102}
3103
3104/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3105/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3106/// 0, return Op unchanged.
3107static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3108 if (ShlAmount == 0)
3109 return Op;
3110
3111 EVT VT = Op.getValueType();
3112 SDLoc dl(Op);
3113 unsigned BitWidth = VT.getSizeInBits();
3114 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3115
3116 SDNode *ShiftNode;
3117 if (ShlAmount > 0) {
3118 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
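// (e.g., for 32-bit, an LSL by 8 becomes UBFM wD, wN, #24, #23)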
3119 ShiftNode = CurDAG->getMachineNode(
3120 UBFMOpc, dl, VT, Op,
3121 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3122 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3123 } else {
3124 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3125 assert(ShlAmount < 0 && "expected right shift");
3126 int ShrAmount = -ShlAmount;
3127 ShiftNode = CurDAG->getMachineNode(
3128 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3129 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3130 }
3131
3132 return SDValue(ShiftNode, 0);
3133}
3134
3135// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3136 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3137 bool BiggerPattern,
3138 const uint64_t NonZeroBits,
3139 SDValue &Src, int &DstLSB,
3140 int &Width);
3141
3142 // For bit-field-positioning pattern "(shl VAL, N)".
3143 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3144 bool BiggerPattern,
3145 const uint64_t NonZeroBits,
3146 SDValue &Src, int &DstLSB,
3147 int &Width);
3148
3149/// Does this tree qualify as an attempt to move a bitfield into position,
3150/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
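/// E.g., (and (shl x, 8), 0xff00) positions the low 8 bits of x at DstLSB = 8
/// with Width = 8.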
3151 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3152 bool BiggerPattern, SDValue &Src,
3153 int &DstLSB, int &Width) {
3154 EVT VT = Op.getValueType();
3155 unsigned BitWidth = VT.getSizeInBits();
3156 (void)BitWidth;
3157 assert(BitWidth == 32 || BitWidth == 64);
3158
3159 KnownBits Known = CurDAG->computeKnownBits(Op);
3160
3161 // Non-zero in the sense that they're not provably zero, which is the key
3162 // point if we want to use this value
3163 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3164 if (!isShiftedMask_64(NonZeroBits))
3165 return false;
3166
3167 switch (Op.getOpcode()) {
3168 default:
3169 break;
3170 case ISD::AND:
3171 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3172 NonZeroBits, Src, DstLSB, Width);
3173 case ISD::SHL:
3174 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3175 NonZeroBits, Src, DstLSB, Width);
3176 }
3177
3178 return false;
3179}
3180
3181 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3182 bool BiggerPattern,
3183 const uint64_t NonZeroBits,
3184 SDValue &Src, int &DstLSB,
3185 int &Width) {
3186 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3187
3188 EVT VT = Op.getValueType();
3189 assert((VT == MVT::i32 || VT == MVT::i64) &&
3190 "Caller guarantees VT is one of i32 or i64");
3191 (void)VT;
3192
3193 uint64_t AndImm;
3194 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3195 return false;
3196
3197 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3198 // 1) (AndImm & (1 << POS) == 0)
3199 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3200 //
3201 // 1) and 2) don't agree so something must be wrong (e.g., in
3202 // 'SelectionDAG::computeKnownBits')
3203 assert((~AndImm & NonZeroBits) == 0 &&
3204 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3205
3206 SDValue AndOp0 = Op.getOperand(0);
3207
3208 uint64_t ShlImm;
3209 SDValue ShlOp0;
3210 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3211 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3212 ShlOp0 = AndOp0.getOperand(0);
3213 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3214 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3215 ShlImm)) {
3216 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3217
3218 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3219 SDValue ShlVal = AndOp0.getOperand(0);
3220
3221 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3222 // expect VT to be MVT::i32.
3223 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3224
3225 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3226 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3227 } else
3228 return false;
3229
3230 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3231 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3232 // AndOp0+AND.
3233 if (!BiggerPattern && !AndOp0.hasOneUse())
3234 return false;
3235
3236 DstLSB = llvm::countr_zero(NonZeroBits);
3237 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3238
3239 // Bail out on large Width. This happens when no proper combining / constant
3240 // folding was performed.
3241 if (Width >= (int)VT.getSizeInBits()) {
3242 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3243 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3244 // "val".
3245 // If VT is i32, what Width >= 32 means:
3246 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3247 // demands at least 'Width' bits (after dag-combiner). This together with
3248 // `any_extend` Op (undefined higher bits) indicates missed combination
3249 // when lowering the 'and' IR instruction to an machine IR instruction.
3250 LLVM_DEBUG(
3251 dbgs()
3252 << "Found large Width in bit-field-positioning -- this indicates no "
3253 "proper combining / constant folding was performed\n");
3254 return false;
3255 }
3256
3257 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3258 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3259 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3260 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3261 // which case it is not profitable to insert an extra shift.
3262 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3263 return false;
3264
3265 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3266 return true;
3267}
3268
3269// For node (shl (and val, mask), N), returns true if the node is equivalent to
3270// UBFIZ.
3271static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3272 SDValue &Src, int &DstLSB,
3273 int &Width) {
3274 // The caller should have verified that Op is a left shift with a constant
3275 // shift amount; the asserts below check that.
3276 assert(Op.getOpcode() == ISD::SHL &&
3277 "Op.getNode() should be a SHL node to call this function");
3278 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3279 "Op.getNode() should shift ShlImm to call this function");
3280
3281 uint64_t AndImm = 0;
3282 SDValue Op0 = Op.getOperand(0);
3283 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3284 return false;
3285
3286 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3287 if (isMask_64(ShiftedAndImm)) {
3288 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3289 // should end with Mask, and could be prefixed with random bits if those
3290 // bits are shifted out.
3291 //
3292 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3293 // the AND result corresponding to those bits are shifted out, so it's fine
3294 // to not extract them.
3295 Width = llvm::countr_one(ShiftedAndImm);
3296 DstLSB = ShlImm;
3297 Src = Op0.getOperand(0);
3298 return true;
3299 }
3300 return false;
3301}
3302
3303static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3304 bool BiggerPattern,
3305 const uint64_t NonZeroBits,
3306 SDValue &Src, int &DstLSB,
3307 int &Width) {
3308 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3309
3310 EVT VT = Op.getValueType();
3311 assert((VT == MVT::i32 || VT == MVT::i64) &&
3312 "Caller guarantees that type is i32 or i64");
3313 (void)VT;
3314
3315 uint64_t ShlImm;
3316 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3317 return false;
3318
3319 if (!BiggerPattern && !Op.hasOneUse())
3320 return false;
3321
3322 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3323 return true;
3324
3325 DstLSB = llvm::countr_zero(NonZeroBits);
3326 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3327
3328 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3329 return false;
3330
3331 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3332 return true;
3333}
3334
3335static bool isShiftedMask(uint64_t Mask, EVT VT) {
3336 assert(VT == MVT::i32 || VT == MVT::i64);
3337 if (VT == MVT::i32)
3338 return isShiftedMask_32(Mask);
3339 return isShiftedMask_64(Mask);
3340}
3341
3342// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3343// inserted only sets known zero bits.
3344static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3345 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3346
3347 EVT VT = N->getValueType(0);
3348 if (VT != MVT::i32 && VT != MVT::i64)
3349 return false;
3350
3351 unsigned BitWidth = VT.getSizeInBits();
3352
3353 uint64_t OrImm;
3354 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3355 return false;
3356
3357 // Skip this transformation if OrImm can already be encoded directly as an
3358 // ORR immediate; otherwise, we'd trade an AND+ORR for ORR+BFI/BFXIL, which
3359 // is most likely performance neutral.
3360 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3361 return false;
3362
3363 uint64_t MaskImm;
3364 SDValue And = N->getOperand(0);
3365 // Must be a single use AND with an immediate operand.
3366 if (!And.hasOneUse() ||
3367 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3368 return false;
3369
3370 // Compute the Known Zero for the AND as this allows us to catch more general
3371 // cases than just looking for AND with imm.
3372 KnownBits Known = CurDAG->computeKnownBits(And);
3373
3374 // Non-zero in the sense that they're not provably zero, which is the key
3375 // point if we want to use this value.
3376 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3377
3378 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3379 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3380 return false;
3381
3382 // The bits being inserted must only set those bits that are known to be zero.
3383 if ((OrImm & NotKnownZero) != 0) {
3384 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3385 // currently handle this case.
3386 return false;
3387 }
3388
3389 // BFI/BFXIL dst, src, #lsb, #width.
3390 int LSB = llvm::countr_one(NotKnownZero);
3391 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3392
3393 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3394 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3395 unsigned ImmS = Width - 1;
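  // For example, inserting an 8-bit field at LSB 4 of an i32 value gives
  // ImmR == 28 and ImmS == 7, the BFM form of "BFI Wd, Wn, #4, #8".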
3396
3397 // If we're creating a BFI instruction avoid cases where we need more
3398 // instructions to materialize the BFI constant as compared to the original
3399 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3400 // should be no worse in this case.
3401 bool IsBFI = LSB != 0;
3402 uint64_t BFIImm = OrImm >> LSB;
3403 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3404 // We have a BFI instruction and we know the constant can't be materialized
3405 // with a ORR-immediate with the zero register.
3406 unsigned OrChunks = 0, BFIChunks = 0;
3407 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3408 if (((OrImm >> Shift) & 0xFFFF) != 0)
3409 ++OrChunks;
3410 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3411 ++BFIChunks;
3412 }
3413 if (BFIChunks > OrChunks)
3414 return false;
3415 }
3416
3417 // Materialize the constant to be inserted.
3418 SDLoc DL(N);
3419 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3420 SDNode *MOVI = CurDAG->getMachineNode(
3421 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3422
3423 // Create the BFI/BFXIL instruction.
3424 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3425 CurDAG->getTargetConstant(ImmR, DL, VT),
3426 CurDAG->getTargetConstant(ImmS, DL, VT)};
3427 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3428 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3429 return true;
3430}
3431
3432static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3433 SDValue &ShiftedOperand,
3434 uint64_t &EncodedShiftImm) {
3435 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3436 if (!Dst.hasOneUse())
3437 return false;
3438
3439 EVT VT = Dst.getValueType();
3440 assert((VT == MVT::i32 || VT == MVT::i64) &&
3441 "Caller should guarantee that VT is one of i32 or i64");
3442 const unsigned SizeInBits = VT.getSizeInBits();
3443
3444 SDLoc DL(Dst.getNode());
3445 uint64_t AndImm, ShlImm;
3446 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3447 isShiftedMask_64(AndImm)) {
3448 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3449 SDValue DstOp0 = Dst.getOperand(0);
3450 if (!DstOp0.hasOneUse())
3451 return false;
3452
3453 // An example to illustrate the transformation
3454 // From:
3455 // lsr x8, x1, #1
3456 // and x8, x8, #0x3f80
3457 // bfxil x8, x1, #0, #7
3458 // To:
3459 // and x8, x1, #0x7f
3460 // ubfx x9, x1, #8, #7
3461 // orr x8, x8, x9, lsl #7
3462 //
3463 // The number of instructions remains the same, but ORR is faster than BFXIL
3464 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3465 // the dependency chain is improved after the transformation.
3466 uint64_t SrlImm;
3467 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3468 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3469 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3470 unsigned MaskWidth =
3471 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3472 unsigned UBFMOpc =
3473 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3474 SDNode *UBFMNode = CurDAG->getMachineNode(
3475 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3476 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3477 VT),
3478 CurDAG->getTargetConstant(
3479 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3480 ShiftedOperand = SDValue(UBFMNode, 0);
3481 EncodedShiftImm = AArch64_AM::getShifterImm(
3482 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3483 return true;
3484 }
3485 }
3486 return false;
3487 }
3488
3489 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3490 ShiftedOperand = Dst.getOperand(0);
3491 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3492 return true;
3493 }
3494
3495 uint64_t SrlImm;
3496 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3497 ShiftedOperand = Dst.getOperand(0);
3498 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3499 return true;
3500 }
3501 return false;
3502}
3503
3504// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3505// the operands and select it to AArch64::ORR with shifted registers if
3506// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3507static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3508 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3509 const bool BiggerPattern) {
3510 EVT VT = N->getValueType(0);
3511 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3512 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3513 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3514 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3515 assert((VT == MVT::i32 || VT == MVT::i64) &&
3516 "Expect result type to be i32 or i64 since N is combinable to BFM");
3517 SDLoc DL(N);
3518
3519 // Bail out if BFM simplifies away one node in BFM Dst.
3520 if (OrOpd1 != Dst)
3521 return false;
3522
3523 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3524 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3525 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3526 if (BiggerPattern) {
3527 uint64_t SrcAndImm;
3528 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3529 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3530 // OrOpd0 = AND Src, #Mask
3531 // So BFM simplifies away one AND node from Src and doesn't simplify away
3532 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3533 // one node (from Rd), ORR is better since it has higher throughput and
3534 // smaller latency than BFM on many AArch64 processors (and for the rest
3535 // ORR is at least as good as BFM).
3536 SDValue ShiftedOperand;
3537 uint64_t EncodedShiftImm;
3538 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3539 EncodedShiftImm)) {
3540 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3541 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3542 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3543 return true;
3544 }
3545 }
3546 return false;
3547 }
3548
3549 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3550
3551 uint64_t ShlImm;
3552 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3553 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3554 SDValue Ops[] = {
3555 Dst, Src,
3556 CurDAG->getTargetConstant(
3557 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3558 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3559 return true;
3560 }
3561
3562 // Select the following pattern to left-shifted operand rather than BFI.
3563 // %val1 = op ..
3564 // %val2 = shl %val1, #imm
3565 // %res = or %val1, %val2
3566 //
3567 // If N is selected to be BFI, we know that
3568 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3569 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3570 //
3571 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3572 if (OrOpd0.getOperand(0) == OrOpd1) {
3573 SDValue Ops[] = {
3574 OrOpd1, OrOpd1,
3575 CurDAG->getTargetConstant(
3576 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3577 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3578 return true;
3579 }
3580 }
3581
3582 uint64_t SrlImm;
3583 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3584 // Select the following pattern to right-shifted operand rather than BFXIL.
3585 // %val1 = op ..
3586 // %val2 = lshr %val1, #imm
3587 // %res = or %val1, %val2
3588 //
3589 // If N is selected to be BFXIL, we know that
3590 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3591 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3592 //
3593 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3594 if (OrOpd0.getOperand(0) == OrOpd1) {
3595 SDValue Ops[] = {
3596 OrOpd1, OrOpd1,
3597 CurDAG->getTargetConstant(
3598 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3599 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3600 return true;
3601 }
3602 }
3603
3604 return false;
3605}
3606
3607static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3608 SelectionDAG *CurDAG) {
3609 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3610
3611 EVT VT = N->getValueType(0);
3612 if (VT != MVT::i32 && VT != MVT::i64)
3613 return false;
3614
3615 unsigned BitWidth = VT.getSizeInBits();
3616
3617 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3618 // have the expected shape. Try to undo that.
3619
3620 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3621 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3622
3623 // Given an OR operation, check if we have the following pattern
3624 // ubfm c, b, imm, imm2 (or something that does the same job, see
3625 // isBitfieldExtractOp)
3626 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3627 // countTrailingZeros(mask2) == imm2 - imm + 1
3628 // f = d | c
3629 // if yes, replace the OR instruction with:
3630 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3631
3632 // OR is commutative, check all combinations of operand order and values of
3633 // BiggerPattern, i.e.
3634 // Opd0, Opd1, BiggerPattern=false
3635 // Opd1, Opd0, BiggerPattern=false
3636 // Opd0, Opd1, BiggerPattern=true
3637 // Opd1, Opd0, BiggerPattern=true
3638 // Several of these combinations may match, so check with BiggerPattern=false
3639 // first since that will produce better results by matching more instructions
3640 // and/or inserting fewer extra instructions.
3641 for (int I = 0; I < 4; ++I) {
3642
3643 SDValue Dst, Src;
3644 unsigned ImmR, ImmS;
3645 bool BiggerPattern = I / 2;
3646 SDValue OrOpd0Val = N->getOperand(I % 2);
3647 SDNode *OrOpd0 = OrOpd0Val.getNode();
3648 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3649 SDNode *OrOpd1 = OrOpd1Val.getNode();
3650
3651 unsigned BFXOpc;
3652 int DstLSB, Width;
3653 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3654 NumberOfIgnoredLowBits, BiggerPattern)) {
3655 // Check that the returned opcode is compatible with the pattern,
3656 // i.e., same type and zero extended (U and not S)
3657 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3658 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3659 continue;
3660
3661 // Compute the width of the bitfield insertion
3662 DstLSB = 0;
3663 Width = ImmS - ImmR + 1;
3664 // FIXME: This constraint is to catch bitfield insertion only; we may
3665 // want to widen the pattern if we want to grab the general bitfield
3666 // move case.
3667 if (Width <= 0)
3668 continue;
3669
3670 // If the mask on the insertee is correct, we have a BFXIL operation. We
3671 // can share the ImmR and ImmS values from the already-computed UBFM.
3672 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3673 BiggerPattern,
3674 Src, DstLSB, Width)) {
3675 ImmR = (BitWidth - DstLSB) % BitWidth;
3676 ImmS = Width - 1;
3677 } else
3678 continue;
3679
3680 // Check the second part of the pattern
3681 EVT VT = OrOpd1Val.getValueType();
3682 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3683
3684 // Compute the Known Zero for the candidate of the first operand.
3685 // This allows us to catch more general cases than just looking for
3686 // AND with imm. Indeed, simplify-demanded-bits may have removed
3687 // the AND instruction because it proves it was useless.
3688 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3689
3690 // Check if there is enough room for the second operand to appear
3691 // in the first one
3692 APInt BitsToBeInserted =
3693 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3694
3695 if ((BitsToBeInserted & ~Known.Zero) != 0)
3696 continue;
3697
3698 // Set the first operand
3699 uint64_t Imm;
3700 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3701 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3702 // In that case, we can eliminate the AND
3703 Dst = OrOpd1->getOperand(0);
3704 else
3705 // Maybe the AND has been removed by simplify-demanded-bits
3706 // or is useful because it discards more bits
3707 Dst = OrOpd1Val;
3708
3709 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3710 // with shifted operand is more efficient.
3711 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3712 BiggerPattern))
3713 return true;
3714
3715 // both parts match
3716 SDLoc DL(N);
3717 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3718 CurDAG->getTargetConstant(ImmS, DL, VT)};
3719 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3720 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3721 return true;
3722 }
3723
3724 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3725 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3726 // mask (e.g., 0x000ffff0).
3727 uint64_t Mask0Imm, Mask1Imm;
3728 SDValue And0 = N->getOperand(0);
3729 SDValue And1 = N->getOperand(1);
3730 if (And0.hasOneUse() && And1.hasOneUse() &&
3731 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3732 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3733 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3734 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3735
3736 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3737 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3738 // bits to be inserted.
3739 if (isShiftedMask(Mask0Imm, VT)) {
3740 std::swap(And0, And1);
3741 std::swap(Mask0Imm, Mask1Imm);
3742 }
3743
3744 SDValue Src = And1->getOperand(0);
3745 SDValue Dst = And0->getOperand(0);
3746 unsigned LSB = llvm::countr_zero(Mask1Imm);
3747 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3748
3749 // The BFXIL inserts the low-order bits from a source register, so right
3750 // shift the needed bits into place.
3751 SDLoc DL(N);
3752 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3753 uint64_t LsrImm = LSB;
3754 if (Src->hasOneUse() &&
3755 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3756 (LsrImm + LSB) < BitWidth) {
3757 Src = Src->getOperand(0);
3758 LsrImm += LSB;
3759 }
3760
3761 SDNode *LSR = CurDAG->getMachineNode(
3762 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3763 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3764
3765 // BFXIL is an alias of BFM, so translate to BFM operands.
3766 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3767 unsigned ImmS = Width - 1;
3768
3769 // Create the BFXIL instruction.
3770 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3771 CurDAG->getTargetConstant(ImmR, DL, VT),
3772 CurDAG->getTargetConstant(ImmS, DL, VT)};
3773 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3774 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3775 return true;
3776 }
3777
3778 return false;
3779}
3780
3781bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3782 if (N->getOpcode() != ISD::OR)
3783 return false;
3784
3785 APInt NUsefulBits;
3786 getUsefulBits(SDValue(N, 0), NUsefulBits);
3787
3788 // If none of the bits are useful, just return UNDEF.
3789 if (!NUsefulBits) {
3790 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3791 return true;
3792 }
3793
3794 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3795 return true;
3796
3797 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3798}
3799
3800/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3801/// equivalent of a left shift by a constant amount followed by an AND masking
3802/// out a contiguous set of bits.
3803bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3804 if (N->getOpcode() != ISD::AND)
3805 return false;
3806
3807 EVT VT = N->getValueType(0);
3808 if (VT != MVT::i32 && VT != MVT::i64)
3809 return false;
3810
3811 SDValue Op0;
3812 int DstLSB, Width;
3813 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3814 Op0, DstLSB, Width))
3815 return false;
3816
3817 // ImmR is the rotate right amount.
3818 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3819 // ImmS is the most significant bit of the source to be moved.
3820 unsigned ImmS = Width - 1;
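  // For example, (and (shl X, 3), 0x1f8) on i32 has DstLSB == 3 and Width == 6,
  // so it becomes UBFMWri with ImmR == 29 and ImmS == 5, i.e. "UBFIZ Wd, Wn, #3, #6".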
3821
3822 SDLoc DL(N);
3823 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3824 CurDAG->getTargetConstant(ImmS, DL, VT)};
3825 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3826 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3827 return true;
3828}
3829
3830/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3831/// variable shift/rotate instructions.
3832bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3833 EVT VT = N->getValueType(0);
3834
3835 unsigned Opc;
3836 switch (N->getOpcode()) {
3837 case ISD::ROTR:
3838 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3839 break;
3840 case ISD::SHL:
3841 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3842 break;
3843 case ISD::SRL:
3844 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3845 break;
3846 case ISD::SRA:
3847 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3848 break;
3849 default:
3850 return false;
3851 }
3852
3853 uint64_t Size;
3854 uint64_t Bits;
3855 if (VT == MVT::i32) {
3856 Bits = 5;
3857 Size = 32;
3858 } else if (VT == MVT::i64) {
3859 Bits = 6;
3860 Size = 64;
3861 } else
3862 return false;
3863
3864 SDValue ShiftAmt = N->getOperand(1);
3865 SDLoc DL(N);
3866 SDValue NewShiftAmt;
3867
3868 // Skip over an extend of the shift amount.
3869 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3870 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3871 ShiftAmt = ShiftAmt->getOperand(0);
3872
3873 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3874 SDValue Add0 = ShiftAmt->getOperand(0);
3875 SDValue Add1 = ShiftAmt->getOperand(1);
3876 uint64_t Add0Imm;
3877 uint64_t Add1Imm;
3878 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3879 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3880 // to avoid the ADD/SUB.
3881 NewShiftAmt = Add0;
3882 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3883 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3884 (Add0Imm % Size == 0)) {
3885 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3886 // to generate a NEG instead of a SUB from a constant.
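      // For example, (srl X, (sub 64, Y)) on i64 becomes an LSRV by (neg Y),
      // since 64 - Y is congruent to -Y modulo 64.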
3887 unsigned NegOpc;
3888 unsigned ZeroReg;
3889 EVT SubVT = ShiftAmt->getValueType(0);
3890 if (SubVT == MVT::i32) {
3891 NegOpc = AArch64::SUBWrr;
3892 ZeroReg = AArch64::WZR;
3893 } else {
3894 assert(SubVT == MVT::i64);
3895 NegOpc = AArch64::SUBXrr;
3896 ZeroReg = AArch64::XZR;
3897 }
3898 SDValue Zero =
3899 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3900 MachineSDNode *Neg =
3901 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3902 NewShiftAmt = SDValue(Neg, 0);
3903 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3904 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3905 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3906 // to generate a NOT instead of a SUB from a constant.
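      // For example, (sra X, (sub 63, Y)) on i64 becomes an ASRV by (orn xzr, Y),
      // since 63 - Y is congruent to ~Y modulo 64.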
3907 unsigned NotOpc;
3908 unsigned ZeroReg;
3909 EVT SubVT = ShiftAmt->getValueType(0);
3910 if (SubVT == MVT::i32) {
3911 NotOpc = AArch64::ORNWrr;
3912 ZeroReg = AArch64::WZR;
3913 } else {
3914 assert(SubVT == MVT::i64);
3915 NotOpc = AArch64::ORNXrr;
3916 ZeroReg = AArch64::XZR;
3917 }
3918 SDValue Zero =
3919 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3920 MachineSDNode *Not =
3921 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3922 NewShiftAmt = SDValue(Not, 0);
3923 } else
3924 return false;
3925 } else {
3926 // If the shift amount is masked with an AND, check that the mask covers the
3927 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3928 // the AND.
3929 uint64_t MaskImm;
3930 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3931 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3932 return false;
3933
3934 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3935 return false;
3936
3937 NewShiftAmt = ShiftAmt->getOperand(0);
3938 }
3939
3940 // Narrow/widen the shift amount to match the size of the shift operation.
3941 if (VT == MVT::i32)
3942 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3943 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3944 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3945 MachineSDNode *Ext = CurDAG->getMachineNode(
3946 AArch64::SUBREG_TO_REG, DL, VT,
3947 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3948 NewShiftAmt = SDValue(Ext, 0);
3949 }
3950
3951 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3952 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3953 return true;
3954}
3955
3956static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3957 SDValue &FixedPos,
3958 unsigned RegWidth,
3959 bool isReciprocal) {
3960 APFloat FVal(0.0);
3961 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3962 FVal = CN->getValueAPF();
3963 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3964 // Some otherwise illegal constants are allowed in this case.
3965 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3966 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3967 return false;
3968
3969 ConstantPoolSDNode *CN =
3970 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3971 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3972 } else
3973 return false;
3974
3975 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3976 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3977 // x-register.
3978 //
3979 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3980 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3981 // integers.
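  // For example, (fp_to_sint (fmul Val, 16.0)) targeting a w-register yields
  // FBits == 4, which fixed-point conversion patterns can then use as the
  // #fbits operand.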
3982 bool IsExact;
3983
3984 if (isReciprocal)
3985 if (!FVal.getExactInverse(&FVal))
3986 return false;
3987
3988 // fbits is between 1 and 64 in the worst-case, which means the fmul
3989 // could have 2^64 as an actual operand. Need 65 bits of precision.
3990 APSInt IntVal(65, true);
3991 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3992
3993 // N.b. isPowerOf2 also checks for > 0.
3994 if (!IsExact || !IntVal.isPowerOf2())
3995 return false;
3996 unsigned FBits = IntVal.logBase2();
3997
3998 // Checks above should have guaranteed that we haven't lost information in
3999 // finding FBits, but it must still be in range.
4000 if (FBits == 0 || FBits > RegWidth) return false;
4001
4002 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4003 return true;
4004}
4005
4006bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4007 unsigned RegWidth) {
4008 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4009 false);
4010}
4011
4012bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4013 SDValue &FixedPos,
4014 unsigned RegWidth) {
4015 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4016 true);
4017}
4018
4019// Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
4020// of the string, obtains the integer values from them, and combines these
4021// into a single value to be used in the MRS/MSR instruction.
4022static int getIntOperandFromRegisterString(StringRef RegString) {
4023 SmallVector<StringRef, 5> Fields;
4024 RegString.split(Fields, ':');
4025
4026 if (Fields.size() == 1)
4027 return -1;
4028
4029 assert(Fields.size() == 5
4030 && "Invalid number of fields in read register string");
4031
4032 SmallVector<int, 5> Ops;
4033 bool AllIntFields = true;
4034
4035 for (StringRef Field : Fields) {
4036 unsigned IntField;
4037 AllIntFields &= !Field.getAsInteger(10, IntField);
4038 Ops.push_back(IntField);
4039 }
4040
4041 assert(AllIntFields &&
4042 "Unexpected non-integer value in special register string.");
4043 (void)AllIntFields;
4044
4045 // Need to combine the integer fields of the string into a single value
4046 // based on the bit encoding of MRS/MSR instruction.
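  // For example, the string "1:3:9:12:0" packs to
  // (1 << 14) | (3 << 11) | (9 << 7) | (12 << 3) | 0.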
4047 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4048 (Ops[3] << 3) | (Ops[4]);
4049}
4050
4051// Lower the read_register intrinsic to an MRS instruction node if the special
4052// register string argument is either of the form detailed in the ACLE (the
4053// form described in getIntOperandFromRegisterString) or is a named register
4054// known by the MRS SysReg mapper.
4055bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4056 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4057 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4058 SDLoc DL(N);
4059
4060 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4061
4062 unsigned Opcode64Bit = AArch64::MRS;
4063 int Imm = getIntOperandFromRegisterString(RegString->getString());
4064 if (Imm == -1) {
4065 // No match, Use the sysreg mapper to map the remaining possible strings to
4066 // the value for the register to be used for the instruction operand.
4067 const auto *TheReg =
4068 AArch64SysReg::lookupSysRegByName(RegString->getString());
4069 if (TheReg && TheReg->Readable &&
4070 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4071 Imm = TheReg->Encoding;
4072 else
4073 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4074
4075 if (Imm == -1) {
4076 // Still no match, see if this is "pc" or give up.
4077 if (!ReadIs128Bit && RegString->getString() == "pc") {
4078 Opcode64Bit = AArch64::ADR;
4079 Imm = 0;
4080 } else {
4081 return false;
4082 }
4083 }
4084 }
4085
4086 SDValue InChain = N->getOperand(0);
4087 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4088 if (!ReadIs128Bit) {
4089 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4090 {SysRegImm, InChain});
4091 } else {
4092 SDNode *MRRS = CurDAG->getMachineNode(
4093 AArch64::MRRS, DL,
4094 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4095 {SysRegImm, InChain});
4096
4097 // Sysregs are not endian. The even register always contains the low half
4098 // of the register.
4099 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4100 SDValue(MRRS, 0));
4101 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4102 SDValue(MRRS, 0));
4103 SDValue OutChain = SDValue(MRRS, 1);
4104
4105 ReplaceUses(SDValue(N, 0), Lo);
4106 ReplaceUses(SDValue(N, 1), Hi);
4107 ReplaceUses(SDValue(N, 2), OutChain);
4108 };
4109 return true;
4110}
4111
4112// Lower the write_register intrinsic to an MSR instruction node if the special
4113// register string argument is either of the form detailed in the ACLE (the
4114// form described in getIntOperandFromRegisterString) or is a named register
4115// known by the MSR SysReg mapper.
4116bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4117 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4118 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4119 SDLoc DL(N);
4120
4121 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4122
4123 if (!WriteIs128Bit) {
4124 // Check if the register was one of those allowed as the pstatefield value
4125 // in the MSR (immediate) instruction. To accept the values allowed in the
4126 // pstatefield for the MSR (immediate) instruction, we also require that an
4127 // immediate value has been provided as an argument; we know that this is
4128 // the case as it has been ensured by semantic checking.
4129 auto trySelectPState = [&](auto PMapper, unsigned State) {
4130 if (PMapper) {
4131 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4132 "Expected a constant integer expression.");
4133 unsigned Reg = PMapper->Encoding;
4134 uint64_t Immed = N->getConstantOperandVal(2);
4135 CurDAG->SelectNodeTo(
4136 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4137 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4138 return true;
4139 }
4140 return false;
4141 };
4142
4143 if (trySelectPState(
4144 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4145 AArch64::MSRpstateImm4))
4146 return true;
4147 if (trySelectPState(
4148 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4149 AArch64::MSRpstateImm1))
4150 return true;
4151 }
4152
4153 int Imm = getIntOperandFromRegisterString(RegString->getString());
4154 if (Imm == -1) {
4155 // Use the sysreg mapper to attempt to map the remaining possible strings
4156 // to the value for the register to be used for the MSR (register)
4157 // instruction operand.
4158 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4159 if (TheReg && TheReg->Writeable &&
4160 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4161 Imm = TheReg->Encoding;
4162 else
4163 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4164
4165 if (Imm == -1)
4166 return false;
4167 }
4168
4169 SDValue InChain = N->getOperand(0);
4170 if (!WriteIs128Bit) {
4171 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4172 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4173 N->getOperand(2), InChain);
4174 } else {
4175 // No endian swap. The lower half always goes into the even subreg, and the
4176 // higher half always into the odd subreg.
4177 SDNode *Pair = CurDAG->getMachineNode(
4178 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4179 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4180 MVT::i32),
4181 N->getOperand(2),
4182 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4183 N->getOperand(3),
4184 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4185
4186 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4187 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4188 SDValue(Pair, 0), InChain);
4189 }
4190
4191 return true;
4192}
4193
4194/// We've got special pseudo-instructions for these
4195bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4196 unsigned Opcode;
4197 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4198
4199 // Leave IR for LSE if subtarget supports it.
4200 if (Subtarget->hasLSE()) return false;
4201
4202 if (MemTy == MVT::i8)
4203 Opcode = AArch64::CMP_SWAP_8;
4204 else if (MemTy == MVT::i16)
4205 Opcode = AArch64::CMP_SWAP_16;
4206 else if (MemTy == MVT::i32)
4207 Opcode = AArch64::CMP_SWAP_32;
4208 else if (MemTy == MVT::i64)
4209 Opcode = AArch64::CMP_SWAP_64;
4210 else
4211 llvm_unreachable("Unknown AtomicCmpSwap type");
4212
4213 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4214 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4215 N->getOperand(0)};
4216 SDNode *CmpSwap = CurDAG->getMachineNode(
4217 Opcode, SDLoc(N),
4218 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4219
4220 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4221 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4222
4223 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4224 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4225 CurDAG->RemoveDeadNode(N);
4226
4227 return true;
4228}
4229
4230bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4231 SDValue &Shift, bool Negate) {
4232 if (!isa<ConstantSDNode>(N))
4233 return false;
4234
4235 SDLoc DL(N);
4236 APInt Val =
4237 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4238
4239 if (Negate)
4240 Val = -Val;
4241
4242 switch (VT.SimpleTy) {
4243 case MVT::i8:
4244 // All immediates are supported.
4245 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4246 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4247 return true;
4248 case MVT::i16:
4249 case MVT::i32:
4250 case MVT::i64:
4251 // Support 8bit unsigned immediates.
4252 if ((Val & ~0xff) == 0) {
4253 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4254 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4255 return true;
4256 }
4257 // Support 16bit unsigned immediates that are a multiple of 256.
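    // For example, 0x4500 is encoded as Imm == 0x45 with Shift == 8.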
4258 if ((Val & ~0xff00) == 0) {
4259 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4260 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4261 return true;
4262 }
4263 break;
4264 default:
4265 break;
4266 }
4267
4268 return false;
4269}
4270
4271bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4272 SDValue &Imm, SDValue &Shift,
4273 bool Negate) {
4274 if (!isa<ConstantSDNode>(N))
4275 return false;
4276
4277 SDLoc DL(N);
4278 int64_t Val = cast<ConstantSDNode>(N)
4279 ->getAPIntValue()
4280 .trunc(VT.getFixedSizeInBits())
4281 .getSExtValue();
4282
4283 if (Negate)
4284 Val = -Val;
4285
4286 // Signed saturating instructions treat their immediate operand as unsigned,
4287 // whereas the related intrinsics define their operands to be signed. This
4288 // means we can only use the immediate form when the operand is non-negative.
4289 if (Val < 0)
4290 return false;
4291
4292 switch (VT.SimpleTy) {
4293 case MVT::i8:
4294 // All positive immediates are supported.
4295 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4296 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4297 return true;
4298 case MVT::i16:
4299 case MVT::i32:
4300 case MVT::i64:
4301 // Support 8bit positive immediates.
4302 if (Val <= 255) {
4303 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4304 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4305 return true;
4306 }
4307 // Support 16bit positive immediates that are a multiple of 256.
4308 if (Val <= 65280 && Val % 256 == 0) {
4309 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4310 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4311 return true;
4312 }
4313 break;
4314 default:
4315 break;
4316 }
4317
4318 return false;
4319}
4320
4321bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4322 SDValue &Shift) {
4323 if (!isa<ConstantSDNode>(N))
4324 return false;
4325
4326 SDLoc DL(N);
4327 int64_t Val = cast<ConstantSDNode>(N)
4328 ->getAPIntValue()
4329 .trunc(VT.getFixedSizeInBits())
4330 .getSExtValue();
4331
4332 switch (VT.SimpleTy) {
4333 case MVT::i8:
4334 // All immediates are supported.
4335 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4336 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4337 return true;
4338 case MVT::i16:
4339 case MVT::i32:
4340 case MVT::i64:
4341 // Support 8bit signed immediates.
4342 if (Val >= -128 && Val <= 127) {
4343 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4344 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4345 return true;
4346 }
4347 // Support 16bit signed immediates that are a multiple of 256.
4348 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4349 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4350 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4351 return true;
4352 }
4353 break;
4354 default:
4355 break;
4356 }
4357
4358 return false;
4359}
4360
4361bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4362 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4363 int64_t ImmVal = CNode->getSExtValue();
4364 SDLoc DL(N);
4365 if (ImmVal >= -128 && ImmVal < 128) {
4366 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4367 return true;
4368 }
4369 }
4370 return false;
4371}
4372
4373bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4374 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4375 uint64_t ImmVal = CNode->getZExtValue();
4376
4377 switch (VT.SimpleTy) {
4378 case MVT::i8:
4379 ImmVal &= 0xFF;
4380 break;
4381 case MVT::i16:
4382 ImmVal &= 0xFFFF;
4383 break;
4384 case MVT::i32:
4385 ImmVal &= 0xFFFFFFFF;
4386 break;
4387 case MVT::i64:
4388 break;
4389 default:
4390 llvm_unreachable("Unexpected type");
4391 }
4392
4393 if (ImmVal < 256) {
4394 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4395 return true;
4396 }
4397 }
4398 return false;
4399}
4400
4401bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4402 bool Invert) {
4403 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4404 uint64_t ImmVal = CNode->getZExtValue();
4405 SDLoc DL(N);
4406
4407 if (Invert)
4408 ImmVal = ~ImmVal;
4409
4410 // Shift mask depending on type size.
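    // For example, for MVT::i16 the value 0x00F0 is replicated to
    // 0x00F000F000F000F0 before being checked as a 64-bit logical immediate.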
4411 switch (VT.SimpleTy) {
4412 case MVT::i8:
4413 ImmVal &= 0xFF;
4414 ImmVal |= ImmVal << 8;
4415 ImmVal |= ImmVal << 16;
4416 ImmVal |= ImmVal << 32;
4417 break;
4418 case MVT::i16:
4419 ImmVal &= 0xFFFF;
4420 ImmVal |= ImmVal << 16;
4421 ImmVal |= ImmVal << 32;
4422 break;
4423 case MVT::i32:
4424 ImmVal &= 0xFFFFFFFF;
4425 ImmVal |= ImmVal << 32;
4426 break;
4427 case MVT::i64:
4428 break;
4429 default:
4430 llvm_unreachable("Unexpected type");
4431 }
4432
4433 uint64_t encoding;
4434 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4435 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4436 return true;
4437 }
4438 }
4439 return false;
4440}
4441
4442// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4443// Rather than attempt to normalise everything we can sometimes saturate the
4444// shift amount during selection. This function also allows for consistent
4445// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4446// required by the instructions.
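// For example, with Low == 1, High == 8 and AllowSaturation set, a constant
// shift amount of 19 is selected as 8.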
4447bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4448 uint64_t High, bool AllowSaturation,
4449 SDValue &Imm) {
4450 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4451 uint64_t ImmVal = CN->getZExtValue();
4452
4453 // Reject shift amounts that are too small.
4454 if (ImmVal < Low)
4455 return false;
4456
4457 // Reject or saturate shift amounts that are too big.
4458 if (ImmVal > High) {
4459 if (!AllowSaturation)
4460 return false;
4461 ImmVal = High;
4462 }
4463
4464 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4465 return true;
4466 }
4467
4468 return false;
4469}
4470
4471bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4472 // tagp(FrameIndex, IRGstack, tag_offset):
4473 // since the offset between FrameIndex and IRGstack is a compile-time
4474 // constant, this can be lowered to a single ADDG instruction.
4475 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4476 return false;
4477 }
4478
4479 SDValue IRG_SP = N->getOperand(2);
4480 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4481 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4482 return false;
4483 }
4484
4485 const TargetLowering *TLI = getTargetLowering();
4486 SDLoc DL(N);
4487 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4488 SDValue FiOp = CurDAG->getTargetFrameIndex(
4489 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4490 int TagOffset = N->getConstantOperandVal(3);
4491
4492 SDNode *Out = CurDAG->getMachineNode(
4493 AArch64::TAGPstack, DL, MVT::i64,
4494 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4495 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4496 ReplaceNode(N, Out);
4497 return true;
4498}
4499
4500void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4501 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4502 "llvm.aarch64.tagp third argument must be an immediate");
4503 if (trySelectStackSlotTagP(N))
4504 return;
4505 // FIXME: the above applies whenever the offset between Op1 and Op2 is a
4506 // compile-time constant, not just for stack allocations.
4507
4508 // General case for unrelated pointers in Op1 and Op2.
4509 SDLoc DL(N);
4510 int TagOffset = N->getConstantOperandVal(3);
4511 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4512 {N->getOperand(1), N->getOperand(2)});
4513 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4514 {SDValue(N1, 0), N->getOperand(2)});
4515 SDNode *N3 = CurDAG->getMachineNode(
4516 AArch64::ADDG, DL, MVT::i64,
4517 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4518 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4519 ReplaceNode(N, N3);
4520}
4521
4522bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4523 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4524
4525 // Bail when not a "cast" like insert_subvector.
4526 if (N->getConstantOperandVal(2) != 0)
4527 return false;
4528 if (!N->getOperand(0).isUndef())
4529 return false;
4530
4531 // Bail when normal isel should do the job.
4532 EVT VT = N->getValueType(0);
4533 EVT InVT = N->getOperand(1).getValueType();
4534 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4535 return false;
4536 if (InVT.getSizeInBits() <= 128)
4537 return false;
4538
4539 // NOTE: We can only get here when doing fixed length SVE code generation.
4540 // We do manual selection because the types involved are not linked to real
4541 // registers (despite being legal) and must be coerced into SVE registers.
4542
4544 "Expected to insert into a packed scalable vector!");
4545
4546 SDLoc DL(N);
4547 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4548 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4549 N->getOperand(1), RC));
4550 return true;
4551}
4552
4553bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4554 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4555
4556 // Bail when not a "cast" like extract_subvector.
4557 if (N->getConstantOperandVal(1) != 0)
4558 return false;
4559
4560 // Bail when normal isel can do the job.
4561 EVT VT = N->getValueType(0);
4562 EVT InVT = N->getOperand(0).getValueType();
4563 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4564 return false;
4565 if (VT.getSizeInBits() <= 128)
4566 return false;
4567
4568 // NOTE: We can only get here when doing fixed length SVE code generation.
4569 // We do manual selection because the types involved are not linked to real
4570 // registers (despite being legal) and must be coerced into SVE registers.
4571
4573 "Expected to extract from a packed scalable vector!");
4574
4575 SDLoc DL(N);
4576 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4577 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4578 N->getOperand(0), RC));
4579 return true;
4580}
4581
4582bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4583 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4584
4585 SDValue N0 = N->getOperand(0);
4586 SDValue N1 = N->getOperand(1);
4587
4588 EVT VT = N->getValueType(0);
4589 SDLoc DL(N);
4590
4591 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4592 // Rotate by a constant is a funnel shift in IR which is expanded to
4593 // an OR with shifted operands.
4594 // We do the following transform:
4595 // OR N0, N1 -> xar (x, y, imm)
4596 // Where:
4597 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4598 // N0 = SHL_PRED true, V, splat(bits-imm)
4599 // V = (xor x, y)
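  // For example, for nxv4i32 with N0 == SHL_PRED pg, V, splat(24) and
  // N1 == SRL_PRED pg, V, splat(8), where V == (xor x, y) and pg is all
  // active, this selects "XAR_ZZZI_S x, y, #8".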
4600 if (VT.isScalableVector() &&
4601 (Subtarget->hasSVE2() ||
4602 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4603 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4604 N1.getOpcode() != AArch64ISD::SRL_PRED)
4605 std::swap(N0, N1);
4606 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4607 N1.getOpcode() != AArch64ISD::SRL_PRED)
4608 return false;
4609
4610 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4611 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4612 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4613 return false;
4614
4615 if (N0.getOperand(1) != N1.getOperand(1))
4616 return false;
4617
4618 SDValue R1, R2;
4619 bool IsXOROperand = true;
4620 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4621 IsXOROperand = false;
4622 } else {
4623 R1 = N0.getOperand(1).getOperand(0);
4624 R2 = N1.getOperand(1).getOperand(1);
4625 }
4626
4627 APInt ShlAmt, ShrAmt;
4628 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4629 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4630 return false;
4631
4632 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4633 return false;
4634
4635 if (!IsXOROperand) {
4636 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4637 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4638 SDValue MOVIV = SDValue(MOV, 0);
4639
4640 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4641 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4642 VT, Zero, MOVIV, ZSub);
4643
4644 R1 = N1->getOperand(1);
4645 R2 = SDValue(SubRegToReg, 0);
4646 }
4647
4648 SDValue Imm =
4649 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4650
4651 SDValue Ops[] = {R1, R2, Imm};
4652 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4653 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4654 AArch64::XAR_ZZZI_D})) {
4655 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4656 return true;
4657 }
4658 return false;
4659 }
4660
4661 // We have Neon SHA3 XAR operation for v2i64 but for types
4662 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4663 // is available.
4664 EVT SVT;
4665 switch (VT.getSimpleVT().SimpleTy) {
4666 case MVT::v4i32:
4667 case MVT::v2i32:
4668 SVT = MVT::nxv4i32;
4669 break;
4670 case MVT::v8i16:
4671 case MVT::v4i16:
4672 SVT = MVT::nxv8i16;
4673 break;
4674 case MVT::v16i8:
4675 case MVT::v8i8:
4676 SVT = MVT::nxv16i8;
4677 break;
4678 case MVT::v2i64:
4679 case MVT::v1i64:
4680 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4681 break;
4682 default:
4683 return false;
4684 }
4685
4686 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4687 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4688 return false;
4689
4690 if (N0->getOpcode() != AArch64ISD::VSHL ||
4691 N1->getOpcode() != AArch64ISD::VLSHR)
4692 return false;
4693
4694 if (N0->getOperand(0) != N1->getOperand(0))
4695 return false;
4696
4697 SDValue R1, R2;
4698 bool IsXOROperand = true;
4699 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4700 IsXOROperand = false;
4701 } else {
4702 SDValue XOR = N0.getOperand(0);
4703 R1 = XOR.getOperand(0);
4704 R2 = XOR.getOperand(1);
4705 }
4706
4707 unsigned HsAmt = N0.getConstantOperandVal(1);
4708 unsigned ShAmt = N1.getConstantOperandVal(1);
4709
4710 SDValue Imm = CurDAG->getTargetConstant(
4711 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4712
4713 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4714 if (ShAmt + HsAmt != VTSizeInBits)
4715 return false;
4716
4717 if (!IsXOROperand) {
4718 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4719 SDNode *MOV =
4720 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4721 SDValue MOVIV = SDValue(MOV, 0);
4722
4723 R1 = N1->getOperand(0);
4724 R2 = MOVIV;
4725 }
4726
4727 if (SVT != VT) {
4728 SDValue Undef =
4729 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4730
4731 if (SVT.isScalableVector() && VT.is64BitVector()) {
4732 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4733
4734 SDValue UndefQ = SDValue(
4735 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4736 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4737
4738 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4739 UndefQ, R1, DSub),
4740 0);
4741 if (R2.getValueType() == VT)
4742 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4743 UndefQ, R2, DSub),
4744 0);
4745 }
4746
4747 SDValue SubReg = CurDAG->getTargetConstant(
4748 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4749
4750 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4751 R1, SubReg),
4752 0);
4753
4754 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4755 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4756 Undef, R2, SubReg),
4757 0);
4758 }
4759
4760 SDValue Ops[] = {R1, R2, Imm};
4761 SDNode *XAR = nullptr;
4762
4763 if (SVT.isScalableVector()) {
4764 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4765 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4766 AArch64::XAR_ZZZI_D}))
4767 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4768 } else {
4769 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4770 }
4771
4772 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4773
4774 if (SVT != VT) {
4775 if (VT.is64BitVector() && SVT.isScalableVector()) {
4776 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4777
4778 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4779 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4780 SDValue(XAR, 0), ZSub);
4781
4782 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4783 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4784 SDValue(Q, 0), DSub);
4785 } else {
4786 SDValue SubReg = CurDAG->getTargetConstant(
4787 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4788 MVT::i32);
4789 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4790 SDValue(XAR, 0), SubReg);
4791 }
4792 }
4793 ReplaceNode(N, XAR);
4794 return true;
4795}
4796
4797void AArch64DAGToDAGISel::Select(SDNode *Node) {
4798 // If we have a custom node, we already have selected!
4799 if (Node->isMachineOpcode()) {
4800 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4801 Node->setNodeId(-1);
4802 return;
4803 }
4804
4805 // A few cases need custom selection.
4806 EVT VT = Node->getValueType(0);
4807
4808 switch (Node->getOpcode()) {
4809 default:
4810 break;
4811
4812 case ISD::ATOMIC_CMP_SWAP:
4813 if (SelectCMP_SWAP(Node))
4814 return;
4815 break;
4816
4817 case ISD::READ_REGISTER:
4818 case AArch64ISD::MRRS:
4819 if (tryReadRegister(Node))
4820 return;
4821 break;
4822
4823 case ISD::WRITE_REGISTER:
4824 case AArch64ISD::MSRR:
4825 if (tryWriteRegister(Node))
4826 return;
4827 break;
4828
4829 case ISD::LOAD: {
4830 // Try to select as an indexed load. Fall through to normal processing
4831 // if we can't.
4832 if (tryIndexedLoad(Node))
4833 return;
4834 break;
4835 }
4836
4837 case ISD::SRL:
4838 case ISD::AND:
4839 case ISD::SRA:
4840 case ISD::SIGN_EXTEND_INREG:
4841 if (tryBitfieldExtractOp(Node))
4842 return;
4843 if (tryBitfieldInsertInZeroOp(Node))
4844 return;
4845 [[fallthrough]];
4846 case ISD::ROTR:
4847 case ISD::SHL:
4848 if (tryShiftAmountMod(Node))
4849 return;
4850 break;
4851
4852 case ISD::SIGN_EXTEND:
4853 if (tryBitfieldExtractOpFromSExt(Node))
4854 return;
4855 break;
4856
4857 case ISD::OR:
4858 if (tryBitfieldInsertOp(Node))
4859 return;
4860 if (trySelectXAR(Node))
4861 return;
4862 break;
4863
4864 case ISD::EXTRACT_SUBVECTOR: {
4865 if (trySelectCastScalableToFixedLengthVector(Node))
4866 return;
4867 break;
4868 }
4869
4870 case ISD::INSERT_SUBVECTOR: {
4871 if (trySelectCastFixedLengthToScalableVector(Node))
4872 return;
4873 break;
4874 }
4875
4876 case ISD::Constant: {
4877 // Materialize zero constants as copies from WZR/XZR. This allows
4878 // the coalescer to propagate these into other instructions.
4879 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4880 if (ConstNode->isZero()) {
4881 if (VT == MVT::i32) {
4882 SDValue New = CurDAG->getCopyFromReg(
4883 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4884 ReplaceNode(Node, New.getNode());
4885 return;
4886 } else if (VT == MVT::i64) {
4887 SDValue New = CurDAG->getCopyFromReg(
4888 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4889 ReplaceNode(Node, New.getNode());
4890 return;
4891 }
4892 }
4893 break;
4894 }
4895
4896 case ISD::FrameIndex: {
4897 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
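// (The frame index operand is rewritten to SP plus the object's offset
// when frame indices are eliminated.)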
4898 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4899 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4900 const TargetLowering *TLI = getTargetLowering();
4901 SDValue TFI = CurDAG->getTargetFrameIndex(
4902 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4903 SDLoc DL(Node);
4904 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4905 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4906 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4907 return;
4908 }
4909 case ISD::INTRINSIC_W_CHAIN: {
4910 unsigned IntNo = Node->getConstantOperandVal(1);
4911 switch (IntNo) {
4912 default:
4913 break;
4914 case Intrinsic::aarch64_gcsss: {
4915 SDLoc DL(Node);
4916 SDValue Chain = Node->getOperand(0);
4917 SDValue Val = Node->getOperand(2);
4918 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4919 SDNode *SS1 =
4920 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4921 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4922 MVT::Other, Zero, SDValue(SS1, 0));
4923 ReplaceNode(Node, SS2);
4924 return;
4925 }
4926 case Intrinsic::aarch64_ldaxp:
4927 case Intrinsic::aarch64_ldxp: {
4928 unsigned Op =
4929 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4930 SDValue MemAddr = Node->getOperand(2);
4931 SDLoc DL(Node);
4932 SDValue Chain = Node->getOperand(0);
4933
4934 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4935 MVT::Other, MemAddr, Chain);
4936
4937 // Transfer memoperands.
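// Keeping the original MachineMemOperand preserves the access's size,
// alignment and ordering information for later passes.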
4938 MachineMemOperand *MemOp =
4939 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4940 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4941 ReplaceNode(Node, Ld);
4942 return;
4943 }
4944 case Intrinsic::aarch64_stlxp:
4945 case Intrinsic::aarch64_stxp: {
4946 unsigned Op =
4947 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4948 SDLoc DL(Node);
4949 SDValue Chain = Node->getOperand(0);
4950 SDValue ValLo = Node->getOperand(2);
4951 SDValue ValHi = Node->getOperand(3);
4952 SDValue MemAddr = Node->getOperand(4);
4953
4954 // Place arguments in the right order.
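// Data registers first, address last, matching the STXP/STLXP operand
// order; the chain is appended after the fixed operands.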
4955 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4956
4957 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4958 // Transfer memoperands.
4959 MachineMemOperand *MemOp =
4960 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4961 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4962
4963 ReplaceNode(Node, St);
4964 return;
4965 }
4966 case Intrinsic::aarch64_neon_ld1x2:
4967 if (VT == MVT::v8i8) {
4968 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4969 return;
4970 } else if (VT == MVT::v16i8) {
4971 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4972 return;
4973 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4974 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4975 return;
4976 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4977 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4978 return;
4979 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4980 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4981 return;
4982 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4983 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4984 return;
4985 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4986 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4987 return;
4988 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4989 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4990 return;
4991 }
4992 break;
4993 case Intrinsic::aarch64_neon_ld1x3:
4994 if (VT == MVT::v8i8) {
4995 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4996 return;
4997 } else if (VT == MVT::v16i8) {
4998 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4999 return;
5000 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5001 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5002 return;
5003 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5004 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5005 return;
5006 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5007 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5008 return;
5009 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5010 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5011 return;
5012 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5013 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5014 return;
5015 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5016 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5017 return;
5018 }
5019 break;
5020 case Intrinsic::aarch64_neon_ld1x4:
5021 if (VT == MVT::v8i8) {
5022 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5023 return;
5024 } else if (VT == MVT::v16i8) {
5025 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5026 return;
5027 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5028 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5029 return;
5030 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5031 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5032 return;
5033 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5034 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5035 return;
5036 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5037 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5038 return;
5039 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5040 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5041 return;
5042 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5043 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5044 return;
5045 }
5046 break;
5047 case Intrinsic::aarch64_neon_ld2:
5048 if (VT == MVT::v8i8) {
5049 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5050 return;
5051 } else if (VT == MVT::v16i8) {
5052 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5053 return;
5054 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5055 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5056 return;
5057 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5058 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5059 return;
5060 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5061 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5062 return;
5063 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5064 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5065 return;
5066 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
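// The structured LD2 has no 1d arrangement, so the single-element case
// uses a two-register LD1 instead.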
5067 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5068 return;
5069 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5070 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5071 return;
5072 }
5073 break;
5074 case Intrinsic::aarch64_neon_ld3:
5075 if (VT == MVT::v8i8) {
5076 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5077 return;
5078 } else if (VT == MVT::v16i8) {
5079 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5080 return;
5081 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5082 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5083 return;
5084 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5085 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5086 return;
5087 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5088 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5089 return;
5090 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5091 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5092 return;
5093 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5094 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5095 return;
5096 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5097 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5098 return;
5099 }
5100 break;
5101 case Intrinsic::aarch64_neon_ld4:
5102 if (VT == MVT::v8i8) {
5103 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5104 return;
5105 } else if (VT == MVT::v16i8) {
5106 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5107 return;
5108 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5109 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5110 return;
5111 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5112 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5113 return;
5114 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5115 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5116 return;
5117 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5118 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5119 return;
5120 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5121 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5122 return;
5123 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5124 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5125 return;
5126 }
5127 break;
5128 case Intrinsic::aarch64_neon_ld2r:
5129 if (VT == MVT::v8i8) {
5130 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5131 return;
5132 } else if (VT == MVT::v16i8) {
5133 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5134 return;
5135 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5136 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5137 return;
5138 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5139 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5140 return;
5141 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5142 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5143 return;
5144 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5145 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5146 return;
5147 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5148 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5149 return;
5150 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5151 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5152 return;
5153 }
5154 break;
5155 case Intrinsic::aarch64_neon_ld3r:
5156 if (VT == MVT::v8i8) {
5157 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5158 return;
5159 } else if (VT == MVT::v16i8) {
5160 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5161 return;
5162 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5163 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5164 return;
5165 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5166 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5167 return;
5168 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5169 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5170 return;
5171 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5172 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5173 return;
5174 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5175 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5178 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5179 return;
5180 }
5181 break;
5182 case Intrinsic::aarch64_neon_ld4r:
5183 if (VT == MVT::v8i8) {
5184 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5185 return;
5186 } else if (VT == MVT::v16i8) {
5187 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5188 return;
5189 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5190 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5191 return;
5192 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5193 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5194 return;
5195 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5196 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5197 return;
5198 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5199 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5200 return;
5201 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5202 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5203 return;
5204 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5205 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5206 return;
5207 }
5208 break;
5209 case Intrinsic::aarch64_neon_ld2lane:
5210 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5211 SelectLoadLane(Node, 2, AArch64::LD2i8);
5212 return;
5213 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5214 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5215 SelectLoadLane(Node, 2, AArch64::LD2i16);
5216 return;
5217 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5218 VT == MVT::v2f32) {
5219 SelectLoadLane(Node, 2, AArch64::LD2i32);
5220 return;
5221 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5222 VT == MVT::v1f64) {
5223 SelectLoadLane(Node, 2, AArch64::LD2i64);
5224 return;
5225 }
5226 break;
5227 case Intrinsic::aarch64_neon_ld3lane:
5228 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5229 SelectLoadLane(Node, 3, AArch64::LD3i8);
5230 return;
5231 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5232 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5233 SelectLoadLane(Node, 3, AArch64::LD3i16);
5234 return;
5235 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5236 VT == MVT::v2f32) {
5237 SelectLoadLane(Node, 3, AArch64::LD3i32);
5238 return;
5239 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5240 VT == MVT::v1f64) {
5241 SelectLoadLane(Node, 3, AArch64::LD3i64);
5242 return;
5243 }
5244 break;
5245 case Intrinsic::aarch64_neon_ld4lane:
5246 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5247 SelectLoadLane(Node, 4, AArch64::LD4i8);
5248 return;
5249 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5250 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5251 SelectLoadLane(Node, 4, AArch64::LD4i16);
5252 return;
5253 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5254 VT == MVT::v2f32) {
5255 SelectLoadLane(Node, 4, AArch64::LD4i32);
5256 return;
5257 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5258 VT == MVT::v1f64) {
5259 SelectLoadLane(Node, 4, AArch64::LD4i64);
5260 return;
5261 }
5262 break;
5263 case Intrinsic::aarch64_ld64b:
5264 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5265 return;
5266 case Intrinsic::aarch64_sve_ld2q_sret: {
5267 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5268 return;
5269 }
5270 case Intrinsic::aarch64_sve_ld3q_sret: {
5271 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5272 return;
5273 }
5274 case Intrinsic::aarch64_sve_ld4q_sret: {
5275 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5276 return;
5277 }
5278 case Intrinsic::aarch64_sve_ld2_sret: {
5279 if (VT == MVT::nxv16i8) {
5280 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5281 true);
5282 return;
5283 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5284 VT == MVT::nxv8bf16) {
5285 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5286 true);
5287 return;
5288 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5289 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5290 true);
5291 return;
5292 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5293 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5294 true);
5295 return;
5296 }
5297 break;
5298 }
5299 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5300 if (VT == MVT::nxv16i8) {
5301 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5302 SelectContiguousMultiVectorLoad(
5303 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5304 else if (Subtarget->hasSVE2p1())
5305 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5306 AArch64::LD1B_2Z);
5307 else
5308 break;
5309 return;
5310 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5311 VT == MVT::nxv8bf16) {
5312 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5313 SelectContiguousMultiVectorLoad(
5314 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5315 else if (Subtarget->hasSVE2p1())
5316 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5317 AArch64::LD1H_2Z);
5318 else
5319 break;
5320 return;
5321 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5322 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5323 SelectContiguousMultiVectorLoad(
5324 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5325 else if (Subtarget->hasSVE2p1())
5326 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5327 AArch64::LD1W_2Z);
5328 else
5329 break;
5330 return;
5331 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5332 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5333 SelectContiguousMultiVectorLoad(
5334 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5335 else if (Subtarget->hasSVE2p1())
5336 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5337 AArch64::LD1D_2Z);
5338 else
5339 break;
5340 return;
5341 }
5342 break;
5343 }
5344 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5345 if (VT == MVT::nxv16i8) {
5346 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5347 SelectContiguousMultiVectorLoad(
5348 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5349 else if (Subtarget->hasSVE2p1())
5350 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5351 AArch64::LD1B_4Z);
5352 else
5353 break;
5354 return;
5355 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5356 VT == MVT::nxv8bf16) {
5357 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5358 SelectContiguousMultiVectorLoad(
5359 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5360 else if (Subtarget->hasSVE2p1())
5361 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5362 AArch64::LD1H_4Z);
5363 else
5364 break;
5365 return;
5366 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5367 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5368 SelectContiguousMultiVectorLoad(
5369 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5370 else if (Subtarget->hasSVE2p1())
5371 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5372 AArch64::LD1W_4Z);
5373 else
5374 break;
5375 return;
5376 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5377 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5378 SelectContiguousMultiVectorLoad(
5379 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5380 else if (Subtarget->hasSVE2p1())
5381 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5382 AArch64::LD1D_4Z);
5383 else
5384 break;
5385 return;
5386 }
5387 break;
5388 }
5389 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5390 if (VT == MVT::nxv16i8) {
5391 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5392 SelectContiguousMultiVectorLoad(Node, 2, 0,
5393 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5394 AArch64::LDNT1B_2Z_PSEUDO);
5395 else if (Subtarget->hasSVE2p1())
5396 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5397 AArch64::LDNT1B_2Z);
5398 else
5399 break;
5400 return;
5401 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5402 VT == MVT::nxv8bf16) {
5403 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5404 SelectContiguousMultiVectorLoad(Node, 2, 1,
5405 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5406 AArch64::LDNT1H_2Z_PSEUDO);
5407 else if (Subtarget->hasSVE2p1())
5408 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5409 AArch64::LDNT1H_2Z);
5410 else
5411 break;
5412 return;
5413 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5414 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5415 SelectContiguousMultiVectorLoad(Node, 2, 2,
5416 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5417 AArch64::LDNT1W_2Z_PSEUDO);
5418 else if (Subtarget->hasSVE2p1())
5419 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5420 AArch64::LDNT1W_2Z);
5421 else
5422 break;
5423 return;
5424 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5425 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5426 SelectContiguousMultiVectorLoad(Node, 2, 3,
5427 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5428 AArch64::LDNT1D_2Z_PSEUDO);
5429 else if (Subtarget->hasSVE2p1())
5430 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5431 AArch64::LDNT1D_2Z);
5432 else
5433 break;
5434 return;
5435 }
5436 break;
5437 }
5438 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5439 if (VT == MVT::nxv16i8) {
5440 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5441 SelectContiguousMultiVectorLoad(Node, 4, 0,
5442 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5443 AArch64::LDNT1B_4Z_PSEUDO);
5444 else if (Subtarget->hasSVE2p1())
5445 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5446 AArch64::LDNT1B_4Z);
5447 else
5448 break;
5449 return;
5450 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5451 VT == MVT::nxv8bf16) {
5452 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5453 SelectContiguousMultiVectorLoad(Node, 4, 1,
5454 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5455 AArch64::LDNT1H_4Z_PSEUDO);
5456 else if (Subtarget->hasSVE2p1())
5457 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5458 AArch64::LDNT1H_4Z);
5459 else
5460 break;
5461 return;
5462 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5463 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5464 SelectContiguousMultiVectorLoad(Node, 4, 2,
5465 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5466 AArch64::LDNT1W_4Z_PSEUDO);
5467 else if (Subtarget->hasSVE2p1())
5468 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5469 AArch64::LDNT1W_4Z);
5470 else
5471 break;
5472 return;
5473 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5474 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5475 SelectContiguousMultiVectorLoad(Node, 4, 3,
5476 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5477 AArch64::LDNT1D_4Z_PSEUDO);
5478 else if (Subtarget->hasSVE2p1())
5479 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5480 AArch64::LDNT1D_4Z);
5481 else
5482 break;
5483 return;
5484 }
5485 break;
5486 }
5487 case Intrinsic::aarch64_sve_ld3_sret: {
5488 if (VT == MVT::nxv16i8) {
5489 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5490 true);
5491 return;
5492 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5493 VT == MVT::nxv8bf16) {
5494 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5495 true);
5496 return;
5497 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5498 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5499 true);
5500 return;
5501 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5502 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5503 true);
5504 return;
5505 }
5506 break;
5507 }
5508 case Intrinsic::aarch64_sve_ld4_sret: {
5509 if (VT == MVT::nxv16i8) {
5510 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5511 true);
5512 return;
5513 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5514 VT == MVT::nxv8bf16) {
5515 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5516 true);
5517 return;
5518 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5519 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5520 true);
5521 return;
5522 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5523 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5524 true);
5525 return;
5526 }
5527 break;
5528 }
5529 case Intrinsic::aarch64_sme_read_hor_vg2: {
5530 if (VT == MVT::nxv16i8) {
5531 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5532 AArch64::MOVA_2ZMXI_H_B);
5533 return;
5534 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5535 VT == MVT::nxv8bf16) {
5536 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5537 AArch64::MOVA_2ZMXI_H_H);
5538 return;
5539 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5540 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5541 AArch64::MOVA_2ZMXI_H_S);
5542 return;
5543 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5544 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5545 AArch64::MOVA_2ZMXI_H_D);
5546 return;
5547 }
5548 break;
5549 }
5550 case Intrinsic::aarch64_sme_read_ver_vg2: {
5551 if (VT == MVT::nxv16i8) {
5552 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5553 AArch64::MOVA_2ZMXI_V_B);
5554 return;
5555 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5556 VT == MVT::nxv8bf16) {
5557 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5558 AArch64::MOVA_2ZMXI_V_H);
5559 return;
5560 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5561 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5562 AArch64::MOVA_2ZMXI_V_S);
5563 return;
5564 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5565 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5566 AArch64::MOVA_2ZMXI_V_D);
5567 return;
5568 }
5569 break;
5570 }
5571 case Intrinsic::aarch64_sme_read_hor_vg4: {
5572 if (VT == MVT::nxv16i8) {
5573 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5574 AArch64::MOVA_4ZMXI_H_B);
5575 return;
5576 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5577 VT == MVT::nxv8bf16) {
5578 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5579 AArch64::MOVA_4ZMXI_H_H);
5580 return;
5581 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5582 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5583 AArch64::MOVA_4ZMXI_H_S);
5584 return;
5585 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5586 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5587 AArch64::MOVA_4ZMXI_H_D);
5588 return;
5589 }
5590 break;
5591 }
5592 case Intrinsic::aarch64_sme_read_ver_vg4: {
5593 if (VT == MVT::nxv16i8) {
5594 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5595 AArch64::MOVA_4ZMXI_V_B);
5596 return;
5597 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5598 VT == MVT::nxv8bf16) {
5599 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5600 AArch64::MOVA_4ZMXI_V_H);
5601 return;
5602 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5603 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5604 AArch64::MOVA_4ZMXI_V_S);
5605 return;
5606 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5607 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5608 AArch64::MOVA_4ZMXI_V_D);
5609 return;
5610 }
5611 break;
5612 }
5613 case Intrinsic::aarch64_sme_read_vg1x2: {
5614 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5615 AArch64::MOVA_VG2_2ZMXI);
5616 return;
5617 }
5618 case Intrinsic::aarch64_sme_read_vg1x4: {
5619 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5620 AArch64::MOVA_VG4_4ZMXI);
5621 return;
5622 }
5623 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5624 if (VT == MVT::nxv16i8) {
5625 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5626 return;
5627 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5628 VT == MVT::nxv8bf16) {
5629 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5630 return;
5631 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5632 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5633 return;
5634 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5635 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5636 return;
5637 }
5638 break;
5639 }
5640 case Intrinsic::aarch64_sme_readz_vert_x2: {
5641 if (VT == MVT::nxv16i8) {
5642 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5643 return;
5644 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5645 VT == MVT::nxv8bf16) {
5646 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5647 return;
5648 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5649 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5650 return;
5651 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5652 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5653 return;
5654 }
5655 break;
5656 }
5657 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5658 if (VT == MVT::nxv16i8) {
5659 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5660 return;
5661 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5662 VT == MVT::nxv8bf16) {
5663 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5664 return;
5665 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5666 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5667 return;
5668 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5669 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5670 return;
5671 }
5672 break;
5673 }
5674 case Intrinsic::aarch64_sme_readz_vert_x4: {
5675 if (VT == MVT::nxv16i8) {
5676 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5677 return;
5678 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5679 VT == MVT::nxv8bf16) {
5680 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5681 return;
5682 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5683 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5684 return;
5685 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5686 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5687 return;
5688 }
5689 break;
5690 }
5691 case Intrinsic::aarch64_sme_readz_x2: {
5692 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5693 AArch64::ZA);
5694 return;
5695 }
5696 case Intrinsic::aarch64_sme_readz_x4: {
5697 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5698 AArch64::ZA);
5699 return;
5700 }
5701 case Intrinsic::swift_async_context_addr: {
5702 SDLoc DL(Node);
5703 SDValue Chain = Node->getOperand(0);
5704 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5705 SDValue Res = SDValue(
5706 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5707 CurDAG->getTargetConstant(8, DL, MVT::i32),
5708 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5709 0);
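// Res is FP - 8, the slot reserved for the Swift async context in this
// function's frame record.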
5710 ReplaceUses(SDValue(Node, 0), Res);
5711 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5712 CurDAG->RemoveDeadNode(Node);
5713
5714 auto &MF = CurDAG->getMachineFunction();
5715 MF.getFrameInfo().setFrameAddressIsTaken(true);
5716 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5717 return;
5718 }
5719 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5720 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5721 Node->getValueType(0),
5722 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5723 AArch64::LUTI2_4ZTZI_S}))
5724 // Second Immediate must be <= 3:
5725 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5726 return;
5727 }
5728 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5729 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5730 Node->getValueType(0),
5731 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5732 // Second Immediate must be <= 1:
5733 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5734 return;
5735 }
5736 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5737 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5738 Node->getValueType(0),
5739 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5740 AArch64::LUTI2_2ZTZI_S}))
5741 // Second Immediate must be <= 7:
5742 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5743 return;
5744 }
5745 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5746 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5747 Node->getValueType(0),
5748 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5749 AArch64::LUTI4_2ZTZI_S}))
5750 // Second Immediate must be <= 3:
5751 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5752 return;
5753 }
5754 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5755 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5756 return;
5757 }
5758 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5759 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5760 Node->getValueType(0),
5761 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5762 SelectCVTIntrinsicFP8(Node, 2, Opc);
5763 return;
5764 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5765 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5766 Node->getValueType(0),
5767 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5768 SelectCVTIntrinsicFP8(Node, 2, Opc);
5769 return;
5770 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5771 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5772 Node->getValueType(0),
5773 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5774 SelectCVTIntrinsicFP8(Node, 2, Opc);
5775 return;
5776 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5777 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5778 Node->getValueType(0),
5779 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5780 SelectCVTIntrinsicFP8(Node, 2, Opc);
5781 return;
5782 }
5783 } break;
5784 case ISD::INTRINSIC_WO_CHAIN: {
5785 unsigned IntNo = Node->getConstantOperandVal(0);
5786 switch (IntNo) {
5787 default:
5788 break;
5789 case Intrinsic::aarch64_tagp:
5790 SelectTagP(Node);
5791 return;
5792
5793 case Intrinsic::ptrauth_auth:
5794 SelectPtrauthAuth(Node);
5795 return;
5796
5797 case Intrinsic::ptrauth_resign:
5798 SelectPtrauthResign(Node);
5799 return;
5800
5801 case Intrinsic::aarch64_neon_tbl2:
5802 SelectTable(Node, 2,
5803 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5804 false);
5805 return;
5806 case Intrinsic::aarch64_neon_tbl3:
5807 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5808 : AArch64::TBLv16i8Three,
5809 false);
5810 return;
5811 case Intrinsic::aarch64_neon_tbl4:
5812 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5813 : AArch64::TBLv16i8Four,
5814 false);
5815 return;
5816 case Intrinsic::aarch64_neon_tbx2:
5817 SelectTable(Node, 2,
5818 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5819 true);
5820 return;
5821 case Intrinsic::aarch64_neon_tbx3:
5822 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5823 : AArch64::TBXv16i8Three,
5824 true);
5825 return;
5826 case Intrinsic::aarch64_neon_tbx4:
5827 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5828 : AArch64::TBXv16i8Four,
5829 true);
5830 return;
5831 case Intrinsic::aarch64_sve_srshl_single_x2:
5832 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5833 Node->getValueType(0),
5834 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5835 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5836 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5837 return;
5838 case Intrinsic::aarch64_sve_srshl_single_x4:
5839 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5840 Node->getValueType(0),
5841 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5842 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5843 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5844 return;
5845 case Intrinsic::aarch64_sve_urshl_single_x2:
5846 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5847 Node->getValueType(0),
5848 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5849 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5850 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5851 return;
5852 case Intrinsic::aarch64_sve_urshl_single_x4:
5853 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5854 Node->getValueType(0),
5855 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5856 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5857 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5858 return;
5859 case Intrinsic::aarch64_sve_srshl_x2:
5860 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5861 Node->getValueType(0),
5862 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5863 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5864 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5865 return;
5866 case Intrinsic::aarch64_sve_srshl_x4:
5867 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5868 Node->getValueType(0),
5869 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5870 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5871 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5872 return;
5873 case Intrinsic::aarch64_sve_urshl_x2:
5874 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5875 Node->getValueType(0),
5876 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5877 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5878 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5879 return;
5880 case Intrinsic::aarch64_sve_urshl_x4:
5881 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5882 Node->getValueType(0),
5883 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5884 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5885 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5886 return;
5887 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5888 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5889 Node->getValueType(0),
5890 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5891 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5892 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5893 return;
5894 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5895 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5896 Node->getValueType(0),
5897 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5898 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5899 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5900 return;
5901 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5902 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5903 Node->getValueType(0),
5904 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5905 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5906 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5907 return;
5908 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5909 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5910 Node->getValueType(0),
5911 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5912 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5913 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5914 return;
5915 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5916 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5917 Node->getValueType(0),
5918 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5919 AArch64::FSCALE_2ZZ_D}))
5920 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5921 return;
5922 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5923 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5924 Node->getValueType(0),
5925 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5926 AArch64::FSCALE_4ZZ_D}))
5927 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5928 return;
5929 case Intrinsic::aarch64_sme_fp8_scale_x2:
5930 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5931 Node->getValueType(0),
5932 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5933 AArch64::FSCALE_2Z2Z_D}))
5934 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5935 return;
5936 case Intrinsic::aarch64_sme_fp8_scale_x4:
5937 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5938 Node->getValueType(0),
5939 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5940 AArch64::FSCALE_4Z4Z_D}))
5941 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5942 return;
5943 case Intrinsic::aarch64_sve_whilege_x2:
5944 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5945 Node->getValueType(0),
5946 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5947 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5948 SelectWhilePair(Node, Op);
5949 return;
5950 case Intrinsic::aarch64_sve_whilegt_x2:
5951 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5952 Node->getValueType(0),
5953 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5954 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5955 SelectWhilePair(Node, Op);
5956 return;
5957 case Intrinsic::aarch64_sve_whilehi_x2:
5958 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5959 Node->getValueType(0),
5960 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5961 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5962 SelectWhilePair(Node, Op);
5963 return;
5964 case Intrinsic::aarch64_sve_whilehs_x2:
5965 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5966 Node->getValueType(0),
5967 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5968 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5969 SelectWhilePair(Node, Op);
5970 return;
5971 case Intrinsic::aarch64_sve_whilele_x2:
5972 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5973 Node->getValueType(0),
5974 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5975 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5976 SelectWhilePair(Node, Op);
5977 return;
5978 case Intrinsic::aarch64_sve_whilelo_x2:
5979 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5980 Node->getValueType(0),
5981 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5982 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5983 SelectWhilePair(Node, Op);
5984 return;
5985 case Intrinsic::aarch64_sve_whilels_x2:
5986 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5987 Node->getValueType(0),
5988 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5989 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5990 SelectWhilePair(Node, Op);
5991 return;
5992 case Intrinsic::aarch64_sve_whilelt_x2:
5993 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5994 Node->getValueType(0),
5995 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5996 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5997 SelectWhilePair(Node, Op);
5998 return;
5999 case Intrinsic::aarch64_sve_smax_single_x2:
6000 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6001 Node->getValueType(0),
6002 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6003 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6004 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6005 return;
6006 case Intrinsic::aarch64_sve_umax_single_x2:
6007 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6008 Node->getValueType(0),
6009 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6010 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6011 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6012 return;
6013 case Intrinsic::aarch64_sve_fmax_single_x2:
6014 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6015 Node->getValueType(0),
6016 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6017 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6018 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6019 return;
6020 case Intrinsic::aarch64_sve_smax_single_x4:
6021 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6022 Node->getValueType(0),
6023 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6024 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6025 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6026 return;
6027 case Intrinsic::aarch64_sve_umax_single_x4:
6028 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6029 Node->getValueType(0),
6030 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6031 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6032 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6033 return;
6034 case Intrinsic::aarch64_sve_fmax_single_x4:
6035 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6036 Node->getValueType(0),
6037 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6038 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6039 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6040 return;
6041 case Intrinsic::aarch64_sve_smin_single_x2:
6042 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6043 Node->getValueType(0),
6044 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6045 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6046 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6047 return;
6048 case Intrinsic::aarch64_sve_umin_single_x2:
6049 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6050 Node->getValueType(0),
6051 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6052 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6053 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6054 return;
6055 case Intrinsic::aarch64_sve_fmin_single_x2:
6056 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6057 Node->getValueType(0),
6058 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6059 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6060 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6061 return;
6062 case Intrinsic::aarch64_sve_smin_single_x4:
6063 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6064 Node->getValueType(0),
6065 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6066 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6067 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6068 return;
6069 case Intrinsic::aarch64_sve_umin_single_x4:
6070 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6071 Node->getValueType(0),
6072 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6073 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6074 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6075 return;
6076 case Intrinsic::aarch64_sve_fmin_single_x4:
6077 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6078 Node->getValueType(0),
6079 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6080 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6081 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6082 return;
6083 case Intrinsic::aarch64_sve_smax_x2:
6084 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6085 Node->getValueType(0),
6086 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6087 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6088 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6089 return;
6090 case Intrinsic::aarch64_sve_umax_x2:
6091 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6092 Node->getValueType(0),
6093 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6094 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6095 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6096 return;
6097 case Intrinsic::aarch64_sve_fmax_x2:
6098 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6099 Node->getValueType(0),
6100 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6101 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6102 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6103 return;
6104 case Intrinsic::aarch64_sve_smax_x4:
6105 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6106 Node->getValueType(0),
6107 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6108 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6109 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6110 return;
6111 case Intrinsic::aarch64_sve_umax_x4:
6112 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6113 Node->getValueType(0),
6114 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6115 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6116 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6117 return;
6118 case Intrinsic::aarch64_sve_fmax_x4:
6119 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6120 Node->getValueType(0),
6121 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6122 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6123 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6124 return;
6125 case Intrinsic::aarch64_sme_famax_x2:
6126 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6127 Node->getValueType(0),
6128 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6129 AArch64::FAMAX_2Z2Z_D}))
6130 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6131 return;
6132 case Intrinsic::aarch64_sme_famax_x4:
6133 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6134 Node->getValueType(0),
6135 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6136 AArch64::FAMAX_4Z4Z_D}))
6137 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6138 return;
6139 case Intrinsic::aarch64_sme_famin_x2:
6140 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6141 Node->getValueType(0),
6142 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6143 AArch64::FAMIN_2Z2Z_D}))
6144 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6145 return;
6146 case Intrinsic::aarch64_sme_famin_x4:
6147 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6148 Node->getValueType(0),
6149 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6150 AArch64::FAMIN_4Z4Z_D}))
6151 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6152 return;
6153 case Intrinsic::aarch64_sve_smin_x2:
6154 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6155 Node->getValueType(0),
6156 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6157 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6158 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6159 return;
6160 case Intrinsic::aarch64_sve_umin_x2:
6161 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6162 Node->getValueType(0),
6163 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6164 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6165 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6166 return;
6167 case Intrinsic::aarch64_sve_fmin_x2:
6168 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6169 Node->getValueType(0),
6170 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6171 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6172 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6173 return;
6174 case Intrinsic::aarch64_sve_smin_x4:
6175 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6176 Node->getValueType(0),
6177 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6178 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6179 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6180 return;
6181 case Intrinsic::aarch64_sve_umin_x4:
6182 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6183 Node->getValueType(0),
6184 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6185 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6186 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6187 return;
6188 case Intrinsic::aarch64_sve_fmin_x4:
6189 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6190 Node->getValueType(0),
6191 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6192 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6193 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6194 return;
6195 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6196 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6197 Node->getValueType(0),
6198 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6199 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6200 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6201 return;
6202 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6203 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6204 Node->getValueType(0),
6205 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6206 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6207 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6208 return;
6209 case Intrinsic::aarch64_sve_fminnm_single_x2:
6210 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6211 Node->getValueType(0),
6212 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6213 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6214 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6215 return;
6216 case Intrinsic::aarch64_sve_fminnm_single_x4:
6217 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6218 Node->getValueType(0),
6219 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6220 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6221 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6222 return;
6223 case Intrinsic::aarch64_sve_fmaxnm_x2:
6224 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6225 Node->getValueType(0),
6226 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6227 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6228 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6229 return;
6230 case Intrinsic::aarch64_sve_fmaxnm_x4:
6231 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6232 Node->getValueType(0),
6233 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6234 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6235 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_fminnm_x2:
6238 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6239 Node->getValueType(0),
6240 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6241 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6242 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_fminnm_x4:
6245 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6246 Node->getValueType(0),
6247 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6248 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6249 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_fcvtzs_x2:
6252 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6253 return;
6254 case Intrinsic::aarch64_sve_scvtf_x2:
6255 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6256 return;
6257 case Intrinsic::aarch64_sve_fcvtzu_x2:
6258 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6259 return;
6260 case Intrinsic::aarch64_sve_ucvtf_x2:
6261 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6262 return;
6263 case Intrinsic::aarch64_sve_fcvtzs_x4:
6264 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6265 return;
6266 case Intrinsic::aarch64_sve_scvtf_x4:
6267 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6268 return;
6269 case Intrinsic::aarch64_sve_fcvtzu_x4:
6270 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6271 return;
6272 case Intrinsic::aarch64_sve_ucvtf_x4:
6273 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6274 return;
6275 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6276 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6277 return;
6278 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6279 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6280 return;
6281 case Intrinsic::aarch64_sve_sclamp_single_x2:
6282 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6283 Node->getValueType(0),
6284 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6285 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6286 SelectClamp(Node, 2, Op);
6287 return;
6288 case Intrinsic::aarch64_sve_uclamp_single_x2:
6289 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6290 Node->getValueType(0),
6291 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6292 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6293 SelectClamp(Node, 2, Op);
6294 return;
6295 case Intrinsic::aarch64_sve_fclamp_single_x2:
6296 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6297 Node->getValueType(0),
6298 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6299 AArch64::FCLAMP_VG2_2Z2Z_D}))
6300 SelectClamp(Node, 2, Op);
6301 return;
6302 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6303 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6304 return;
6305 case Intrinsic::aarch64_sve_sclamp_single_x4:
6306 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6307 Node->getValueType(0),
6308 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6309 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6310 SelectClamp(Node, 4, Op);
6311 return;
6312 case Intrinsic::aarch64_sve_uclamp_single_x4:
6313 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6314 Node->getValueType(0),
6315 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6316 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6317 SelectClamp(Node, 4, Op);
6318 return;
6319 case Intrinsic::aarch64_sve_fclamp_single_x4:
6320 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6321 Node->getValueType(0),
6322 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6323 AArch64::FCLAMP_VG4_4Z4Z_D}))
6324 SelectClamp(Node, 4, Op);
6325 return;
6326 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6327 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6328 return;
6329 case Intrinsic::aarch64_sve_add_single_x2:
6330 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6331 Node->getValueType(0),
6332 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6333 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6334 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6335 return;
6336 case Intrinsic::aarch64_sve_add_single_x4:
6337 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6338 Node->getValueType(0),
6339 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6340 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6341 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6342 return;
6343 case Intrinsic::aarch64_sve_zip_x2:
6344 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6345 Node->getValueType(0),
6346 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6347 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6348 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6349 return;
6350 case Intrinsic::aarch64_sve_zipq_x2:
6351 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6352 AArch64::ZIP_VG2_2ZZZ_Q);
6353 return;
6354 case Intrinsic::aarch64_sve_zip_x4:
6355 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6356 Node->getValueType(0),
6357 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6358 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6359 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6360 return;
6361 case Intrinsic::aarch64_sve_zipq_x4:
6362 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6363 AArch64::ZIP_VG4_4Z4Z_Q);
6364 return;
6365 case Intrinsic::aarch64_sve_uzp_x2:
6366 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6367 Node->getValueType(0),
6368 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6369 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6370 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6371 return;
6372 case Intrinsic::aarch64_sve_uzpq_x2:
6373 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6374 AArch64::UZP_VG2_2ZZZ_Q);
6375 return;
6376 case Intrinsic::aarch64_sve_uzp_x4:
6377 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6378 Node->getValueType(0),
6379 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6380 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6381 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6382 return;
6383 case Intrinsic::aarch64_sve_uzpq_x4:
6384 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6385 AArch64::UZP_VG4_4Z4Z_Q);
6386 return;
6387 case Intrinsic::aarch64_sve_sel_x2:
6388 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6389 Node->getValueType(0),
6390 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6391 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6392 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6393 return;
6394 case Intrinsic::aarch64_sve_sel_x4:
6395 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6396 Node->getValueType(0),
6397 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6398 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6399 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6400 return;
6401 case Intrinsic::aarch64_sve_frinta_x2:
6402 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6403 return;
6404 case Intrinsic::aarch64_sve_frinta_x4:
6405 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6406 return;
6407 case Intrinsic::aarch64_sve_frintm_x2:
6408 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6409 return;
6410 case Intrinsic::aarch64_sve_frintm_x4:
6411 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6412 return;
6413 case Intrinsic::aarch64_sve_frintn_x2:
6414 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6415 return;
6416 case Intrinsic::aarch64_sve_frintn_x4:
6417 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6418 return;
6419 case Intrinsic::aarch64_sve_frintp_x2:
6420 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6421 return;
6422 case Intrinsic::aarch64_sve_frintp_x4:
6423 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6424 return;
6425 case Intrinsic::aarch64_sve_sunpk_x2:
6426 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6427 Node->getValueType(0),
6428 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6429 AArch64::SUNPK_VG2_2ZZ_D}))
6430 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6431 return;
6432 case Intrinsic::aarch64_sve_uunpk_x2:
6433 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6434 Node->getValueType(0),
6435 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6436 AArch64::UUNPK_VG2_2ZZ_D}))
6437 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6438 return;
6439 case Intrinsic::aarch64_sve_sunpk_x4:
6440 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6441 Node->getValueType(0),
6442 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6443 AArch64::SUNPK_VG4_4Z2Z_D}))
6444 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6445 return;
6446 case Intrinsic::aarch64_sve_uunpk_x4:
6447 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6448 Node->getValueType(0),
6449 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6450 AArch64::UUNPK_VG4_4Z2Z_D}))
6451 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6452 return;
6453 case Intrinsic::aarch64_sve_pext_x2: {
6454 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6455 Node->getValueType(0),
6456 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6457 AArch64::PEXT_2PCI_D}))
6458 SelectPExtPair(Node, Op);
6459 return;
6460 }
6461 }
6462 break;
6463 }
6464 case ISD::INTRINSIC_VOID: {
6465 unsigned IntNo = Node->getConstantOperandVal(1);
6466 if (Node->getNumOperands() >= 3)
6467 VT = Node->getOperand(2)->getValueType(0);
6468 switch (IntNo) {
6469 default:
6470 break;
6471 case Intrinsic::aarch64_neon_st1x2: {
6472 if (VT == MVT::v8i8) {
6473 SelectStore(Node, 2, AArch64::ST1Twov8b);
6474 return;
6475 } else if (VT == MVT::v16i8) {
6476 SelectStore(Node, 2, AArch64::ST1Twov16b);
6477 return;
6478 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6479 VT == MVT::v4bf16) {
6480 SelectStore(Node, 2, AArch64::ST1Twov4h);
6481 return;
6482 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6483 VT == MVT::v8bf16) {
6484 SelectStore(Node, 2, AArch64::ST1Twov8h);
6485 return;
6486 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6487 SelectStore(Node, 2, AArch64::ST1Twov2s);
6488 return;
6489 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6490 SelectStore(Node, 2, AArch64::ST1Twov4s);
6491 return;
6492 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6493 SelectStore(Node, 2, AArch64::ST1Twov2d);
6494 return;
6495 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6496 SelectStore(Node, 2, AArch64::ST1Twov1d);
6497 return;
6498 }
6499 break;
6500 }
6501 case Intrinsic::aarch64_neon_st1x3: {
6502 if (VT == MVT::v8i8) {
6503 SelectStore(Node, 3, AArch64::ST1Threev8b);
6504 return;
6505 } else if (VT == MVT::v16i8) {
6506 SelectStore(Node, 3, AArch64::ST1Threev16b);
6507 return;
6508 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6509 VT == MVT::v4bf16) {
6510 SelectStore(Node, 3, AArch64::ST1Threev4h);
6511 return;
6512 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6513 VT == MVT::v8bf16) {
6514 SelectStore(Node, 3, AArch64::ST1Threev8h);
6515 return;
6516 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6517 SelectStore(Node, 3, AArch64::ST1Threev2s);
6518 return;
6519 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6520 SelectStore(Node, 3, AArch64::ST1Threev4s);
6521 return;
6522 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6523 SelectStore(Node, 3, AArch64::ST1Threev2d);
6524 return;
6525 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6526 SelectStore(Node, 3, AArch64::ST1Threev1d);
6527 return;
6528 }
6529 break;
6530 }
6531 case Intrinsic::aarch64_neon_st1x4: {
6532 if (VT == MVT::v8i8) {
6533 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6534 return;
6535 } else if (VT == MVT::v16i8) {
6536 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6537 return;
6538 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6539 VT == MVT::v4bf16) {
6540 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6541 return;
6542 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6543 VT == MVT::v8bf16) {
6544 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6545 return;
6546 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6547 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6548 return;
6549 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6550 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6551 return;
6552 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6553 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6554 return;
6555 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6556 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6557 return;
6558 }
6559 break;
6560 }
6561 case Intrinsic::aarch64_neon_st2: {
6562 if (VT == MVT::v8i8) {
6563 SelectStore(Node, 2, AArch64::ST2Twov8b);
6564 return;
6565 } else if (VT == MVT::v16i8) {
6566 SelectStore(Node, 2, AArch64::ST2Twov16b);
6567 return;
6568 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6569 VT == MVT::v4bf16) {
6570 SelectStore(Node, 2, AArch64::ST2Twov4h);
6571 return;
6572 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6573 VT == MVT::v8bf16) {
6574 SelectStore(Node, 2, AArch64::ST2Twov8h);
6575 return;
6576 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6577 SelectStore(Node, 2, AArch64::ST2Twov2s);
6578 return;
6579 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6580 SelectStore(Node, 2, AArch64::ST2Twov4s);
6581 return;
6582 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6583 SelectStore(Node, 2, AArch64::ST2Twov2d);
6584 return;
6585 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6586 SelectStore(Node, 2, AArch64::ST1Twov1d);
6587 return;
6588 }
6589 break;
6590 }
6591 case Intrinsic::aarch64_neon_st3: {
6592 if (VT == MVT::v8i8) {
6593 SelectStore(Node, 3, AArch64::ST3Threev8b);
6594 return;
6595 } else if (VT == MVT::v16i8) {
6596 SelectStore(Node, 3, AArch64::ST3Threev16b);
6597 return;
6598 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6599 VT == MVT::v4bf16) {
6600 SelectStore(Node, 3, AArch64::ST3Threev4h);
6601 return;
6602 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6603 VT == MVT::v8bf16) {
6604 SelectStore(Node, 3, AArch64::ST3Threev8h);
6605 return;
6606 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6607 SelectStore(Node, 3, AArch64::ST3Threev2s);
6608 return;
6609 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6610 SelectStore(Node, 3, AArch64::ST3Threev4s);
6611 return;
6612 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6613 SelectStore(Node, 3, AArch64::ST3Threev2d);
6614 return;
6615 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6616 SelectStore(Node, 3, AArch64::ST1Threev1d);
6617 return;
6618 }
6619 break;
6620 }
6621 case Intrinsic::aarch64_neon_st4: {
6622 if (VT == MVT::v8i8) {
6623 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6624 return;
6625 } else if (VT == MVT::v16i8) {
6626 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6627 return;
6628 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6629 VT == MVT::v4bf16) {
6630 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6631 return;
6632 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6633 VT == MVT::v8bf16) {
6634 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6635 return;
6636 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6637 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6638 return;
6639 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6640 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6641 return;
6642 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6643 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6644 return;
6645 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6646 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6647 return;
6648 }
6649 break;
6650 }
6651 case Intrinsic::aarch64_neon_st2lane: {
6652 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6653 SelectStoreLane(Node, 2, AArch64::ST2i8);
6654 return;
6655 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6656 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6657 SelectStoreLane(Node, 2, AArch64::ST2i16);
6658 return;
6659 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6660 VT == MVT::v2f32) {
6661 SelectStoreLane(Node, 2, AArch64::ST2i32);
6662 return;
6663 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6664 VT == MVT::v1f64) {
6665 SelectStoreLane(Node, 2, AArch64::ST2i64);
6666 return;
6667 }
6668 break;
6669 }
6670 case Intrinsic::aarch64_neon_st3lane: {
6671 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6672 SelectStoreLane(Node, 3, AArch64::ST3i8);
6673 return;
6674 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6675 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6676 SelectStoreLane(Node, 3, AArch64::ST3i16);
6677 return;
6678 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6679 VT == MVT::v2f32) {
6680 SelectStoreLane(Node, 3, AArch64::ST3i32);
6681 return;
6682 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6683 VT == MVT::v1f64) {
6684 SelectStoreLane(Node, 3, AArch64::ST3i64);
6685 return;
6686 }
6687 break;
6688 }
6689 case Intrinsic::aarch64_neon_st4lane: {
6690 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6691 SelectStoreLane(Node, 4, AArch64::ST4i8);
6692 return;
6693 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6694 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6695 SelectStoreLane(Node, 4, AArch64::ST4i16);
6696 return;
6697 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6698 VT == MVT::v2f32) {
6699 SelectStoreLane(Node, 4, AArch64::ST4i32);
6700 return;
6701 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6702 VT == MVT::v1f64) {
6703 SelectStoreLane(Node, 4, AArch64::ST4i64);
6704 return;
6705 }
6706 break;
6707 }
6708 case Intrinsic::aarch64_sve_st2q: {
6709 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6710 return;
6711 }
6712 case Intrinsic::aarch64_sve_st3q: {
6713 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6714 return;
6715 }
6716 case Intrinsic::aarch64_sve_st4q: {
6717 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6718 return;
6719 }
6720 case Intrinsic::aarch64_sve_st2: {
6721 if (VT == MVT::nxv16i8) {
6722 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6723 return;
6724 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6725 VT == MVT::nxv8bf16) {
6726 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6727 return;
6728 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6729 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6730 return;
6731 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6732 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6733 return;
6734 }
6735 break;
6736 }
6737 case Intrinsic::aarch64_sve_st3: {
6738 if (VT == MVT::nxv16i8) {
6739 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6740 return;
6741 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6742 VT == MVT::nxv8bf16) {
6743 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6744 return;
6745 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6746 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6747 return;
6748 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6749 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6750 return;
6751 }
6752 break;
6753 }
6754 case Intrinsic::aarch64_sve_st4: {
6755 if (VT == MVT::nxv16i8) {
6756 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6757 return;
6758 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6759 VT == MVT::nxv8bf16) {
6760 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6761 return;
6762 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6763 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6764 return;
6765 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6766 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6767 return;
6768 }
6769 break;
6770 }
6771 }
6772 break;
6773 }
6774 case AArch64ISD::LD2post: {
6775 if (VT == MVT::v8i8) {
6776 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6777 return;
6778 } else if (VT == MVT::v16i8) {
6779 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6780 return;
6781 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6782 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6783 return;
6784 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6785 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6786 return;
6787 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6788 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6789 return;
6790 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6791 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6792 return;
6793 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6794 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6795 return;
6796 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6797 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6798 return;
6799 }
6800 break;
6801 }
6802 case AArch64ISD::LD3post: {
6803 if (VT == MVT::v8i8) {
6804 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6805 return;
6806 } else if (VT == MVT::v16i8) {
6807 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6808 return;
6809 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6810 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6811 return;
6812 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6813 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6814 return;
6815 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6816 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6817 return;
6818 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6819 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6820 return;
6821 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6822 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6823 return;
6824 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6825 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6826 return;
6827 }
6828 break;
6829 }
6830 case AArch64ISD::LD4post: {
6831 if (VT == MVT::v8i8) {
6832 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6833 return;
6834 } else if (VT == MVT::v16i8) {
6835 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6836 return;
6837 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6838 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6839 return;
6840 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6841 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6842 return;
6843 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6844 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6845 return;
6846 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6847 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6848 return;
6849 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6850 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6851 return;
6852 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6853 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6854 return;
6855 }
6856 break;
6857 }
6858 case AArch64ISD::LD1x2post: {
6859 if (VT == MVT::v8i8) {
6860 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6861 return;
6862 } else if (VT == MVT::v16i8) {
6863 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6864 return;
6865 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6866 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6867 return;
6868 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6869 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6870 return;
6871 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6872 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6873 return;
6874 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6875 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6876 return;
6877 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6878 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6879 return;
6880 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6881 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6882 return;
6883 }
6884 break;
6885 }
6886 case AArch64ISD::LD1x3post: {
6887 if (VT == MVT::v8i8) {
6888 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6889 return;
6890 } else if (VT == MVT::v16i8) {
6891 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6892 return;
6893 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6894 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6895 return;
6896 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6897 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6898 return;
6899 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6900 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6901 return;
6902 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6903 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6904 return;
6905 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6906 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6907 return;
6908 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6909 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6910 return;
6911 }
6912 break;
6913 }
6914 case AArch64ISD::LD1x4post: {
6915 if (VT == MVT::v8i8) {
6916 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6917 return;
6918 } else if (VT == MVT::v16i8) {
6919 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6920 return;
6921 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6922 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6923 return;
6924 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6925 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6926 return;
6927 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6928 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6929 return;
6930 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6931 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6932 return;
6933 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6934 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6935 return;
6936 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6937 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6938 return;
6939 }
6940 break;
6941 }
6942 case AArch64ISD::LD1DUPpost: {
6943 if (VT == MVT::v8i8) {
6944 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6945 return;
6946 } else if (VT == MVT::v16i8) {
6947 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6948 return;
6949 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6950 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6951 return;
6952 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6953 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6954 return;
6955 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6956 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6957 return;
6958 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6959 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6960 return;
6961 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6962 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6963 return;
6964 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6965 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6966 return;
6967 }
6968 break;
6969 }
6970 case AArch64ISD::LD2DUPpost: {
6971 if (VT == MVT::v8i8) {
6972 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6973 return;
6974 } else if (VT == MVT::v16i8) {
6975 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6976 return;
6977 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6978 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6979 return;
6980 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6981 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6982 return;
6983 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6984 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6985 return;
6986 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6987 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6988 return;
6989 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6990 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6991 return;
6992 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6993 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6994 return;
6995 }
6996 break;
6997 }
6998 case AArch64ISD::LD3DUPpost: {
6999 if (VT == MVT::v8i8) {
7000 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7001 return;
7002 } else if (VT == MVT::v16i8) {
7003 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7004 return;
7005 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7006 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7007 return;
7008 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7009 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7010 return;
7011 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7012 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7013 return;
7014 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7015 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7016 return;
7017 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7018 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7019 return;
7020 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7021 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7022 return;
7023 }
7024 break;
7025 }
7026 case AArch64ISD::LD4DUPpost: {
7027 if (VT == MVT::v8i8) {
7028 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7029 return;
7030 } else if (VT == MVT::v16i8) {
7031 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7032 return;
7033 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7034 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7035 return;
7036 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7037 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7038 return;
7039 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7040 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7041 return;
7042 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7043 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7044 return;
7045 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7046 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7047 return;
7048 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7049 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7050 return;
7051 }
7052 break;
7053 }
7054 case AArch64ISD::LD1LANEpost: {
7055 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7056 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7057 return;
7058 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7059 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7060 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7061 return;
7062 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7063 VT == MVT::v2f32) {
7064 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7065 return;
7066 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7067 VT == MVT::v1f64) {
7068 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7069 return;
7070 }
7071 break;
7072 }
7073 case AArch64ISD::LD2LANEpost: {
7074 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7075 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7076 return;
7077 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7078 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7079 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7080 return;
7081 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7082 VT == MVT::v2f32) {
7083 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7084 return;
7085 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7086 VT == MVT::v1f64) {
7087 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7088 return;
7089 }
7090 break;
7091 }
7092 case AArch64ISD::LD3LANEpost: {
7093 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7094 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7095 return;
7096 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7097 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7098 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7099 return;
7100 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7101 VT == MVT::v2f32) {
7102 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7103 return;
7104 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7105 VT == MVT::v1f64) {
7106 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7107 return;
7108 }
7109 break;
7110 }
7111 case AArch64ISD::LD4LANEpost: {
7112 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7113 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7114 return;
7115 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7116 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7117 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7118 return;
7119 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7120 VT == MVT::v2f32) {
7121 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7122 return;
7123 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7124 VT == MVT::v1f64) {
7125 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7126 return;
7127 }
7128 break;
7129 }
7130 case AArch64ISD::ST2post: {
7131 VT = Node->getOperand(1).getValueType();
7132 if (VT == MVT::v8i8) {
7133 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7134 return;
7135 } else if (VT == MVT::v16i8) {
7136 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7137 return;
7138 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7139 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7140 return;
7141 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7142 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7143 return;
7144 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7145 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7146 return;
7147 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7148 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7149 return;
7150 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7151 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7152 return;
7153 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7154 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7155 return;
7156 }
7157 break;
7158 }
7159 case AArch64ISD::ST3post: {
7160 VT = Node->getOperand(1).getValueType();
7161 if (VT == MVT::v8i8) {
7162 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7163 return;
7164 } else if (VT == MVT::v16i8) {
7165 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7166 return;
7167 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7168 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7169 return;
7170 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7171 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7172 return;
7173 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7174 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7175 return;
7176 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7177 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7178 return;
7179 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7180 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7181 return;
7182 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7183 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7184 return;
7185 }
7186 break;
7187 }
7188 case AArch64ISD::ST4post: {
7189 VT = Node->getOperand(1).getValueType();
7190 if (VT == MVT::v8i8) {
7191 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7192 return;
7193 } else if (VT == MVT::v16i8) {
7194 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7195 return;
7196 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7197 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7198 return;
7199 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7200 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7201 return;
7202 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7203 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7204 return;
7205 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7206 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7207 return;
7208 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7209 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7210 return;
7211 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7212 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7213 return;
7214 }
7215 break;
7216 }
7217 case AArch64ISD::ST1x2post: {
7218 VT = Node->getOperand(1).getValueType();
7219 if (VT == MVT::v8i8) {
7220 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7221 return;
7222 } else if (VT == MVT::v16i8) {
7223 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7224 return;
7225 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7226 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7227 return;
7228 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7229 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7230 return;
7231 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7232 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7233 return;
7234 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7235 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7236 return;
7237 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7238 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7239 return;
7240 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7241 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7242 return;
7243 }
7244 break;
7245 }
7246 case AArch64ISD::ST1x3post: {
7247 VT = Node->getOperand(1).getValueType();
7248 if (VT == MVT::v8i8) {
7249 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7250 return;
7251 } else if (VT == MVT::v16i8) {
7252 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7253 return;
7254 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7255 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7256 return;
7257 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7258 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7259 return;
7260 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7261 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7262 return;
7263 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7264 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7265 return;
7266 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7267 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7268 return;
7269 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7270 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7271 return;
7272 }
7273 break;
7274 }
7275 case AArch64ISD::ST1x4post: {
7276 VT = Node->getOperand(1).getValueType();
7277 if (VT == MVT::v8i8) {
7278 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7279 return;
7280 } else if (VT == MVT::v16i8) {
7281 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7282 return;
7283 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7284 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7285 return;
7286 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7287 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7288 return;
7289 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7290 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7291 return;
7292 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7293 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7294 return;
7295 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7296 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7297 return;
7298 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7299 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7300 return;
7301 }
7302 break;
7303 }
7304 case AArch64ISD::ST2LANEpost: {
7305 VT = Node->getOperand(1).getValueType();
7306 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7307 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7308 return;
7309 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7310 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7311 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7312 return;
7313 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7314 VT == MVT::v2f32) {
7315 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7316 return;
7317 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7318 VT == MVT::v1f64) {
7319 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7320 return;
7321 }
7322 break;
7323 }
7324 case AArch64ISD::ST3LANEpost: {
7325 VT = Node->getOperand(1).getValueType();
7326 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7327 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7328 return;
7329 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7330 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7331 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7332 return;
7333 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7334 VT == MVT::v2f32) {
7335 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7336 return;
7337 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7338 VT == MVT::v1f64) {
7339 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7340 return;
7341 }
7342 break;
7343 }
7344 case AArch64ISD::ST4LANEpost: {
7345 VT = Node->getOperand(1).getValueType();
7346 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7347 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7348 return;
7349 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7350 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7351 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7352 return;
7353 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7354 VT == MVT::v2f32) {
7355 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7356 return;
7357 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7358 VT == MVT::v1f64) {
7359 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7360 return;
7361 }
7362 break;
7363 }
7364 }
7365
7366 // Select the default instruction
7367 SelectCode(Node);
7368}
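A side note on the dispatch convention used by the intrinsic cases above (an editorial sketch, not code from this file): SelectOpcodeFromVT is handed a four-entry opcode list ordered by element size, and a zero entry marks an element size the instruction does not provide (for example, FCLAMP has no byte-element form). A minimal stand-alone model of that lookup, using a plain element-width switch in place of MVT comparisons:

#include <array>

// Hypothetical stand-in for the {8-bit, 16-bit, 32-bit, 64-bit} opcode lists
// passed to SelectOpcodeFromVT; a zero entry means "no instruction for this
// element size".
static unsigned opcodeForEltBits(unsigned EltBits,
                                 const std::array<unsigned, 4> &Opcodes) {
  switch (EltBits) {
  case 8:  return Opcodes[0];
  case 16: return Opcodes[1];
  case 32: return Opcodes[2];
  case 64: return Opcodes[3];
  default: return 0; // not a supported element size
  }
}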
7369
7370/// createAArch64ISelDag - This pass converts a legalized DAG into an
7371/// AArch64-specific DAG, ready for instruction scheduling.
7372FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7373 CodeGenOptLevel OptLevel) {
7374 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7375}
7376
7377/// When \p PredVT is a scalable vector predicate in the form
7378/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7379/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7380/// structured vectors (NumVec >1), the output data type is
7381/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7382/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7383/// EVT.
7384static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7385 unsigned NumVec) {
7386 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7387 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7388 return EVT();
7389
7390 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7391 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7392 return EVT();
7393
7394 ElementCount EC = PredVT.getVectorElementCount();
7395 EVT ScalarVT =
7396 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7397 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7398
7399 return MemVT;
7400}
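As a worked illustration of the mapping implemented above (a simplified sketch using plain integers rather than MVT/EVT): a predicate MVT::nx<M>xi1 produces MVT::nx<M*NumVec>xi<128/M>, so nxv4i1 yields nxv4i32 for a single vector and nxv8i32 for a two-vector structure.

#include <cstdio>

// Simplified model: M is the minimum element count of the predicate
// (16, 8, 4 or 2); the element width is chosen so that M * bits == 128,
// and NumVec scales the element count for structured (tuple) accesses.
struct PackedVT {
  unsigned NumElts;
  unsigned EltBits;
};

static PackedVT packedVTForPredicate(unsigned M, unsigned NumVec) {
  return {M * NumVec, 128u / M};
}

int main() {
  PackedVT Single = packedVTForPredicate(4, 1); // nxv4i1 -> nxv4i32
  PackedVT Pair = packedVTForPredicate(4, 2);   // nxv4i1, two vectors -> nxv8i32
  std::printf("nxv%ui%u nxv%ui%u\n", Single.NumElts, Single.EltBits,
              Pair.NumElts, Pair.EltBits);
  return 0;
}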
7401
7402/// Return the EVT of the data associated with a memory operation in \p
7403/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7404static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7405 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7406 return MemIntr->getMemoryVT();
7407
7408 if (isa<MemSDNode>(Root)) {
7409 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7410
7411 EVT DataVT;
7412 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7413 DataVT = Load->getValueType(0);
7414 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7415 DataVT = Load->getValueType(0);
7416 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7417 DataVT = Store->getValue().getValueType();
7418 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7419 DataVT = Store->getValue().getValueType();
7420 else
7421 llvm_unreachable("Unexpected MemSDNode!");
7422
7423 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7424 }
7425
7426 const unsigned Opcode = Root->getOpcode();
7427 // For custom ISD nodes, we have to look at them individually to extract the
7428 // type of the data moved to/from memory.
7429 switch (Opcode) {
7430 case AArch64ISD::LD1_MERGE_ZERO:
7431 case AArch64ISD::LD1S_MERGE_ZERO:
7432 case AArch64ISD::LDNF1_MERGE_ZERO:
7433 case AArch64ISD::LDNF1S_MERGE_ZERO:
7434 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7435 case AArch64ISD::ST1_PRED:
7436 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7437 default:
7438 break;
7439 }
7440
7441 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7442 return EVT();
7443
7444 switch (Root->getConstantOperandVal(1)) {
7445 default:
7446 return EVT();
7447 case Intrinsic::aarch64_sme_ldr:
7448 case Intrinsic::aarch64_sme_str:
7449 return MVT::nxv16i8;
7450 case Intrinsic::aarch64_sve_prf:
7451 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7452 // width of the predicate.
7453 return getPackedVectorTypeFromPredicateType(
7454 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7455 case Intrinsic::aarch64_sve_ld2_sret:
7456 case Intrinsic::aarch64_sve_ld2q_sret:
7457 return getPackedVectorTypeFromPredicateType(
7458 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7459 case Intrinsic::aarch64_sve_st2q:
7460 return getPackedVectorTypeFromPredicateType(
7461 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7462 case Intrinsic::aarch64_sve_ld3_sret:
7463 case Intrinsic::aarch64_sve_ld3q_sret:
7464 return getPackedVectorTypeFromPredicateType(
7465 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7466 case Intrinsic::aarch64_sve_st3q:
7467 return getPackedVectorTypeFromPredicateType(
7468 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7469 case Intrinsic::aarch64_sve_ld4_sret:
7470 case Intrinsic::aarch64_sve_ld4q_sret:
7471 return getPackedVectorTypeFromPredicateType(
7472 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7473 case Intrinsic::aarch64_sve_st4q:
7474 return getPackedVectorTypeFromPredicateType(
7475 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7476 case Intrinsic::aarch64_sve_ld1udq:
7477 case Intrinsic::aarch64_sve_st1dq:
7478 return EVT(MVT::nxv1i64);
7479 case Intrinsic::aarch64_sve_ld1uwq:
7480 case Intrinsic::aarch64_sve_st1wq:
7481 return EVT(MVT::nxv1i32);
7482 }
7483}
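For the plain MemSDNode path above, the returned type keeps the element count of the loaded or stored value but takes the in-memory element type, which matters for extending loads and truncating stores. A small sketch of that combination step (the names here are illustrative, not LLVM API):

// Illustrative model of DataVT.changeVectorElementType(MemVT element type):
// e.g. an extending masked load producing nxv4i32 from nxv4i16 in memory is
// reported here as nxv4i16.
struct SimpleVT {
  unsigned NumElts;
  unsigned EltBits;
};

static SimpleVT memoryVTForAccess(SimpleVT DataVT, SimpleVT MemVT) {
  return {DataVT.NumElts, MemVT.EltBits};
}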
7484
7485/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7486/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7487/// where Root is the memory access using N for its address.
7488template <int64_t Min, int64_t Max>
7489bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7490 SDValue &Base,
7491 SDValue &OffImm) {
7492 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7493 const DataLayout &DL = CurDAG->getDataLayout();
7494 const MachineFrameInfo &MFI = MF->getFrameInfo();
7495
7496 if (N.getOpcode() == ISD::FrameIndex) {
7497 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7498 // We can only encode VL scaled offsets, so only fold in frame indexes
7499 // referencing SVE objects.
7500 if (MFI.hasScalableStackID(FI)) {
7501 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7502 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7503 return true;
7504 }
7505
7506 return false;
7507 }
7508
7509 if (MemVT == EVT())
7510 return false;
7511
7512 if (N.getOpcode() != ISD::ADD)
7513 return false;
7514
7515 SDValue VScale = N.getOperand(1);
7516 int64_t MulImm = std::numeric_limits<int64_t>::max();
7517 if (VScale.getOpcode() == ISD::VSCALE) {
7518 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7519 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7520 int64_t ByteOffset = C->getSExtValue();
7521 const auto KnownVScale =
7522 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7523
7524 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7525 return false;
7526
7527 MulImm = ByteOffset / KnownVScale;
7528 } else
7529 return false;
7530
7531 TypeSize TS = MemVT.getSizeInBits();
7532 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7533
7534 if ((MulImm % MemWidthBytes) != 0)
7535 return false;
7536
7537 int64_t Offset = MulImm / MemWidthBytes;
7538 if (Offset < Min || Offset > Max)
7539 return false;
7540
7541 Base = N.getOperand(0);
7542 if (Base.getOpcode() == ISD::FrameIndex) {
7543 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7544 // We can only encode VL scaled offsets, so only fold in frame indexes
7545 // referencing SVE objects.
7546 if (MFI.hasScalableStackID(FI))
7547 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7548 }
7549
7550 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7551 return true;
7552}
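A worked example of the offset arithmetic above (a sketch; the [-8, 7] bound used below is the range typical of the contiguous ld1/st1 forms, other instantiations of Min/Max differ): for N = (add Base, (vscale 32)) with MemVT = nxv4i32, MulImm is 32 and the per-vscale memory width is 16 bytes, so the encoded immediate is 2, i.e. an "[xN, #2, mul vl]" operand.

#include <cstdint>

// Sketch of the VL-scaled immediate check: the byte offset per vscale must be
// a multiple of the memory width per vscale, and the resulting element offset
// must lie in [Min, Max].
static bool selectVLScaledOffset(int64_t MulImm, int64_t MemWidthBytes,
                                 int64_t Min, int64_t Max, int64_t &Offset) {
  if (MulImm % MemWidthBytes != 0)
    return false;
  Offset = MulImm / MemWidthBytes;
  return Offset >= Min && Offset <= Max;
}
// Example: selectVLScaledOffset(32, 16, -8, 7, Off) succeeds and sets Off = 2.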
7553
7554/// Select register plus register addressing mode for SVE, with scaled
7555/// offset.
7556bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7557 SDValue &Base,
7558 SDValue &Offset) {
7559 if (N.getOpcode() != ISD::ADD)
7560 return false;
7561
7562 // Process an ADD node.
7563 const SDValue LHS = N.getOperand(0);
7564 const SDValue RHS = N.getOperand(1);
7565
7566 // 8 bit data does not come with the SHL node, so it is treated
7567 // separately.
7568 if (Scale == 0) {
7569 Base = LHS;
7570 Offset = RHS;
7571 return true;
7572 }
7573
7574 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7575 int64_t ImmOff = C->getSExtValue();
7576 unsigned Size = 1 << Scale;
7577
7578 // To use the reg+reg addressing mode, the immediate must be a multiple of
7579 // the vector element's byte size.
7580 if (ImmOff % Size)
7581 return false;
7582
7583 SDLoc DL(N);
7584 Base = LHS;
7585 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7586 SDValue Ops[] = {Offset};
7587 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7588 Offset = SDValue(MI, 0);
7589 return true;
7590 }
7591
7592 // Check if the RHS is a shift node with a constant.
7593 if (RHS.getOpcode() != ISD::SHL)
7594 return false;
7595
7596 const SDValue ShiftRHS = RHS.getOperand(1);
7597 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7598 if (C->getZExtValue() == Scale) {
7599 Base = LHS;
7600 Offset = RHS.getOperand(0);
7601 return true;
7602 }
7603
7604 return false;
7605}
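To make the matching rule above concrete (an illustrative sketch, not selector code): with Scale = 2, an address of the form (add Base, (shl Idx, 2)) becomes Base plus Idx under an LSL #2 addressing mode, while a constant addend is accepted only when it is a multiple of 4 and is then rescaled and materialised as a separate register.

#include <cstdint>
#include <optional>

// Sketch of the two accepted shapes for reg+reg SVE addressing with a given
// Scale: a left shift of the index that matches the scale, or a constant that
// is a multiple of the element size (returned already divided by that size).
static bool shiftMatchesScale(uint64_t ShiftAmt, unsigned Scale) {
  return ShiftAmt == Scale; // e.g. (shl idx, 2) pairs with 32-bit elements
}

static std::optional<int64_t> rescaleConstantOffset(int64_t ImmOff,
                                                    unsigned Scale) {
  const int64_t Size = int64_t(1) << Scale;
  if (ImmOff % Size != 0)
    return std::nullopt;
  return ImmOff >> Scale;
}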
7606
7607bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7608 const AArch64TargetLowering *TLI =
7609 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7610
7611 return TLI->isAllActivePredicate(*CurDAG, N);
7612}
7613
7614bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7615 EVT VT = N.getValueType();
7616 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7617}
7618
7619bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7620 SDValue &Base, SDValue &Offset,
7621 unsigned Scale) {
7622 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7623 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7624 int64_t ImmOff = C->getSExtValue();
7625 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7626 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7627 }
7628 return SDValue();
7629 };
7630
7631 if (SDValue C = MatchConstantOffset(N)) {
7632 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7633 Offset = C;
7634 return true;
7635 }
7636
7637 // Try to untangle an ADD node into a 'reg + offset'
7638 if (CurDAG->isBaseWithConstantOffset(N)) {
7639 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7640 Base = N.getOperand(0);
7641 Offset = C;
7642 return true;
7643 }
7644 }
7645
7646 // By default, just match reg + 0.
7647 Base = N;
7648 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7649 return true;
7650}
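A compact model of the slice-offset split above (sketch only): a constant, or the constant half of a base-plus-constant node, is folded into the immediate when it is positive, no larger than MaxSize, and a multiple of Scale; anything else falls back to matching "reg + 0".

#include <cstdint>
#include <optional>

// Sketch of the MatchConstantOffset helper in SelectSMETileSlice: returns the
// scaled immediate when the raw offset is usable, otherwise nothing.
static std::optional<int64_t> matchTileSliceImm(int64_t ImmOff, int64_t MaxSize,
                                                int64_t Scale) {
  if (ImmOff > 0 && ImmOff <= MaxSize && ImmOff % Scale == 0)
    return ImmOff / Scale;
  return std::nullopt;
}
// Example: matchTileSliceImm(12, 15, 4) yields 3; matchTileSliceImm(18, 15, 4)
// yields nothing, so the whole operand would be used as the base register.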
7651
7652bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7653 SDValue &Imm) {
7654 AArch64CC::CondCode CC =
7655 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7656 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7657 // Check conservatively if the immediate fits the valid range [0, 64).
7658 // Immediate variants for GE and HS definitely need to be decremented
7659 // when lowering the pseudos later, so an immediate of 1 would become 0.
7660 // For the inverse conditions LT and LO we don't know for sure if they
7661 // will need a decrement but should the decision be made to reverse the
7662 // branch condition, we again end up with the need to decrement.
7663 // The same argument holds for LE, LS, GT and HI and possibly
7664 // incremented immediates. This can lead to slightly less optimal
7665 // codegen, e.g. we never codegen the legal case
7666 // cblt w0, #63, A
7667 // because we could end up with the illegal case
7668 // cbge w0, #64, B
7669 // should the decision to reverse the branch direction be made. For the
7670 // lower bound cases this is no problem since we can express comparisons
7671 // against 0 with either tbz/tbnz or using wzr/xzr.
7672 uint64_t LowerBound = 0, UpperBound = 64;
7673 switch (CC) {
7674 case AArch64CC::GE:
7675 case AArch64CC::HS:
7676 case AArch64CC::LT:
7677 case AArch64CC::LO:
7678 LowerBound = 1;
7679 break;
7680 case AArch64CC::LE:
7681 case AArch64CC::LS:
7682 case AArch64CC::GT:
7683 case AArch64CC::HI:
7684 UpperBound = 63;
7685 break;
7686 default:
7687 break;
7688 }
7689
7690 if (CN->getAPIntValue().uge(LowerBound) &&
7691 CN->getAPIntValue().ult(UpperBound)) {
7692 SDLoc DL(N);
7693 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7694 return true;
7695 }
7696 }
7697
7698 return false;
7699}
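To illustrate the conservative bounds above (a sketch): for GE/HS/LT/LO the accepted immediates are [1, 64) because a later decrement must not underflow, for LE/LS/GT/HI they are [0, 63) because a later increment must still fit in six bits, and every other condition code keeps the full [0, 64) range.

#include <cstdint>

// Sketch of the range tightening applied to the unsigned 6-bit compare-and-
// branch immediate, keyed by a simplified condition-code grouping.
enum class CBImmGroup { MayDecrement, MayIncrement, Neutral };

static bool isAcceptedCBImm(uint64_t Imm, CBImmGroup Group) {
  uint64_t Lower = 0, Upper = 64;
  if (Group == CBImmGroup::MayDecrement) // GE, HS, LT, LO
    Lower = 1;
  else if (Group == CBImmGroup::MayIncrement) // LE, LS, GT, HI
    Upper = 63;
  return Imm >= Lower && Imm < Upper;
}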
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
#define PASS_NAME
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
#define DEBUG_TYPE
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
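A hedged sketch of how a REG_SEQUENCE is usually assembled with MachineIRBuilder: one use operand per input register, each followed by the immediate sub-register index it occupies. The destination register and the sub-register index array are assumed to come from the caller; buildTuple is an illustrative name.
// Illustrative sketch: Dst = REG_SEQUENCE reg0, subidx0, reg1, subidx1, ...
static Register buildTuple(ArrayRef<Register> Regs, Register Dst,
                           const unsigned SubRegs[], MachineIRBuilder &MIB) {
  auto Seq = MIB.buildInstr(TargetOpcode::REG_SEQUENCE).addDef(Dst);
  for (unsigned I = 0, E = Regs.size(); I != E; ++I)
    Seq.addUse(Regs[I]).addImm(SubRegs[I]); // value, then sub-register index
  return Dst;
}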
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:56
#define LLVM_DEBUG(...)
Definition: Debug.h:119
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition: APFloat.cpp:5999
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1332
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1670
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1598
void flipAllBits()
Toggle every bit to its opposite value.
Definition: APInt.h:1452
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:510
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
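The APInt queries listed above are the ones bitfield matching leans on most. A small self-contained example, using an arbitrarily chosen mask value 0x0ff0:
// Example: interrogating a candidate bitfield mask with APInt.
// #include "llvm/ADT/APInt.h"
llvm::APInt Mask(64, 0x0ff0);                    // ones in bits [4, 12)
unsigned Width = Mask.popcount();                // 8 contiguous set bits
unsigned Lsb = Mask.countr_zero();               // run starts at bit 4
bool OneRun = Mask.isShiftedMask();              // true: a single block of ones
llvm::APInt Rebuilt = llvm::APInt::getBitsSet(64, Lsb, Lsb + Width); // == Mask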
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:147
iterator begin() const
Definition: ArrayRef.h:135
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
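A short sketch of how selection code typically combines these SDValue/SDNode accessors: check the opcode, require a single user before folding, and read a constant operand. The helper name isShiftLeftByConstant is illustrative.
// Illustrative sketch: match "shl x, C" that has exactly one user.
static bool isShiftLeftByConstant(SDValue V, uint64_t &Amount) {
  if (V.getOpcode() != ISD::SHL || !V.hasOneUse())
    return false;
  if (auto *C = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
    Amount = C->getZExtValue();
    return true;
  }
  return false;
}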
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:229
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:459
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:707
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
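Taken together, getTargetConstant and getMachineNode are how a selector emits a concrete instruction. A hedged sketch, assuming it runs inside the selector's Select() override where CurDAG is the current SelectionDAG and N is the node being replaced; AArch64::ADDXri and the constant 42 are purely illustrative choices.
// Illustrative sketch: build "ADDXri x, #42, lsl #0" and replace N with it.
SDLoc DL(N);
SDValue Imm = CurDAG->getTargetConstant(42, DL, MVT::i32);
SDValue Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *Add = CurDAG->getMachineNode(AArch64::ADDXri, DL, MVT::i64,
                                            N->getOperand(0), Imm, Shift);
ReplaceNode(N, Add);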
size_t size() const
Definition: SmallVector.h:79
void push_back(const T &Elt)
Definition: SmallVector.h:414
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1197
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:710
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:169
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
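A compact example of how these AArch64_AM helpers fit together, using 0x00ff00ff00ff00ff (a valid repeating bit pattern) and an LSL #12 shifter purely as sample values:
// Example: logical-immediate encoding and shifter-operand packing.
uint64_t Imm = 0x00ff00ff00ff00ffULL, Encoding;
if (AArch64_AM::isLogicalImmediate(Imm, 64)) {            // encodable as N:immr:imms
  AArch64_AM::processLogicalImmediate(Imm, 64, Encoding);
  assert(AArch64_AM::decodeLogicalImmediate(Encoding, 64) == Imm); // round-trips
}
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL);
assert(AArch64_AM::getShiftValue(Shifter) == 12);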
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:587
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1351
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1141
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:215
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:975
@ FrameIndex
Definition: ISDOpcodes.h:90
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:826
@ WRITE_REGISTER
Definition: ISDOpcodes.h:135
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1347
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:756
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:832
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1448
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1358
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:870
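Written out as plain C++, the SHL/SRA pair for keeping an i8 worth of bits in an i32 register looks like this (the function name is illustrative):
// Example: sign_extend_inreg(x, i8) on a 32-bit value is (x << 24) >> 24
// with an arithmetic right shift.
int32_t signExtendInReg8(int32_t X) {
  return static_cast<int32_t>(static_cast<uint32_t>(X) << 24) >> 24;
  // e.g. X = 0x000000F0 comes back as 0xFFFFFFF0 (-16).
}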
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:62
@ AssertZext
Definition: ISDOpcodes.h:63
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:208
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1640
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1671
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:260
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:276
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit upward, stopping at the first 1.
Definition: bit.h:157
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:282
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1987
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:336
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:270
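These mask utilities are what turn a constant AND mask into UBFX-style (lsb, width) parameters. A self-contained sketch with an illustrative name:
// Illustrative sketch: decompose a contiguous mask into (lsb, width).
// #include "llvm/ADT/bit.h" and "llvm/Support/MathExtras.h"
static bool maskToBitfield(uint64_t Mask, unsigned &Lsb, unsigned &Width) {
  if (!llvm::isShiftedMask_64(Mask))        // need one non-empty run of ones
    return false;
  Lsb = llvm::countr_zero(Mask);            // zeros below the run
  Width = llvm::countr_one(Mask >> Lsb);    // length of the run itself
  return true;
}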
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:82
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:223
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
Extended Value Type.
Definition: ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:458
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:354
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isFixedLengthVector() const
Definition: ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:202
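An illustrative (not in-tree) use of these EVT queries: classifying which AArch64 register bank a vector value of the given type would naturally live in.
// Illustrative sketch: 64-bit NEON vectors use D registers, 128-bit use Q,
// scalable SVE vectors use Z.
static const char *vectorRegBank(EVT VT) {
  if (!VT.isVector())        return "scalar";
  if (VT.isScalableVector()) return "Z";
  if (VT.is64BitVector())    return "D";
  if (VT.is128BitVector())   return "Q";
  return "unknown";
}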
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:44
Matching combinators.