LLVM 22.0.0git
AArch64ISelDAGToDAG.cpp
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
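// Added commentary (not part of the upstream source): for a hypothetical
// SelectAddrModeIndexedUImm<8, 31> instantiation, (add x, #248) selects
// Base = x and OffImm = 31 (248 / 8), whereas (add x, #512) scales to 64 > Max,
// so the whole expression becomes the base and OffImm = 0, leaving the address
// to be materialized in a register.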
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
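// Added commentary (not part of the upstream source): SelectExtractHigh
// matches the high half of a 128-bit vector. For example,
//   (v2i32 (extract_subvector (v4i32 X), (i64 2)))
// returns Res = X, because the 64-bit result starts at element index
// VT.getVectorNumElements() == 2 of the 128-bit source. On little-endian
// targets a wrapping BITCAST is looked through first.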
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
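// Added commentary (not part of the upstream source): SelectRoundingVLShr
// recognizes the rounding-shift idiom (X + (1 << (ShtAmt - 1))) >>u ShtAmt.
// For a v8i16 shift by 4, the addend must be a splat of 8 (1 << 3), supplied
// either as a MOVIshift or as a DUP of a constant; Res1 is then X and Res2 the
// shift amount, ready for the rounding-shift patterns that use this helper.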
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
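// Added commentary (not part of the upstream source): for a hypothetical
// SelectCntImm<1, 16, 1, false> instantiation, a multiplier of 8 yields
// Imm = 8 directly; with Scale = -1 the sign is flipped by the division; and
// with Shift = true an input of 3 is first expanded to 1 << 3 = 8 before the
// [Min, Max] range check.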
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element, that element is
347 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516};
517
518class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
519public:
520 static char ID;
521 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
522 CodeGenOptLevel OptLevel)
523 : SelectionDAGISelLegacy(
524 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
525};
526} // end anonymous namespace
527
528char AArch64DAGToDAGISelLegacy::ID = 0;
529
530INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
531
532/// isIntImmediate - This method tests to see if the node is a constant
533/// operand. If so, Imm will receive the value.
534static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
535 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
536 Imm = C->getZExtValue();
537 return true;
538 }
539 return false;
540}
541
542// isIntImmediate - This method tests to see if N is a constant operand.
543// If so Imm will receive the value.
544static bool isIntImmediate(SDValue N, uint64_t &Imm) {
545 return isIntImmediate(N.getNode(), Imm);
546}
547
548// isOpcWithIntImmediate - This method tests to see if the node is a specific
549// opcode and that it has an immediate integer right operand.
550// If so Imm will receive the value.
551static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
552 uint64_t &Imm) {
553 return N->getOpcode() == Opc &&
554 isIntImmediate(N->getOperand(1).getNode(), Imm);
555}
556
557// isIntImmediateEq - This method tests to see if N is a constant operand that
558// is equivalent to 'ImmExpected'.
559#ifndef NDEBUG
560static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
561 uint64_t Imm;
562 if (!isIntImmediate(N.getNode(), Imm))
563 return false;
564 return Imm == ImmExpected;
565}
566#endif
567
568bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
569 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
570 std::vector<SDValue> &OutOps) {
571 switch(ConstraintID) {
572 default:
573 llvm_unreachable("Unexpected asm memory constraint");
574 case InlineAsm::ConstraintCode::m:
575 case InlineAsm::ConstraintCode::o:
576 case InlineAsm::ConstraintCode::Q:
577 // We need to make sure that this one operand does not end up in XZR, thus
578 // require the address to be in a PointerRegClass register.
579 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
580 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
581 SDLoc dl(Op);
582 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
583 SDValue NewOp =
584 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
585 dl, Op.getValueType(),
586 Op, RC), 0);
587 OutOps.push_back(NewOp);
588 return false;
589 }
590 return true;
591}
592
593/// SelectArithImmed - Select an immediate value that can be represented as
594/// a 12-bit value shifted left by either 0 or 12. If so, return true with
595/// Val set to the 12-bit value and Shift set to the shifter operand.
596bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
597 SDValue &Shift) {
598 // This function is called from the addsub_shifted_imm ComplexPattern,
599 // which lists [imm] as the list of opcodes it's interested in; however,
600 // we still need to check whether the operand is actually an immediate
601 // here because the ComplexPattern opcode list is only used in
602 // root-level opcode matching.
603 if (!isa<ConstantSDNode>(N.getNode()))
604 return false;
605
606 uint64_t Immed = N.getNode()->getAsZExtVal();
607 unsigned ShiftAmt;
608
609 if (Immed >> 12 == 0) {
610 ShiftAmt = 0;
611 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
612 ShiftAmt = 12;
613 Immed = Immed >> 12;
614 } else
615 return false;
616
617 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
618 SDLoc dl(N);
619 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
620 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
621 return true;
622}
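// Added commentary (not part of the upstream source): SelectArithImmed splits
// a constant into a 12-bit payload plus shifter operand. Immed = 0xabc gives
// Val = 0xabc, Shift = LSL #0; Immed = 0xabc000 gives Val = 0xabc,
// Shift = LSL #12 (low 12 bits zero and Immed >> 24 == 0); Immed = 0xabc001
// fits neither form and is rejected.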
623
624/// SelectNegArithImmed - As above, but negates the value before trying to
625/// select it.
626bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
627 SDValue &Shift) {
628 // This function is called from the addsub_shifted_imm ComplexPattern,
629 // which lists [imm] as the list of opcodes it's interested in; however,
630 // we still need to check whether the operand is actually an immediate
631 // here because the ComplexPattern opcode list is only used in
632 // root-level opcode matching.
633 if (!isa<ConstantSDNode>(N.getNode()))
634 return false;
635
636 // The immediate operand must be a 24-bit zero-extended immediate.
637 uint64_t Immed = N.getNode()->getAsZExtVal();
638
639 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
640 // have the opposite effect on the C flag, so this pattern mustn't match under
641 // those circumstances.
642 if (Immed == 0)
643 return false;
644
645 if (N.getValueType() == MVT::i32)
646 Immed = ~((uint32_t)Immed) + 1;
647 else
648 Immed = ~Immed + 1ULL;
649 if (Immed & 0xFFFFFFFFFF000000ULL)
650 return false;
651
652 Immed &= 0xFFFFFFULL;
653 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
654 Shift);
655}
656
657/// getShiftTypeForNode - Translate a shift node to the corresponding
658/// ShiftType value.
659 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
660 switch (N.getOpcode()) {
661 default:
662 return AArch64_AM::InvalidShiftExtend;
663 case ISD::SHL:
664 return AArch64_AM::LSL;
665 case ISD::SRL:
666 return AArch64_AM::LSR;
667 case ISD::SRA:
668 return AArch64_AM::ASR;
669 case ISD::ROTR:
670 return AArch64_AM::ROR;
671 }
672}
673
674 static bool isMemOpOrPrefetch(SDNode *N) {
675 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
676}
677
678/// Determine whether it is worth it to fold SHL into the addressing
679/// mode.
680 static bool isWorthFoldingSHL(SDValue V) {
681 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
682 // It is worth folding logical shift of up to three places.
683 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
684 if (!CSD)
685 return false;
686 unsigned ShiftVal = CSD->getZExtValue();
687 if (ShiftVal > 3)
688 return false;
689
690 // Check if this particular node is reused in any non-memory related
691 // operation. If yes, do not try to fold this node into the address
692 // computation, since the computation will be kept.
693 const SDNode *Node = V.getNode();
694 for (SDNode *UI : Node->users())
695 if (!isMemOpOrPrefetch(UI))
696 for (SDNode *UII : UI->users())
697 if (!isMemOpOrPrefetch(UII))
698 return false;
699 return true;
700}
701
702/// Determine whether it is worth folding V into an extended register
703/// addressing mode.
704bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
705 // Trivial if we are optimizing for code size or if there is only
706 // one use of the value.
707 if (CurDAG->shouldOptForSize() || V.hasOneUse())
708 return true;
709
710 // If a subtarget has a slow shift, folding a shift into multiple loads
711 // costs additional micro-ops.
712 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
713 return false;
714
715 // Check whether we're going to emit the address arithmetic anyway because
716 // it's used by a non-address operation.
717 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
718 return true;
719 if (V.getOpcode() == ISD::ADD) {
720 const SDValue LHS = V.getOperand(0);
721 const SDValue RHS = V.getOperand(1);
722 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
723 return true;
724 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
725 return true;
726 }
727
728 // It hurts otherwise, since the value will be reused.
729 return false;
730}
731
732/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
733/// in order to select more shifted-register forms.
734bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
735 SDValue &Shift) {
736 EVT VT = N.getValueType();
737 if (VT != MVT::i32 && VT != MVT::i64)
738 return false;
739
740 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
741 return false;
742 SDValue LHS = N.getOperand(0);
743 if (!LHS->hasOneUse())
744 return false;
745
746 unsigned LHSOpcode = LHS->getOpcode();
747 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
748 return false;
749
750 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
751 if (!ShiftAmtNode)
752 return false;
753
754 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
755 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
756 if (!RHSC)
757 return false;
758
759 APInt AndMask = RHSC->getAPIntValue();
760 unsigned LowZBits, MaskLen;
761 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
762 return false;
763
764 unsigned BitWidth = N.getValueSizeInBits();
765 SDLoc DL(LHS);
766 uint64_t NewShiftC;
767 unsigned NewShiftOp;
768 if (LHSOpcode == ISD::SHL) {
769 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
770 // BitWidth != LowZBits + MaskLen doesn't match the pattern
771 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
772 return false;
773
774 NewShiftC = LowZBits - ShiftAmtC;
775 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
776 } else {
777 if (LowZBits == 0)
778 return false;
779
780 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
781 NewShiftC = LowZBits + ShiftAmtC;
782 if (NewShiftC >= BitWidth)
783 return false;
784
785 // SRA needs all high bits
786 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
787 return false;
788
789 // SRL high bits can be 0 or 1
790 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
791 return false;
792
793 if (LHSOpcode == ISD::SRL)
794 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
795 else
796 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
797 }
798
799 assert(NewShiftC < BitWidth && "Invalid shift amount");
800 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
801 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
802 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
803 NewShiftAmt, BitWidthMinus1),
804 0);
805 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
806 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
807 return true;
808}
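// Added commentary (not part of the upstream source): for the i32 expression
//   (and (srl x, 4), 0x0ffffff0)   ; LowZBits = 4, MaskLen = 24
// NewShiftC = 4 + 4 = 8, so a UBFMWri(x, 8, 31) (an lsr #8) is created and the
// operand is reported as that register with "LSL #4", realizing the rewrite
//   and (srl x, c), mask --> shl (srl x, c1), c2.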
809
810/// getExtendTypeForNode - Translate an extend node to the corresponding
811/// ExtendType value.
812 static AArch64_AM::ShiftExtendType
813 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
814 if (N.getOpcode() == ISD::SIGN_EXTEND ||
815 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
816 EVT SrcVT;
817 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
818 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
819 else
820 SrcVT = N.getOperand(0).getValueType();
821
822 if (!IsLoadStore && SrcVT == MVT::i8)
823 return AArch64_AM::SXTB;
824 else if (!IsLoadStore && SrcVT == MVT::i16)
825 return AArch64_AM::SXTH;
826 else if (SrcVT == MVT::i32)
827 return AArch64_AM::SXTW;
828 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
829
830 return AArch64_AM::InvalidShiftExtend;
831 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
832 N.getOpcode() == ISD::ANY_EXTEND) {
833 EVT SrcVT = N.getOperand(0).getValueType();
834 if (!IsLoadStore && SrcVT == MVT::i8)
835 return AArch64_AM::UXTB;
836 else if (!IsLoadStore && SrcVT == MVT::i16)
837 return AArch64_AM::UXTH;
838 else if (SrcVT == MVT::i32)
839 return AArch64_AM::UXTW;
840 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
841
842 return AArch64_AM::InvalidShiftExtend;
843 } else if (N.getOpcode() == ISD::AND) {
844 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
845 if (!CSD)
846 return AArch64_AM::InvalidShiftExtend;
847 uint64_t AndMask = CSD->getZExtValue();
848
849 switch (AndMask) {
850 default:
851 return AArch64_AM::InvalidShiftExtend;
852 case 0xFF:
853 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
854 case 0xFFFF:
855 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
856 case 0xFFFFFFFF:
857 return AArch64_AM::UXTW;
858 }
859 }
860
861 return AArch64_AM::InvalidShiftExtend;
862}
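// Added commentary (not part of the upstream source): getExtendTypeForNode
// classifies an operand so the callers can pick an extend modifier, e.g.
// (sign_extend_inreg x, i16) -> SXTH, (zero_extend (i32 x)) -> UXTW, and
// (and x, 0xff) -> UXTB; everything else is InvalidShiftExtend. With
// IsLoadStore == true the 8/16-bit forms are rejected, since the load/store
// extended-register addressing modes only take UXTW/SXTW (or plain LSL).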
863
864/// Determine whether it is worth folding V into an extended register of an
865/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
866/// instruction, and the shift should be treated as worth folding even if it
867/// has multiple uses.
868bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
869 // Trivial if we are optimizing for code size or if there is only
870 // one use of the value.
871 if (CurDAG->shouldOptForSize() || V.hasOneUse())
872 return true;
873
874 // If a subtarget has a fastpath LSL we can fold a logical shift into
875 // the add/sub and save a cycle.
876 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
877 V.getConstantOperandVal(1) <= 4 &&
878 isa<ConstantSDNode>(V.getOperand(1)))
879 return true;
880
881 // It hurts otherwise, since the value will be reused.
882 return false;
883}
884
885/// SelectShiftedRegister - Select a "shifted register" operand. If the value
886/// is not shifted, set the Shift operand to default of "LSL 0". The logical
887/// instructions allow the shifted register to be rotated, but the arithmetic
888/// instructions do not. The AllowROR parameter specifies whether ROR is
889/// supported.
890bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
891 SDValue &Reg, SDValue &Shift) {
892 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
893 return true;
894
895 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
896 if (ShType == AArch64_AM::InvalidShiftExtend)
897 return false;
898 if (!AllowROR && ShType == AArch64_AM::ROR)
899 return false;
900
901 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
902 unsigned BitSize = N.getValueSizeInBits();
903 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
904 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
905
906 Reg = N.getOperand(0);
907 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
908 return isWorthFoldingALU(N, true);
909 }
910
911 return false;
912}
913
914/// Instructions that accept extend modifiers like UXTW expect the register
915/// being extended to be a GPR32, but the incoming DAG might be acting on a
916/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
917/// this is the case.
918 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
919 if (N.getValueType() == MVT::i32)
920 return N;
921
922 SDLoc dl(N);
923 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
924}
925
926// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
927template<signed Low, signed High, signed Scale>
928bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
929 if (!isa<ConstantSDNode>(N))
930 return false;
931
932 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
933 if ((MulImm % std::abs(Scale)) == 0) {
934 int64_t RDVLImm = MulImm / Scale;
935 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
936 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
937 return true;
938 }
939 }
940
941 return false;
942}
943
944// Returns a suitable RDSVL multiplier from a left shift.
945template <signed Low, signed High>
946bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
947 if (!isa<ConstantSDNode>(N))
948 return false;
949
950 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
951 if (MulImm >= Low && MulImm <= High) {
952 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
953 return true;
954 }
955
956 return false;
957}
958
959/// SelectArithExtendedRegister - Select an "extended register" operand. This
960/// operand folds in an extend followed by an optional left shift.
961bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
962 SDValue &Shift) {
963 unsigned ShiftVal = 0;
964 AArch64_AM::ShiftExtendType Ext;
965
966 if (N.getOpcode() == ISD::SHL) {
967 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
968 if (!CSD)
969 return false;
970 ShiftVal = CSD->getZExtValue();
971 if (ShiftVal > 4)
972 return false;
973
974 Ext = getExtendTypeForNode(N.getOperand(0));
975 if (Ext == AArch64_AM::InvalidShiftExtend)
976 return false;
977
978 Reg = N.getOperand(0).getOperand(0);
979 } else {
980 Ext = getExtendTypeForNode(N);
981 if (Ext == AArch64_AM::InvalidShiftExtend)
982 return false;
983
984 Reg = N.getOperand(0);
985
986 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
987 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
988 auto isDef32 = [](SDValue N) {
989 unsigned Opc = N.getOpcode();
990 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
991 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
992 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
993 Opc != ISD::FREEZE;
994 };
995 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
996 isDef32(Reg))
997 return false;
998 }
999
1000 // AArch64 mandates that the RHS of the operation must use the smallest
1001 // register class that could contain the size being extended from. Thus,
1002 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1003 // there might not be an actual 32-bit value in the program. We can
1004 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1005 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1006 Reg = narrowIfNeeded(CurDAG, Reg);
1007 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1008 MVT::i32);
1009 return isWorthFoldingALU(N);
1010}
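// Added commentary (not part of the upstream source): given the operand
//   (shl (sext_inreg w1, i16), 2)
// SelectArithExtendedRegister returns Reg = w1 (narrowed to a GPR32 if needed)
// and Shift = "SXTH #2", so the surrounding ADD/SUB can be selected in its
// extended-register form, e.g. "add x0, x2, w1, sxth #2".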
1011
1012/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1013/// operand is used by instructions that have an SP operand.
1014bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1015 SDValue &Shift) {
1016 unsigned ShiftVal = 0;
1017 AArch64_AM::ShiftExtendType Ext;
1018
1019 if (N.getOpcode() != ISD::SHL)
1020 return false;
1021
1022 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1023 if (!CSD)
1024 return false;
1025 ShiftVal = CSD->getZExtValue();
1026 if (ShiftVal > 4)
1027 return false;
1028
1029 Ext = AArch64_AM::UXTX;
1030 Reg = N.getOperand(0);
1031 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1032 MVT::i32);
1033 return isWorthFoldingALU(N);
1034}
1035
1036/// If there's a use of this ADDlow that's not itself a load/store then we'll
1037/// need to create a real ADD instruction from it anyway and there's no point in
1038/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1039/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1040/// leads to duplicated ADRP instructions.
1041 static bool isWorthFoldingADDlow(SDValue N) {
1042 for (auto *User : N->users()) {
1043 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1044 User->getOpcode() != ISD::ATOMIC_LOAD &&
1045 User->getOpcode() != ISD::ATOMIC_STORE)
1046 return false;
1047
1048 // ldar and stlr have much more restrictive addressing modes (just a
1049 // register).
1050 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1051 return false;
1052 }
1053
1054 return true;
1055}
1056
1057/// Check if the immediate offset is valid as a scaled immediate.
1058static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1059 unsigned Size) {
1060 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1061 Offset < (Range << Log2_32(Size)))
1062 return true;
1063 return false;
1064}
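// Added commentary (not part of the upstream source): with Range = 0x1000 and
// Size = 8 (a 64-bit access), the valid offsets are the multiples of 8 in
// [0, 0x8000), i.e. 0, 8, ..., 32760 -- exactly what a scaled unsigned 12-bit
// immediate can encode.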
1065
1066/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1067/// immediate" address. The "Size" argument is the size in bytes of the memory
1068/// reference, which determines the scale.
1069bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1070 unsigned BW, unsigned Size,
1071 SDValue &Base,
1072 SDValue &OffImm) {
1073 SDLoc dl(N);
1074 const DataLayout &DL = CurDAG->getDataLayout();
1075 const TargetLowering *TLI = getTargetLowering();
1076 if (N.getOpcode() == ISD::FrameIndex) {
1077 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1078 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1079 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1080 return true;
1081 }
1082
1083 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1084 // addressing mode selected here doesn't support labels/immediates, only base+offset.
1085 if (CurDAG->isBaseWithConstantOffset(N)) {
1086 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1087 if (IsSignedImm) {
1088 int64_t RHSC = RHS->getSExtValue();
1089 unsigned Scale = Log2_32(Size);
1090 int64_t Range = 0x1LL << (BW - 1);
1091
1092 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1093 RHSC < (Range << Scale)) {
1094 Base = N.getOperand(0);
1095 if (Base.getOpcode() == ISD::FrameIndex) {
1096 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1097 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1098 }
1099 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1100 return true;
1101 }
1102 } else {
1103 // unsigned Immediate
1104 uint64_t RHSC = RHS->getZExtValue();
1105 unsigned Scale = Log2_32(Size);
1106 uint64_t Range = 0x1ULL << BW;
1107
1108 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1109 Base = N.getOperand(0);
1110 if (Base.getOpcode() == ISD::FrameIndex) {
1111 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1112 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1113 }
1114 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1115 return true;
1116 }
1117 }
1118 }
1119 }
1120 // Base only. The address will be materialized into a register before
1121 // the memory is accessed.
1122 // add x0, Xbase, #offset
1123 // stp x1, x2, [x0]
1124 Base = N;
1125 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1126 return true;
1127}
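// Added commentary (not part of the upstream source): for the signed BW = 7,
// Size = 8 case (the LDP/STP-style offset form), the accepted offsets are the
// multiples of 8 in [-512, 504]; (add x1, #48) therefore yields Base = x1 and
// OffImm = 6, while (add x1, #520) is out of range and falls back to the
// base-only form with OffImm = 0.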
1128
1129/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1130/// immediate" address. The "Size" argument is the size in bytes of the memory
1131/// reference, which determines the scale.
1132bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1133 SDValue &Base, SDValue &OffImm) {
1134 SDLoc dl(N);
1135 const DataLayout &DL = CurDAG->getDataLayout();
1136 const TargetLowering *TLI = getTargetLowering();
1137 if (N.getOpcode() == ISD::FrameIndex) {
1138 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1139 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1140 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1141 return true;
1142 }
1143
1144 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1145 GlobalAddressSDNode *GAN =
1146 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1147 Base = N.getOperand(0);
1148 OffImm = N.getOperand(1);
1149 if (!GAN)
1150 return true;
1151
1152 if (GAN->getOffset() % Size == 0 &&
1153 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1154 return true;
1155 }
1156
1157 if (CurDAG->isBaseWithConstantOffset(N)) {
1158 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1159 int64_t RHSC = (int64_t)RHS->getZExtValue();
1160 unsigned Scale = Log2_32(Size);
1161 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1162 Base = N.getOperand(0);
1163 if (Base.getOpcode() == ISD::FrameIndex) {
1164 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1165 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1166 }
1167 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1168 return true;
1169 }
1170 }
1171 }
1172
1173 // Before falling back to our general case, check if the unscaled
1174 // instructions can handle this. If so, that's preferable.
1175 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1176 return false;
1177
1178 // Base only. The address will be materialized into a register before
1179 // the memory is accessed.
1180 // add x0, Xbase, #offset
1181 // ldr x0, [x0]
1182 Base = N;
1183 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1184 return true;
1185}
1186
1187/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1188/// immediate" address. This should only match when there is an offset that
1189/// is not valid for a scaled immediate addressing mode. The "Size" argument
1190/// is the size in bytes of the memory reference, which is needed here to know
1191/// what is valid for a scaled immediate.
1192bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1193 SDValue &Base,
1194 SDValue &OffImm) {
1195 if (!CurDAG->isBaseWithConstantOffset(N))
1196 return false;
1197 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1198 int64_t RHSC = RHS->getSExtValue();
1199 if (RHSC >= -256 && RHSC < 256) {
1200 Base = N.getOperand(0);
1201 if (Base.getOpcode() == ISD::FrameIndex) {
1202 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1203 const TargetLowering *TLI = getTargetLowering();
1204 Base = CurDAG->getTargetFrameIndex(
1205 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1206 }
1207 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1208 return true;
1209 }
1210 }
1211 return false;
1212}
1213
1214 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1215 SDLoc dl(N);
1216 SDValue ImpDef = SDValue(
1217 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1218 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1219 N);
1220}
1221
1222/// Check if the given SHL node (\p N), can be used to form an
1223/// extended register for an addressing mode.
1224bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1225 bool WantExtend, SDValue &Offset,
1226 SDValue &SignExtend) {
1227 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1228 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1229 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1230 return false;
1231
1232 SDLoc dl(N);
1233 if (WantExtend) {
1234 AArch64_AM::ShiftExtendType Ext =
1235 getExtendTypeForNode(N.getOperand(0), true);
1236 if (Ext == AArch64_AM::InvalidShiftExtend)
1237 return false;
1238
1239 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1240 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1241 MVT::i32);
1242 } else {
1243 Offset = N.getOperand(0);
1244 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1245 }
1246
1247 unsigned LegalShiftVal = Log2_32(Size);
1248 unsigned ShiftVal = CSD->getZExtValue();
1249
1250 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1251 return false;
1252
1253 return isWorthFoldingAddr(N, Size);
1254}
1255
1256bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1257 SDValue &Base, SDValue &Offset,
1258 SDValue &SignExtend,
1259 SDValue &DoShift) {
1260 if (N.getOpcode() != ISD::ADD)
1261 return false;
1262 SDValue LHS = N.getOperand(0);
1263 SDValue RHS = N.getOperand(1);
1264 SDLoc dl(N);
1265
1266 // We don't want to match immediate adds here, because they are better lowered
1267 // to the register-immediate addressing modes.
1268 if (isa<ConstantSDNode>(RHS))
1269 return false;
1270
1271 // Check if this particular node is reused in any non-memory related
1272 // operation. If yes, do not try to fold this node into the address
1273 // computation, since the computation will be kept.
1274 const SDNode *Node = N.getNode();
1275 for (SDNode *UI : Node->users()) {
1276 if (!isMemOpOrPrefetch(UI))
1277 return false;
1278 }
1279
1280 // Remember if it is worth folding N when it produces extended register.
1281 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1282
1283 // Try to match a shifted extend on the RHS.
1284 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1285 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1286 Base = LHS;
1287 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1288 return true;
1289 }
1290
1291 // Try to match a shifted extend on the LHS.
1292 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1293 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1294 Base = RHS;
1295 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1296 return true;
1297 }
1298
1299 // There was no shift, whatever else we find.
1300 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1301
1302 AArch64_AM::ShiftExtendType Ext;
1303 // Try to match an unshifted extend on the LHS.
1304 if (IsExtendedRegisterWorthFolding &&
1305 (Ext = getExtendTypeForNode(LHS, true)) !=
1306 AArch64_AM::InvalidShiftExtend) {
1307 Base = RHS;
1308 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1309 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1310 MVT::i32);
1311 if (isWorthFoldingAddr(LHS, Size))
1312 return true;
1313 }
1314
1315 // Try to match an unshifted extend on the RHS.
1316 if (IsExtendedRegisterWorthFolding &&
1317 (Ext = getExtendTypeForNode(RHS, true)) !=
1318 AArch64_AM::InvalidShiftExtend) {
1319 Base = LHS;
1320 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1321 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1322 MVT::i32);
1323 if (isWorthFoldingAddr(RHS, Size))
1324 return true;
1325 }
1326
1327 return false;
1328}
1329
1330// Check if the given immediate is preferred by ADD. If an immediate can be
1331// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1332// encoded by one MOVZ, return true.
1333static bool isPreferredADD(int64_t ImmOff) {
1334 // Constant in [0x0, 0xfff] can be encoded in ADD.
1335 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1336 return true;
1337 // Check if it can be encoded in an "ADD LSL #12".
1338 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1339 // As a single MOVZ is faster than an "ADD ... LSL #12", ignore such constants.
1340 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1341 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1342 return false;
1343}
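// Added commentary (not part of the upstream source): isPreferredADD(0xabc)
// is true (plain ADD immediate). isPreferredADD(0xabc000) is true because it
// needs "ADD ..., LSL #12" and cannot be built with one MOVZ (its set bits
// straddle bit 16). isPreferredADD(0x340000) is false: although it fits
// "ADD ..., LSL #12", a single "MOVZ #0x34, LSL #16" is cheaper.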
1344
1345bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1346 SDValue &Base, SDValue &Offset,
1347 SDValue &SignExtend,
1348 SDValue &DoShift) {
1349 if (N.getOpcode() != ISD::ADD)
1350 return false;
1351 SDValue LHS = N.getOperand(0);
1352 SDValue RHS = N.getOperand(1);
1353 SDLoc DL(N);
1354
1355 // Check if this particular node is reused in any non-memory related
1356 // operation. If yes, do not try to fold this node into the address
1357 // computation, since the computation will be kept.
1358 const SDNode *Node = N.getNode();
1359 for (SDNode *UI : Node->users()) {
1360 if (!isMemOpOrPrefetch(UI))
1361 return false;
1362 }
1363
1364 // Watch out if RHS is a wide immediate: it cannot be selected into the
1365 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
1366 // Instead, the [BaseReg + 0] address mode would be used, generating
1367 // instructions like:
1368 // MOV X0, WideImmediate
1369 // ADD X1, BaseReg, X0
1370 // LDR X2, [X1, 0]
1371 // For such situation, using [BaseReg, XReg] addressing mode can save one
1372 // ADD/SUB:
1373 // MOV X0, WideImmediate
1374 // LDR X2, [BaseReg, X0]
1375 if (isa<ConstantSDNode>(RHS)) {
1376 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1377 // Skip if the immediate can be selected by the load/store addressing mode.
1378 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1379 // checked by using -ImmOff).
1380 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1381 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1382 return false;
1383
1384 SDValue Ops[] = { RHS };
1385 SDNode *MOVI =
1386 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1387 SDValue MOVIV = SDValue(MOVI, 0);
1388 // This ADD of two X register will be selected into [Reg+Reg] mode.
1389 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1390 }
1391
1392 // Remember if it is worth folding N when it produces extended register.
1393 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1394
1395 // Try to match a shifted extend on the RHS.
1396 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1397 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1398 Base = LHS;
1399 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1400 return true;
1401 }
1402
1403 // Try to match a shifted extend on the LHS.
1404 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1405 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1406 Base = RHS;
1407 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1408 return true;
1409 }
1410
1411 // Match any non-shifted, non-extend, non-immediate add expression.
1412 Base = LHS;
1413 Offset = RHS;
1414 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1415 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1416 // Reg1 + Reg2 is free: no check needed.
1417 return true;
1418}
1419
1420SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1421 static const unsigned RegClassIDs[] = {
1422 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1423 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1424 AArch64::dsub2, AArch64::dsub3};
1425
1426 return createTuple(Regs, RegClassIDs, SubRegs);
1427}
1428
1429SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1430 static const unsigned RegClassIDs[] = {
1431 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1432 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1433 AArch64::qsub2, AArch64::qsub3};
1434
1435 return createTuple(Regs, RegClassIDs, SubRegs);
1436}
1437
1438SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1439 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1440 AArch64::ZPR3RegClassID,
1441 AArch64::ZPR4RegClassID};
1442 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1443 AArch64::zsub2, AArch64::zsub3};
1444
1445 return createTuple(Regs, RegClassIDs, SubRegs);
1446}
1447
1448SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1449 assert(Regs.size() == 2 || Regs.size() == 4);
1450
1451 // The createTuple interface requires 3 RegClassIDs for each possible
1452 // tuple type even though we only have them for ZPR2 and ZPR4.
1453 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1454 AArch64::ZPR4Mul4RegClassID};
1455 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1456 AArch64::zsub2, AArch64::zsub3};
1457 return createTuple(Regs, RegClassIDs, SubRegs);
1458}
1459
1460SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1461 const unsigned RegClassIDs[],
1462 const unsigned SubRegs[]) {
1463 // There's no special register-class for a vector-list of 1 element: it's just
1464 // a vector.
1465 if (Regs.size() == 1)
1466 return Regs[0];
1467
1468 assert(Regs.size() >= 2 && Regs.size() <= 4);
1469
1470 SDLoc DL(Regs[0]);
1471
1472 SmallVector<SDValue, 4> Ops;
1473
1474 // First operand of REG_SEQUENCE is the desired RegClass.
1475 Ops.push_back(
1476 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1477
1478 // Then we get pairs of source & subregister-position for the components.
1479 for (unsigned i = 0; i < Regs.size(); ++i) {
1480 Ops.push_back(Regs[i]);
1481 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1482 }
1483
1484 SDNode *N =
1485 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1486 return SDValue(N, 0);
1487}
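// Added commentary (not part of the upstream source): for two Q registers
// {q0, q1}, createQTuple builds
//   REG_SEQUENCE QQRegClassID, q0, qsub0, q1, qsub1
// i.e. the operand list is the register-class constant followed by
// (value, subregister-index) pairs, producing one untyped tuple value.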
1488
1489void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1490 bool isExt) {
1491 SDLoc dl(N);
1492 EVT VT = N->getValueType(0);
1493
1494 unsigned ExtOff = isExt;
1495
1496 // Form a REG_SEQUENCE to force register allocation.
1497 unsigned Vec0Off = ExtOff + 1;
1498 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1499 SDValue RegSeq = createQTuple(Regs);
1500
1501 SmallVector<SDValue, 6> Ops;
1502 if (isExt)
1503 Ops.push_back(N->getOperand(1));
1504 Ops.push_back(RegSeq);
1505 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1506 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1507}
1508
1509static std::tuple<SDValue, SDValue>
1510 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1511 SDLoc DL(Disc);
1512 SDValue AddrDisc;
1513 SDValue ConstDisc;
1514
1515 // If this is a blend, remember the constant and address discriminators.
1516 // Otherwise, it's either a constant discriminator, or a non-blended
1517 // address discriminator.
1518 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1519 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1520 AddrDisc = Disc->getOperand(1);
1521 ConstDisc = Disc->getOperand(2);
1522 } else {
1523 ConstDisc = Disc;
1524 }
1525
1526 // If the constant discriminator (either the blend RHS, or the entire
1527 // discriminator value) isn't a 16-bit constant, bail out, and let the
1528 // discriminator be computed separately.
1529 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1530 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1531 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1532
1533 // If there's no address discriminator, use XZR directly.
1534 if (!AddrDisc)
1535 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1536
1537 return std::make_tuple(
1538 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1539 AddrDisc);
1540}
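// Added commentary (not part of the upstream source): for a discriminator of
// the form (llvm.ptrauth.blend addr, 1234) this returns
// (TargetConstant 1234, addr); for a plain constant 42 it returns
// (TargetConstant 42, XZR); and for a non-constant, non-blend value D it
// returns (TargetConstant 0, D), leaving D as the sole address discriminator.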
1541
1542void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1543 SDLoc DL(N);
1544 // IntrinsicID is operand #0
1545 SDValue Val = N->getOperand(1);
1546 SDValue AUTKey = N->getOperand(2);
1547 SDValue AUTDisc = N->getOperand(3);
1548
1549 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1550 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1551
1552 SDValue AUTAddrDisc, AUTConstDisc;
1553 std::tie(AUTConstDisc, AUTAddrDisc) =
1554 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1555
1556 if (!Subtarget->isX16X17Safer()) {
1557 SDValue Ops[] = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1558
1559 SDNode *AUT =
1560 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1561 ReplaceNode(N, AUT);
1562 } else {
1563 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1564 AArch64::X16, Val, SDValue());
1565 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1566
1567 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1568 ReplaceNode(N, AUT);
1569 }
1570}
1571
1572void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1573 SDLoc DL(N);
1574 // IntrinsicID is operand #0
1575 SDValue Val = N->getOperand(1);
1576 SDValue AUTKey = N->getOperand(2);
1577 SDValue AUTDisc = N->getOperand(3);
1578 SDValue PACKey = N->getOperand(4);
1579 SDValue PACDisc = N->getOperand(5);
1580
1581 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1582 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1583
1584 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1585 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1586
1587 SDValue AUTAddrDisc, AUTConstDisc;
1588 std::tie(AUTConstDisc, AUTAddrDisc) =
1589 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1590
1591 SDValue PACAddrDisc, PACConstDisc;
1592 std::tie(PACConstDisc, PACAddrDisc) =
1593 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1594
1595 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1596 AArch64::X16, Val, SDValue());
1597
1598 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1599 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1600
1601 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1602 ReplaceNode(N, AUTPAC);
1603}
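// Illustrative example (editorial note): a call such as
// @llvm.ptrauth.resign(%p, 0, %ad, 1, 1234) authenticates %p with key IA and
// discriminator %ad, then re-signs it with key IB and constant discriminator
// 1234. The value is pinned to X16 because the AUTPAC pseudo expands to a
// checked sequence that operates on X16/X17.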
1604
1605bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1606 LoadSDNode *LD = cast<LoadSDNode>(N);
1607 if (LD->isUnindexed())
1608 return false;
1609 EVT VT = LD->getMemoryVT();
1610 EVT DstVT = N->getValueType(0);
1611 ISD::MemIndexedMode AM = LD->getAddressingMode();
1612 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1613 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1614 int OffsetVal = (int)OffsetOp->getZExtValue();
1615
1616 // We're not doing validity checking here. That was done when checking
1617 // if we should mark the load as indexed or not. We're just selecting
1618 // the right instruction.
1619 unsigned Opcode = 0;
1620
1621 ISD::LoadExtType ExtType = LD->getExtensionType();
1622 bool InsertTo64 = false;
1623 if (VT == MVT::i64)
1624 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1625 else if (VT == MVT::i32) {
1626 if (ExtType == ISD::NON_EXTLOAD)
1627 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1628 else if (ExtType == ISD::SEXTLOAD)
1629 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1630 else {
1631 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1632 InsertTo64 = true;
1633 // The result of the load is only i32. It's the subreg_to_reg that makes
1634 // it into an i64.
1635 DstVT = MVT::i32;
1636 }
1637 } else if (VT == MVT::i16) {
1638 if (ExtType == ISD::SEXTLOAD) {
1639 if (DstVT == MVT::i64)
1640 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1641 else
1642 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1643 } else {
1644 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1645 InsertTo64 = DstVT == MVT::i64;
1646 // The result of the load is only i32. It's the subreg_to_reg that makes
1647 // it into an i64.
1648 DstVT = MVT::i32;
1649 }
1650 } else if (VT == MVT::i8) {
1651 if (ExtType == ISD::SEXTLOAD) {
1652 if (DstVT == MVT::i64)
1653 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1654 else
1655 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1656 } else {
1657 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1658 InsertTo64 = DstVT == MVT::i64;
1659 // The result of the load is only i32. It's the subreg_to_reg that makes
1660 // it into an i64.
1661 DstVT = MVT::i32;
1662 }
1663 } else if (VT == MVT::f16) {
1664 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1665 } else if (VT == MVT::bf16) {
1666 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1667 } else if (VT == MVT::f32) {
1668 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1669 } else if (VT == MVT::f64 ||
1670 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1671 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1672 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1673 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1674 } else if (VT.is64BitVector()) {
1675 if (IsPre || OffsetVal != 8)
1676 return false;
1677 switch (VT.getScalarSizeInBits()) {
1678 case 8:
1679 Opcode = AArch64::LD1Onev8b_POST;
1680 break;
1681 case 16:
1682 Opcode = AArch64::LD1Onev4h_POST;
1683 break;
1684 case 32:
1685 Opcode = AArch64::LD1Onev2s_POST;
1686 break;
1687 case 64:
1688 Opcode = AArch64::LD1Onev1d_POST;
1689 break;
1690 default:
1691 llvm_unreachable("Expected vector element to be a power of 2");
1692 }
1693 } else if (VT.is128BitVector()) {
1694 if (IsPre || OffsetVal != 16)
1695 return false;
1696 switch (VT.getScalarSizeInBits()) {
1697 case 8:
1698 Opcode = AArch64::LD1Onev16b_POST;
1699 break;
1700 case 16:
1701 Opcode = AArch64::LD1Onev8h_POST;
1702 break;
1703 case 32:
1704 Opcode = AArch64::LD1Onev4s_POST;
1705 break;
1706 case 64:
1707 Opcode = AArch64::LD1Onev2d_POST;
1708 break;
1709 default:
1710 llvm_unreachable("Expected vector element to be a power of 2");
1711 }
1712 } else
1713 return false;
1714 SDValue Chain = LD->getChain();
1715 SDValue Base = LD->getBasePtr();
1716 SDLoc dl(N);
1717 // LD1 encodes an immediate offset by using XZR as the offset register.
1718 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1719 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1720 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1721 SDValue Ops[] = { Base, Offset, Chain };
1722 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1723 MVT::Other, Ops);
1724
1725 // Transfer memoperands.
1726 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1727 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1728
1729 // Either way, we're replacing the node, so tell the caller that.
1730 SDValue LoadedVal = SDValue(Res, 1);
1731 if (InsertTo64) {
1732 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1733 LoadedVal =
1734 SDValue(CurDAG->getMachineNode(
1735 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1736 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1737 SubReg),
1738 0);
1739 }
1740
1741 ReplaceUses(SDValue(N, 0), LoadedVal);
1742 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1743 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1744 CurDAG->RemoveDeadNode(N);
1745 return true;
1746}
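// Illustrative example (editorial note): a post-incremented i64 load, i.e.
// loading from %p and then advancing %p by 8, is selected here to LDRXpost
// ("ldr x0, [x1], #8"), which yields both the loaded value and the updated
// base address; the pre-indexed form maps instead to LDRXpre
// ("ldr x0, [x1, #8]!").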
1747
1748void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1749 unsigned SubRegIdx) {
1750 SDLoc dl(N);
1751 EVT VT = N->getValueType(0);
1752 SDValue Chain = N->getOperand(0);
1753
1754 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1755 Chain};
1756
1757 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1758
1759 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1760 SDValue SuperReg = SDValue(Ld, 0);
1761 for (unsigned i = 0; i < NumVecs; ++i)
1762 ReplaceUses(SDValue(N, i),
1763 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1764
1765 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1766
1767 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1768 // because it's too simple to have needed special treatment during lowering.
1769 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1770 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1771 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1772 }
1773
1774 CurDAG->RemoveDeadNode(N);
1775}
1776
1777void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1778 unsigned Opc, unsigned SubRegIdx) {
1779 SDLoc dl(N);
1780 EVT VT = N->getValueType(0);
1781 SDValue Chain = N->getOperand(0);
1782
1783 SDValue Ops[] = {N->getOperand(1), // Mem operand
1784 N->getOperand(2), // Incremental
1785 Chain};
1786
1787 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1788 MVT::Untyped, MVT::Other};
1789
1790 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1791
1792 // Update uses of write back register
1793 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1794
1795 // Update uses of vector list
1796 SDValue SuperReg = SDValue(Ld, 1);
1797 if (NumVecs == 1)
1798 ReplaceUses(SDValue(N, 0), SuperReg);
1799 else
1800 for (unsigned i = 0; i < NumVecs; ++i)
1801 ReplaceUses(SDValue(N, i),
1802 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1803
1804 // Update the chain
1805 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1806 CurDAG->RemoveDeadNode(N);
1807}
1808
1809/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1810/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1811/// new Base and an SDValue representing the new offset.
1812std::tuple<unsigned, SDValue, SDValue>
1813AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1814 unsigned Opc_ri,
1815 const SDValue &OldBase,
1816 const SDValue &OldOffset,
1817 unsigned Scale) {
1818 SDValue NewBase = OldBase;
1819 SDValue NewOffset = OldOffset;
1820 // Detect a possible Reg+Imm addressing mode.
1821 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1822 N, OldBase, NewBase, NewOffset);
1823
1824 // Detect a possible reg+reg addressing mode, but only if we haven't already
1825 // detected a Reg+Imm one.
1826 const bool IsRegReg =
1827 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1828
1829 // Select the instruction.
1830 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1831}
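// Illustrative example (editorial note): for an SVE load at "base + 2 * VL",
// the reg+imm form is chosen (Opc_ri with immediate #2, mul vl), since the
// offset fits the [-8, 7] vector-length-scaled range tried above; an offset
// held in a general-purpose register selects the reg+reg form (Opc_rr)
// instead.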
1832
1833enum class SelectTypeKind {
1834 Int1 = 0,
1835 Int = 1,
1836 FP = 2,
1837 AnyType = 3,
1838};
1839
1840/// This function selects an opcode from a list of opcodes, which is
1841/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1842/// element types, in this order.
1843template <SelectTypeKind Kind>
1844static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1845 // Only match scalable vector VTs
1846 if (!VT.isScalableVector())
1847 return 0;
1848
1849 EVT EltVT = VT.getVectorElementType();
1850 unsigned Key = VT.getVectorMinNumElements();
1851 switch (Kind) {
1852 case SelectTypeKind::AnyType:
1853 break;
1854 case SelectTypeKind::Int:
1855 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1856 EltVT != MVT::i64)
1857 return 0;
1858 break;
1859 case SelectTypeKind::Int1:
1860 if (EltVT != MVT::i1)
1861 return 0;
1862 break;
1863 case SelectTypeKind::FP:
1864 if (EltVT == MVT::bf16)
1865 Key = 16;
1866 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1867 EltVT != MVT::f64)
1868 return 0;
1869 break;
1870 }
1871
1872 unsigned Offset;
1873 switch (Key) {
1874 case 16: // 8-bit or bf16
1875 Offset = 0;
1876 break;
1877 case 8: // 16-bit
1878 Offset = 1;
1879 break;
1880 case 4: // 32-bit
1881 Offset = 2;
1882 break;
1883 case 2: // 64-bit
1884 Offset = 3;
1885 break;
1886 default:
1887 return 0;
1888 }
1889
1890 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1891}
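// Illustrative example (editorial note): with Kind = Int and VT = nxv8i16,
// the key is the minimum element count 8, which maps to Offset 1, i.e. the
// 16-bit entry of the opcode list; with Kind = FP and VT = nxv8bf16 the key
// is remapped to 16, so bf16 shares slot 0 with the 8-bit entry.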
1892
1893// This function is almost identical to SelectWhilePair, but has an
1894// extra check on the range of the immediate operand.
1895// TODO: Merge these two functions together at some point?
1896void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1897 // Immediate can be either 0 or 1.
1898 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1899 if (Imm->getZExtValue() > 1)
1900 return;
1901
1902 SDLoc DL(N);
1903 EVT VT = N->getValueType(0);
1904 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1905 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1906 SDValue SuperReg = SDValue(WhilePair, 0);
1907
1908 for (unsigned I = 0; I < 2; ++I)
1909 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1910 AArch64::psub0 + I, DL, VT, SuperReg));
1911
1912 CurDAG->RemoveDeadNode(N);
1913}
1914
1915void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1916 SDLoc DL(N);
1917 EVT VT = N->getValueType(0);
1918
1919 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1920
1921 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1922 SDValue SuperReg = SDValue(WhilePair, 0);
1923
1924 for (unsigned I = 0; I < 2; ++I)
1925 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1926 AArch64::psub0 + I, DL, VT, SuperReg));
1927
1928 CurDAG->RemoveDeadNode(N);
1929}
1930
1931void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1932 unsigned Opcode) {
1933 EVT VT = N->getValueType(0);
1934 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1935 SDValue Ops = createZTuple(Regs);
1936 SDLoc DL(N);
1937 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1938 SDValue SuperReg = SDValue(Intrinsic, 0);
1939 for (unsigned i = 0; i < NumVecs; ++i)
1940 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1941 AArch64::zsub0 + i, DL, VT, SuperReg));
1942
1943 CurDAG->RemoveDeadNode(N);
1944}
1945
1946void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1947 unsigned Opcode) {
1948 SDLoc DL(N);
1949 EVT VT = N->getValueType(0);
1950 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1951 Ops.push_back(/*Chain*/ N->getOperand(0));
1952
1953 SDNode *Instruction =
1954 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1955 SDValue SuperReg = SDValue(Instruction, 0);
1956
1957 for (unsigned i = 0; i < NumVecs; ++i)
1958 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1959 AArch64::zsub0 + i, DL, VT, SuperReg));
1960
1961 // Copy chain
1962 unsigned ChainIdx = NumVecs;
1963 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1964 CurDAG->RemoveDeadNode(N);
1965}
1966
1967void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1968 unsigned NumVecs,
1969 bool IsZmMulti,
1970 unsigned Opcode,
1971 bool HasPred) {
1972 assert(Opcode != 0 && "Unexpected opcode");
1973
1974 SDLoc DL(N);
1975 EVT VT = N->getValueType(0);
1976 unsigned FirstVecIdx = HasPred ? 2 : 1;
1977
1978 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1979 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1980 return createZMulTuple(Regs);
1981 };
1982
1983 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1984
1985 SDValue Zm;
1986 if (IsZmMulti)
1987 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1988 else
1989 Zm = N->getOperand(NumVecs + FirstVecIdx);
1990
1991 SDNode *Intrinsic;
1992 if (HasPred)
1993 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1994 N->getOperand(1), Zdn, Zm);
1995 else
1996 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1997 SDValue SuperReg = SDValue(Intrinsic, 0);
1998 for (unsigned i = 0; i < NumVecs; ++i)
1999 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2000 AArch64::zsub0 + i, DL, VT, SuperReg));
2001
2002 CurDAG->RemoveDeadNode(N);
2003}
2004
2005void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2006 unsigned Scale, unsigned Opc_ri,
2007 unsigned Opc_rr, bool IsIntr) {
2008 assert(Scale < 5 && "Invalid scaling value.");
2009 SDLoc DL(N);
2010 EVT VT = N->getValueType(0);
2011 SDValue Chain = N->getOperand(0);
2012
2013 // Optimize addressing mode.
2014 SDValue Base, Offset;
2015 unsigned Opc;
2016 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2017 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2018 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2019
2020 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2021 Base, // Memory operand
2022 Offset, Chain};
2023
2024 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2025
2026 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2027 SDValue SuperReg = SDValue(Load, 0);
2028 for (unsigned i = 0; i < NumVecs; ++i)
2029 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2030 AArch64::zsub0 + i, DL, VT, SuperReg));
2031
2032 // Copy chain
2033 unsigned ChainIdx = NumVecs;
2034 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2035 CurDAG->RemoveDeadNode(N);
2036}
2037
2038void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2039 unsigned NumVecs,
2040 unsigned Scale,
2041 unsigned Opc_ri,
2042 unsigned Opc_rr) {
2043 assert(Scale < 4 && "Invalid scaling value.");
2044 SDLoc DL(N);
2045 EVT VT = N->getValueType(0);
2046 SDValue Chain = N->getOperand(0);
2047
2048 SDValue PNg = N->getOperand(2);
2049 SDValue Base = N->getOperand(3);
2050 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2051 unsigned Opc;
2052 std::tie(Opc, Base, Offset) =
2053 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2054
2055 SDValue Ops[] = {PNg, // Predicate-as-counter
2056 Base, // Memory operand
2057 Offset, Chain};
2058
2059 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2060
2061 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2062 SDValue SuperReg = SDValue(Load, 0);
2063 for (unsigned i = 0; i < NumVecs; ++i)
2064 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2065 AArch64::zsub0 + i, DL, VT, SuperReg));
2066
2067 // Copy chain
2068 unsigned ChainIdx = NumVecs;
2069 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2070 CurDAG->RemoveDeadNode(N);
2071}
2072
2073void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2074 unsigned Opcode) {
2075 if (N->getValueType(0) != MVT::nxv4f32)
2076 return;
2077 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2078}
2079
2080void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2081 unsigned NumOutVecs,
2082 unsigned Opc,
2083 uint32_t MaxImm) {
2084 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2085 if (Imm->getZExtValue() > MaxImm)
2086 return;
2087
2088 SDValue ZtValue;
2089 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2090 return;
2091
2092 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2093 SDLoc DL(Node);
2094 EVT VT = Node->getValueType(0);
2095
2096 SDNode *Instruction =
2097 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2098 SDValue SuperReg = SDValue(Instruction, 0);
2099
2100 for (unsigned I = 0; I < NumOutVecs; ++I)
2101 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2102 AArch64::zsub0 + I, DL, VT, SuperReg));
2103
2104 // Copy chain
2105 unsigned ChainIdx = NumOutVecs;
2106 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2107 CurDAG->RemoveDeadNode(Node);
2108}
2109
2110void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2111 unsigned NumOutVecs,
2112 unsigned Opc) {
2113
2114 SDValue ZtValue;
2115 SmallVector<SDValue, 4> Ops;
2116 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2117 return;
2118
2119 Ops.push_back(ZtValue);
2120 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2121 SDLoc DL(Node);
2122 EVT VT = Node->getValueType(0);
2123
2124 SDNode *Instruction =
2125 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2126 SDValue SuperReg = SDValue(Instruction, 0);
2127
2128 for (unsigned I = 0; I < NumOutVecs; ++I)
2129 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2130 AArch64::zsub0 + I, DL, VT, SuperReg));
2131
2132 // Copy chain
2133 unsigned ChainIdx = NumOutVecs;
2134 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2135 CurDAG->RemoveDeadNode(Node);
2136}
2137
2138void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2139 unsigned Op) {
2140 SDLoc DL(N);
2141 EVT VT = N->getValueType(0);
2142
2143 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2144 SDValue Zd = createZMulTuple(Regs);
2145 SDValue Zn = N->getOperand(1 + NumVecs);
2146 SDValue Zm = N->getOperand(2 + NumVecs);
2147
2148 SDValue Ops[] = {Zd, Zn, Zm};
2149
2150 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2151 SDValue SuperReg = SDValue(Intrinsic, 0);
2152 for (unsigned i = 0; i < NumVecs; ++i)
2153 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2154 AArch64::zsub0 + i, DL, VT, SuperReg));
2155
2156 CurDAG->RemoveDeadNode(N);
2157}
2158
2159bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2160 switch (BaseReg) {
2161 default:
2162 return false;
2163 case AArch64::ZA:
2164 case AArch64::ZAB0:
2165 if (TileNum == 0)
2166 break;
2167 return false;
2168 case AArch64::ZAH0:
2169 if (TileNum <= 1)
2170 break;
2171 return false;
2172 case AArch64::ZAS0:
2173 if (TileNum <= 3)
2174 break;
2175 return false;
2176 case AArch64::ZAD0:
2177 if (TileNum <= 7)
2178 break;
2179 return false;
2180 }
2181
2182 BaseReg += TileNum;
2183 return true;
2184}
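// Illustrative example (editorial note): a 32-bit tile base of ZAS0 with
// TileNum = 2 is rewritten to ZAS2 (32-bit tiles are numbered 0-3), whereas
// ZAS0 with TileNum = 4 is rejected and the caller bails out of selection.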
2185
2186template <unsigned MaxIdx, unsigned Scale>
2187void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2188 unsigned BaseReg, unsigned Op) {
2189 unsigned TileNum = 0;
2190 if (BaseReg != AArch64::ZA)
2191 TileNum = N->getConstantOperandVal(2);
2192
2193 if (!SelectSMETile(BaseReg, TileNum))
2194 return;
2195
2196 SDValue SliceBase, Base, Offset;
2197 if (BaseReg == AArch64::ZA)
2198 SliceBase = N->getOperand(2);
2199 else
2200 SliceBase = N->getOperand(3);
2201
2202 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2203 return;
2204
2205 SDLoc DL(N);
2206 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2207 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2208 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2209
2210 EVT VT = N->getValueType(0);
2211 for (unsigned I = 0; I < NumVecs; ++I)
2212 ReplaceUses(SDValue(N, I),
2213 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2214 SDValue(Mov, 0)));
2215 // Copy chain
2216 unsigned ChainIdx = NumVecs;
2217 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2218 CurDAG->RemoveDeadNode(N);
2219}
2220
2221void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2222 unsigned Op, unsigned MaxIdx,
2223 unsigned Scale, unsigned BaseReg) {
2224 // Slice can be in different positions
2225 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2226 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2227 SDValue SliceBase = N->getOperand(2);
2228 if (BaseReg != AArch64::ZA)
2229 SliceBase = N->getOperand(3);
2230
2231 SDValue Base, Offset;
2232 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2233 return;
2234 // The correct ZA tile number is computed during machine-instruction
2235 // emission (see EmitZAInstr); the DAG cannot select a ZA tile as an
2236 // output register with ZReg.
2237 SDLoc DL(N);
2238 SmallVector<SDValue, 6> Ops;
2239 if (BaseReg != AArch64::ZA)
2240 Ops.push_back(N->getOperand(2));
2241 Ops.push_back(Base);
2242 Ops.push_back(Offset);
2243 Ops.push_back(N->getOperand(0)); //Chain
2244 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2245
2246 EVT VT = N->getValueType(0);
2247 for (unsigned I = 0; I < NumVecs; ++I)
2248 ReplaceUses(SDValue(N, I),
2249 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2250 SDValue(Mov, 0)));
2251
2252 // Copy chain
2253 unsigned ChainIdx = NumVecs;
2254 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2255 CurDAG->RemoveDeadNode(N);
2256}
2257
2258void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2259 unsigned NumOutVecs,
2260 bool IsTupleInput,
2261 unsigned Opc) {
2262 SDLoc DL(N);
2263 EVT VT = N->getValueType(0);
2264 unsigned NumInVecs = N->getNumOperands() - 1;
2265
2266 SmallVector<SDValue, 4> Ops;
2267 if (IsTupleInput) {
2268 assert((NumInVecs == 2 || NumInVecs == 4) &&
2269 "Don't know how to handle multi-register input!");
2270 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2271 Ops.push_back(createZMulTuple(Regs));
2272 } else {
2273 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2274 for (unsigned I = 0; I < NumInVecs; I++)
2275 Ops.push_back(N->getOperand(1 + I));
2276 }
2277
2278 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2279 SDValue SuperReg = SDValue(Res, 0);
2280
2281 for (unsigned I = 0; I < NumOutVecs; I++)
2282 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2283 AArch64::zsub0 + I, DL, VT, SuperReg));
2284 CurDAG->RemoveDeadNode(N);
2285}
2286
2287void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2288 unsigned Opc) {
2289 SDLoc dl(N);
2290 EVT VT = N->getOperand(2)->getValueType(0);
2291
2292 // Form a REG_SEQUENCE to force register allocation.
2293 bool Is128Bit = VT.getSizeInBits() == 128;
2294 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2295 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2296
2297 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2298 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2299
2300 // Transfer memoperands.
2301 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2302 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2303
2304 ReplaceNode(N, St);
2305}
2306
2307void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2308 unsigned Scale, unsigned Opc_rr,
2309 unsigned Opc_ri) {
2310 SDLoc dl(N);
2311
2312 // Form a REG_SEQUENCE to force register allocation.
2313 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2314 SDValue RegSeq = createZTuple(Regs);
2315
2316 // Optimize addressing mode.
2317 unsigned Opc;
2318 SDValue Base, Offset;
2319 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2320 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2321 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2322
2323 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2324 Base, // address
2325 Offset, // offset
2326 N->getOperand(0)}; // chain
2327 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2328
2329 ReplaceNode(N, St);
2330}
2331
2332bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2333 SDValue &OffImm) {
2334 SDLoc dl(N);
2335 const DataLayout &DL = CurDAG->getDataLayout();
2336 const TargetLowering *TLI = getTargetLowering();
2337
2338 // Try to match it for the frame address
2339 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2340 int FI = FINode->getIndex();
2341 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2342 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2343 return true;
2344 }
2345
2346 return false;
2347}
2348
2349void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2350 unsigned Opc) {
2351 SDLoc dl(N);
2352 EVT VT = N->getOperand(2)->getValueType(0);
2353 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2354 MVT::Other}; // Type for the Chain
2355
2356 // Form a REG_SEQUENCE to force register allocation.
2357 bool Is128Bit = VT.getSizeInBits() == 128;
2358 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2359 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2360
2361 SDValue Ops[] = {RegSeq,
2362 N->getOperand(NumVecs + 1), // base register
2363 N->getOperand(NumVecs + 2), // Incremental
2364 N->getOperand(0)}; // Chain
2365 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2366
2367 ReplaceNode(N, St);
2368}
2369
2370namespace {
2371/// WidenVector - Given a value in the V64 register class, produce the
2372/// equivalent value in the V128 register class.
2373class WidenVector {
2374 SelectionDAG &DAG;
2375
2376public:
2377 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2378
2379 SDValue operator()(SDValue V64Reg) {
2380 EVT VT = V64Reg.getValueType();
2381 unsigned NarrowSize = VT.getVectorNumElements();
2382 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2383 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2384 SDLoc DL(V64Reg);
2385
2386 SDValue Undef =
2387 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2388 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2389 }
2390};
2391} // namespace
2392
2393/// NarrowVector - Given a value in the V128 register class, produce the
2394/// equivalent value in the V64 register class.
2395 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2396 EVT VT = V128Reg.getValueType();
2397 unsigned WideSize = VT.getVectorNumElements();
2398 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2399 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2400
2401 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2402 V128Reg);
2403}
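// Illustrative example (editorial note): NarrowVector turns a v4i32 value
// held in a Q register into its v2i32 low half by extracting the dsub
// subregister; WidenVector above does the inverse by inserting a v2i32
// value into the dsub lane of an IMPLICIT_DEF v4i32.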
2404
2405void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2406 unsigned Opc) {
2407 SDLoc dl(N);
2408 EVT VT = N->getValueType(0);
2409 bool Narrow = VT.getSizeInBits() == 64;
2410
2411 // Form a REG_SEQUENCE to force register allocation.
2412 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2413
2414 if (Narrow)
2415 transform(Regs, Regs.begin(),
2416 WidenVector(*CurDAG));
2417
2418 SDValue RegSeq = createQTuple(Regs);
2419
2420 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2421
2422 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2423
2424 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2425 N->getOperand(NumVecs + 3), N->getOperand(0)};
2426 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2427 SDValue SuperReg = SDValue(Ld, 0);
2428
2429 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2430 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2431 AArch64::qsub2, AArch64::qsub3 };
2432 for (unsigned i = 0; i < NumVecs; ++i) {
2433 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2434 if (Narrow)
2435 NV = NarrowVector(NV, *CurDAG);
2436 ReplaceUses(SDValue(N, i), NV);
2437 }
2438
2439 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2440 CurDAG->RemoveDeadNode(N);
2441}
2442
2443void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2444 unsigned Opc) {
2445 SDLoc dl(N);
2446 EVT VT = N->getValueType(0);
2447 bool Narrow = VT.getSizeInBits() == 64;
2448
2449 // Form a REG_SEQUENCE to force register allocation.
2450 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2451
2452 if (Narrow)
2453 transform(Regs, Regs.begin(),
2454 WidenVector(*CurDAG));
2455
2456 SDValue RegSeq = createQTuple(Regs);
2457
2458 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2459 RegSeq->getValueType(0), MVT::Other};
2460
2461 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2462
2463 SDValue Ops[] = {RegSeq,
2464 CurDAG->getTargetConstant(LaneNo, dl,
2465 MVT::i64), // Lane Number
2466 N->getOperand(NumVecs + 2), // Base register
2467 N->getOperand(NumVecs + 3), // Incremental
2468 N->getOperand(0)};
2469 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2470
2471 // Update uses of the write back register
2472 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2473
2474 // Update uses of the vector list
2475 SDValue SuperReg = SDValue(Ld, 1);
2476 if (NumVecs == 1) {
2477 ReplaceUses(SDValue(N, 0),
2478 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2479 } else {
2480 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2481 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2482 AArch64::qsub2, AArch64::qsub3 };
2483 for (unsigned i = 0; i < NumVecs; ++i) {
2484 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2485 SuperReg);
2486 if (Narrow)
2487 NV = NarrowVector(NV, *CurDAG);
2488 ReplaceUses(SDValue(N, i), NV);
2489 }
2490 }
2491
2492 // Update the Chain
2493 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2494 CurDAG->RemoveDeadNode(N);
2495}
2496
2497void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2498 unsigned Opc) {
2499 SDLoc dl(N);
2500 EVT VT = N->getOperand(2)->getValueType(0);
2501 bool Narrow = VT.getSizeInBits() == 64;
2502
2503 // Form a REG_SEQUENCE to force register allocation.
2504 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2505
2506 if (Narrow)
2507 transform(Regs, Regs.begin(),
2508 WidenVector(*CurDAG));
2509
2510 SDValue RegSeq = createQTuple(Regs);
2511
2512 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2513
2514 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2515 N->getOperand(NumVecs + 3), N->getOperand(0)};
2516 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2517
2518 // Transfer memoperands.
2519 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2520 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2521
2522 ReplaceNode(N, St);
2523}
2524
2525void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2526 unsigned Opc) {
2527 SDLoc dl(N);
2528 EVT VT = N->getOperand(2)->getValueType(0);
2529 bool Narrow = VT.getSizeInBits() == 64;
2530
2531 // Form a REG_SEQUENCE to force register allocation.
2532 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2533
2534 if (Narrow)
2535 transform(Regs, Regs.begin(),
2536 WidenVector(*CurDAG));
2537
2538 SDValue RegSeq = createQTuple(Regs);
2539
2540 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2541 MVT::Other};
2542
2543 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2544
2545 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2546 N->getOperand(NumVecs + 2), // Base Register
2547 N->getOperand(NumVecs + 3), // Incremental
2548 N->getOperand(0)};
2549 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2550
2551 // Transfer memoperands.
2552 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2553 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2554
2555 ReplaceNode(N, St);
2556}
2557
2558 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2559 unsigned &Opc, SDValue &Opd0,
2560 unsigned &LSB, unsigned &MSB,
2561 unsigned NumberOfIgnoredLowBits,
2562 bool BiggerPattern) {
2563 assert(N->getOpcode() == ISD::AND &&
2564 "N must be a AND operation to call this function");
2565
2566 EVT VT = N->getValueType(0);
2567
2568 // Here we can test the type of VT and return false when the type does not
2569 // match, but since it is done prior to that call in the current context
2570 // we turned that into an assert to avoid redundant code.
2571 assert((VT == MVT::i32 || VT == MVT::i64) &&
2572 "Type checking must have been done before calling this function");
2573
2574 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2575 // changed the AND node to a 32-bit mask operation. We'll have to
2576 // undo that as part of the transform here if we want to catch all
2577 // the opportunities.
2578 // Currently the NumberOfIgnoredLowBits argument helps to recover
2579 // from these situations when matching bigger pattern (bitfield insert).
2580
2581 // For unsigned extracts, check for a shift right and mask
2582 uint64_t AndImm = 0;
2583 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2584 return false;
2585
2586 const SDNode *Op0 = N->getOperand(0).getNode();
2587
2588 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2589 // simplified. Try to undo that
2590 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2591
2592 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2593 if (AndImm & (AndImm + 1))
2594 return false;
2595
2596 bool ClampMSB = false;
2597 uint64_t SrlImm = 0;
2598 // Handle the SRL + ANY_EXTEND case.
2599 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2600 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2601 // Extend the incoming operand of the SRL to 64-bit.
2602 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2603 // Make sure to clamp the MSB so that we preserve the semantics of the
2604 // original operations.
2605 ClampMSB = true;
2606 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2607 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2608 SrlImm)) {
2609 // If the shift result was truncated, we can still combine them.
2610 Opd0 = Op0->getOperand(0).getOperand(0);
2611
2612 // Use the type of SRL node.
2613 VT = Opd0->getValueType(0);
2614 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2615 Opd0 = Op0->getOperand(0);
2616 ClampMSB = (VT == MVT::i32);
2617 } else if (BiggerPattern) {
2618 // Let's pretend a 0 shift right has been performed.
2619 // The resulting code will be at least as good as the original one
2620 // plus it may expose more opportunities for bitfield insert pattern.
2621 // FIXME: Currently we limit this to the bigger pattern, because
2622 // some optimizations expect AND and not UBFM.
2623 Opd0 = N->getOperand(0);
2624 } else
2625 return false;
2626
2627 // Bail out on large immediates. This happens when no proper
2628 // combining/constant folding was performed.
2629 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2630 LLVM_DEBUG(
2631 (dbgs() << N
2632 << ": Found large shift immediate, this should not happen\n"));
2633 return false;
2634 }
2635
2636 LSB = SrlImm;
2637 MSB = SrlImm +
2638 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2639 : llvm::countr_one<uint64_t>(AndImm)) -
2640 1;
2641 if (ClampMSB)
2642 // Since we're moving the extend before the right shift operation, we need
2643 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2644 // the zeros which would get shifted in with the original right shift
2645 // operation.
2646 MSB = MSB > 31 ? 31 : MSB;
2647
2648 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2649 return true;
2650}
2651
2652 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2653 SDValue &Opd0, unsigned &Immr,
2654 unsigned &Imms) {
2655 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2656
2657 EVT VT = N->getValueType(0);
2658 unsigned BitWidth = VT.getSizeInBits();
2659 assert((VT == MVT::i32 || VT == MVT::i64) &&
2660 "Type checking must have been done before calling this function");
2661
2662 SDValue Op = N->getOperand(0);
2663 if (Op->getOpcode() == ISD::TRUNCATE) {
2664 Op = Op->getOperand(0);
2665 VT = Op->getValueType(0);
2666 BitWidth = VT.getSizeInBits();
2667 }
2668
2669 uint64_t ShiftImm;
2670 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2671 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2672 return false;
2673
2674 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2675 if (ShiftImm + Width > BitWidth)
2676 return false;
2677
2678 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2679 Opd0 = Op.getOperand(0);
2680 Immr = ShiftImm;
2681 Imms = ShiftImm + Width - 1;
2682 return true;
2683}
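// Illustrative example (editorial note): for an i32 node
// (sign_extend_inreg (srl x, 3), i8), ShiftImm = 3 and Width = 8, so the
// match above yields SBFMWri x, #3, #10, which extracts bits [10:3] of x
// and sign-extends the result.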
2684
2685 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2686 SDValue &Opd0, unsigned &LSB,
2687 unsigned &MSB) {
2688 // We are looking for the following pattern, which extracts a contiguous run
2689 // of bits from the source value and places it starting at the LSB of the
2690 // destination value; all other bits of the destination value are set to zero:
2691 //
2692 // Value2 = AND Value, MaskImm
2693 // SRL Value2, ShiftImm
2694 //
2695 // where MaskImm >> ShiftImm determines the width of the extracted bit field.
2696 //
2697 // This gets selected into a single UBFM:
2698 //
2699 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2700 //
2701
2702 if (N->getOpcode() != ISD::SRL)
2703 return false;
2704
2705 uint64_t AndMask = 0;
2706 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2707 return false;
2708
2709 Opd0 = N->getOperand(0).getOperand(0);
2710
2711 uint64_t SrlImm = 0;
2712 if (!isIntImmediate(N->getOperand(1), SrlImm))
2713 return false;
2714
2715 // Check whether we really have several bits extract here.
2716 if (!isMask_64(AndMask >> SrlImm))
2717 return false;
2718
2719 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2720 LSB = SrlImm;
2721 MSB = llvm::Log2_64(AndMask);
2722 return true;
2723}
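// Illustrative example (editorial note): for ((x & 0xff0) >> 4), AndMask is
// 0xff0 and SrlImm is 4; AndMask >> SrlImm = 0xff is a low-bits mask, so the
// pattern becomes UBFM x, #4, #11 (LSB = 4, MSB = Log2_64(0xff0) = 11),
// extracting bits [11:4].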
2724
2725static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2726 unsigned &Immr, unsigned &Imms,
2727 bool BiggerPattern) {
2728 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2729 "N must be a SHR/SRA operation to call this function");
2730
2731 EVT VT = N->getValueType(0);
2732
2733 // Here we can test the type of VT and return false when the type does not
2734 // match, but since it is done prior to that call in the current context
2735 // we turned that into an assert to avoid redundant code.
2736 assert((VT == MVT::i32 || VT == MVT::i64) &&
2737 "Type checking must have been done before calling this function");
2738
2739 // Check for AND + SRL doing several bits extract.
2740 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2741 return true;
2742
2743 // We're looking for a shift of a shift.
2744 uint64_t ShlImm = 0;
2745 uint64_t TruncBits = 0;
2746 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2747 Opd0 = N->getOperand(0).getOperand(0);
2748 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2749 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2750 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2751 // be considered as setting high 32 bits as zero. Our strategy here is to
2752 // always generate 64bit UBFM. This consistency will help the CSE pass
2753 // later find more redundancy.
2754 Opd0 = N->getOperand(0).getOperand(0);
2755 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2756 VT = Opd0.getValueType();
2757 assert(VT == MVT::i64 && "the promoted type should be i64");
2758 } else if (BiggerPattern) {
2759 // Let's pretend a 0 shift left has been performed.
2760 // FIXME: Currently we limit this to the bigger pattern case,
2761 // because some optimizations expect AND and not UBFM
2762 Opd0 = N->getOperand(0);
2763 } else
2764 return false;
2765
2766 // Missing combines/constant folding may have left us with strange
2767 // constants.
2768 if (ShlImm >= VT.getSizeInBits()) {
2769 LLVM_DEBUG(
2770 (dbgs() << N
2771 << ": Found large shift immediate, this should not happen\n"));
2772 return false;
2773 }
2774
2775 uint64_t SrlImm = 0;
2776 if (!isIntImmediate(N->getOperand(1), SrlImm))
2777 return false;
2778
2779 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2780 "bad amount in shift node!");
2781 int immr = SrlImm - ShlImm;
2782 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2783 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2784 // SRA requires a signed extraction
2785 if (VT == MVT::i32)
2786 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2787 else
2788 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2789 return true;
2790}
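// Illustrative example (editorial note): for an i32 node ((x << 8) >> 20),
// ShlImm = 8 and SrlImm = 20, so Immr = 20 - 8 = 12 and
// Imms = 32 - 8 - 1 = 23; an SRL selects UBFMWri x, #12, #23 and an SRA
// selects SBFMWri with the same immediates.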
2791
2792bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2793 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2794
2795 EVT VT = N->getValueType(0);
2796 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2797 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2798 return false;
2799
2800 uint64_t ShiftImm;
2801 SDValue Op = N->getOperand(0);
2802 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2803 return false;
2804
2805 SDLoc dl(N);
2806 // Extend the incoming operand of the shift to 64-bits.
2807 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2808 unsigned Immr = ShiftImm;
2809 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2810 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2811 CurDAG->getTargetConstant(Imms, dl, VT)};
2812 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2813 return true;
2814}
2815
2816static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2817 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2818 unsigned NumberOfIgnoredLowBits = 0,
2819 bool BiggerPattern = false) {
2820 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2821 return false;
2822
2823 switch (N->getOpcode()) {
2824 default:
2825 if (!N->isMachineOpcode())
2826 return false;
2827 break;
2828 case ISD::AND:
2829 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2830 NumberOfIgnoredLowBits, BiggerPattern);
2831 case ISD::SRL:
2832 case ISD::SRA:
2833 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2834
2835 case ISD::SIGN_EXTEND_INREG:
2836 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2837 }
2838
2839 unsigned NOpc = N->getMachineOpcode();
2840 switch (NOpc) {
2841 default:
2842 return false;
2843 case AArch64::SBFMWri:
2844 case AArch64::UBFMWri:
2845 case AArch64::SBFMXri:
2846 case AArch64::UBFMXri:
2847 Opc = NOpc;
2848 Opd0 = N->getOperand(0);
2849 Immr = N->getConstantOperandVal(1);
2850 Imms = N->getConstantOperandVal(2);
2851 return true;
2852 }
2853 // Unreachable
2854 return false;
2855}
2856
2857bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2858 unsigned Opc, Immr, Imms;
2859 SDValue Opd0;
2860 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2861 return false;
2862
2863 EVT VT = N->getValueType(0);
2864 SDLoc dl(N);
2865
2866 // If the bit extract operation is 64bit but the original type is 32bit, we
2867 // need to add one EXTRACT_SUBREG.
2868 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2869 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2870 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2871
2872 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2873 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2874 MVT::i32, SDValue(BFM, 0));
2875 ReplaceNode(N, Inner.getNode());
2876 return true;
2877 }
2878
2879 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2880 CurDAG->getTargetConstant(Imms, dl, VT)};
2881 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2882 return true;
2883}
2884
2885/// Does DstMask form a complementary pair with the mask provided by
2886/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2887/// this asks whether DstMask zeroes precisely those bits that will be set by
2888/// the other half.
2889static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2890 unsigned NumberOfIgnoredHighBits, EVT VT) {
2891 assert((VT == MVT::i32 || VT == MVT::i64) &&
2892 "i32 or i64 mask type expected!");
2893 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2894
2895 // Enable implicitTrunc as we're intentionally ignoring high bits.
2896 APInt SignificantDstMask =
2897 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2898 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2899
2900 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2901 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2902}
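// Illustrative example (editorial note): with VT = i32, DstMask = 0xffff00ff
// and BitsToBeInserted = 0x0000ff00, the two masks are disjoint and their
// union is all ones, so they form a complementary pair and the surrounding
// AND/OR can be folded into a single BFI inserting 8 bits at position 8.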
2903
2904// Look for bits that will be useful for later uses.
2905 // A bit is considered useless as soon as it is dropped and was never used
2906 // before being dropped.
2907// E.g., looking for useful bit of x
2908// 1. y = x & 0x7
2909// 2. z = y >> 2
2910 // After #1, the useful bits of x are 0x7; these useful bits then live through
2911 // y.
2912 // After #2, the useful bits of x are 0x4.
2913 // However, if x is used by an unpredictable instruction, then all its bits
2914// are useful.
2915// E.g.
2916// 1. y = x & 0x7
2917// 2. z = y >> 2
2918// 3. str x, [@x]
2919static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2920
2921 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2922 unsigned Depth) {
2923 uint64_t Imm =
2924 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2925 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2926 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2927 getUsefulBits(Op, UsefulBits, Depth + 1);
2928}
2929
2930 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2931 uint64_t Imm, uint64_t MSB,
2932 unsigned Depth) {
2933 // inherit the bitwidth value
2934 APInt OpUsefulBits(UsefulBits);
2935 OpUsefulBits = 1;
2936
2937 if (MSB >= Imm) {
2938 OpUsefulBits <<= MSB - Imm + 1;
2939 --OpUsefulBits;
2940 // The interesting part will be in the lower part of the result
2941 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2942 // The interesting part was starting at Imm in the argument
2943 OpUsefulBits <<= Imm;
2944 } else {
2945 OpUsefulBits <<= MSB + 1;
2946 --OpUsefulBits;
2947 // The interesting part will be shifted in the result
2948 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2949 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2950 // The interesting part was at zero in the argument
2951 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2952 }
2953
2954 UsefulBits &= OpUsefulBits;
2955}
2956
2957static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2958 unsigned Depth) {
2959 uint64_t Imm =
2960 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2961 uint64_t MSB =
2962 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2963
2964 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2965}
2966
2967 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2968 unsigned Depth) {
2969 uint64_t ShiftTypeAndValue =
2970 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2971 APInt Mask(UsefulBits);
2972 Mask.clearAllBits();
2973 Mask.flipAllBits();
2974
2975 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2976 // Shift Left
2977 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2978 Mask <<= ShiftAmt;
2979 getUsefulBits(Op, Mask, Depth + 1);
2980 Mask.lshrInPlace(ShiftAmt);
2981 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2982 // Shift Right
2983 // We do not handle AArch64_AM::ASR, because the sign will change the
2984 // number of useful bits
2985 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2986 Mask.lshrInPlace(ShiftAmt);
2987 getUsefulBits(Op, Mask, Depth + 1);
2988 Mask <<= ShiftAmt;
2989 } else
2990 return;
2991
2992 UsefulBits &= Mask;
2993}
2994
2995static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2996 unsigned Depth) {
2997 uint64_t Imm =
2998 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2999 uint64_t MSB =
3000 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3001
3002 APInt OpUsefulBits(UsefulBits);
3003 OpUsefulBits = 1;
3004
3005 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3006 ResultUsefulBits.flipAllBits();
3007 APInt Mask(UsefulBits.getBitWidth(), 0);
3008
3009 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3010
3011 if (MSB >= Imm) {
3012 // The instruction is a BFXIL.
3013 uint64_t Width = MSB - Imm + 1;
3014 uint64_t LSB = Imm;
3015
3016 OpUsefulBits <<= Width;
3017 --OpUsefulBits;
3018
3019 if (Op.getOperand(1) == Orig) {
3020 // Copy the low bits from the result to bits starting from LSB.
3021 Mask = ResultUsefulBits & OpUsefulBits;
3022 Mask <<= LSB;
3023 }
3024
3025 if (Op.getOperand(0) == Orig)
3026 // Bits starting from LSB in the input contribute to the result.
3027 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3028 } else {
3029 // The instruction is a BFI.
3030 uint64_t Width = MSB + 1;
3031 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3032
3033 OpUsefulBits <<= Width;
3034 --OpUsefulBits;
3035 OpUsefulBits <<= LSB;
3036
3037 if (Op.getOperand(1) == Orig) {
3038 // Copy the bits from the result to the zero bits.
3039 Mask = ResultUsefulBits & OpUsefulBits;
3040 Mask.lshrInPlace(LSB);
3041 }
3042
3043 if (Op.getOperand(0) == Orig)
3044 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3045 }
3046
3047 UsefulBits &= Mask;
3048}
3049
3050static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3051 SDValue Orig, unsigned Depth) {
3052
3053 // Users of this node should have already been instruction selected
3054 // FIXME: Can we turn that into an assert?
3055 if (!UserNode->isMachineOpcode())
3056 return;
3057
3058 switch (UserNode->getMachineOpcode()) {
3059 default:
3060 return;
3061 case AArch64::ANDSWri:
3062 case AArch64::ANDSXri:
3063 case AArch64::ANDWri:
3064 case AArch64::ANDXri:
3065 // We increment Depth only when we call the getUsefulBits
3066 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3067 Depth);
3068 case AArch64::UBFMWri:
3069 case AArch64::UBFMXri:
3070 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3071
3072 case AArch64::ORRWrs:
3073 case AArch64::ORRXrs:
3074 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3075 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3076 Depth);
3077 return;
3078 case AArch64::BFMWri:
3079 case AArch64::BFMXri:
3080 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3081
3082 case AArch64::STRBBui:
3083 case AArch64::STURBBi:
3084 if (UserNode->getOperand(0) != Orig)
3085 return;
3086 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3087 return;
3088
3089 case AArch64::STRHHui:
3090 case AArch64::STURHHi:
3091 if (UserNode->getOperand(0) != Orig)
3092 return;
3093 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3094 return;
3095 }
3096}
3097
3098static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3099 if (Depth >= SelectionDAG::MaxRecursionDepth)
3100 return;
3101 // Initialize UsefulBits
3102 if (!Depth) {
3103 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3104 // At the beginning, assume every produced bits is useful
3105 UsefulBits = APInt(Bitwidth, 0);
3106 UsefulBits.flipAllBits();
3107 }
3108 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3109
3110 for (SDNode *Node : Op.getNode()->users()) {
3111 // A use cannot produce useful bits
3112 APInt UsefulBitsForUse = APInt(UsefulBits);
3113 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3114 UsersUsefulBits |= UsefulBitsForUse;
3115 }
3116 // UsefulBits contains the produced bits that are meaningful for the
3117 // current definition, thus a user cannot make a bit meaningful at
3118 // this point
3119 UsefulBits &= UsersUsefulBits;
3120}
3121
3122/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3123/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3124/// 0, return Op unchanged.
3125static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3126 if (ShlAmount == 0)
3127 return Op;
3128
3129 EVT VT = Op.getValueType();
3130 SDLoc dl(Op);
3131 unsigned BitWidth = VT.getSizeInBits();
3132 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3133
3134 SDNode *ShiftNode;
3135 if (ShlAmount > 0) {
3136 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3137 ShiftNode = CurDAG->getMachineNode(
3138 UBFMOpc, dl, VT, Op,
3139 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3140 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3141 } else {
3142 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3143 assert(ShlAmount < 0 && "expected right shift");
3144 int ShrAmount = -ShlAmount;
3145 ShiftNode = CurDAG->getMachineNode(
3146 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3147 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3148 }
3149
3150 return SDValue(ShiftNode, 0);
3151}
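// Illustrative example (editorial note): on i32, getLeftShift(CurDAG, x, 8)
// emits UBFMWri x, #24, #23 (the encoding of "lsl w, w, #8"), while
// getLeftShift(CurDAG, x, -8) emits UBFMWri x, #8, #31 (i.e. "lsr w, w, #8").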
3152
3153// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3154static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3155 bool BiggerPattern,
3156 const uint64_t NonZeroBits,
3157 SDValue &Src, int &DstLSB,
3158 int &Width);
3159
3160// For bit-field-positioning pattern "shl VAL, N)".
3161static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3162 bool BiggerPattern,
3163 const uint64_t NonZeroBits,
3164 SDValue &Src, int &DstLSB,
3165 int &Width);
3166
3167/// Does this tree qualify as an attempt to move a bitfield into position,
3168/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3169 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3170 bool BiggerPattern, SDValue &Src,
3171 int &DstLSB, int &Width) {
3172 EVT VT = Op.getValueType();
3173 unsigned BitWidth = VT.getSizeInBits();
3174 (void)BitWidth;
3175 assert(BitWidth == 32 || BitWidth == 64);
3176
3177 KnownBits Known = CurDAG->computeKnownBits(Op);
3178
3179 // Non-zero in the sense that they're not provably zero, which is the key
3180 // point if we want to use this value
3181 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3182 if (!isShiftedMask_64(NonZeroBits))
3183 return false;
3184
3185 switch (Op.getOpcode()) {
3186 default:
3187 break;
3188 case ISD::AND:
3189 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3190 NonZeroBits, Src, DstLSB, Width);
3191 case ISD::SHL:
3192 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3193 NonZeroBits, Src, DstLSB, Width);
3194 }
3195
3196 return false;
3197}
3198
3199 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3200 bool BiggerPattern,
3201 const uint64_t NonZeroBits,
3202 SDValue &Src, int &DstLSB,
3203 int &Width) {
3204 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3205
3206 EVT VT = Op.getValueType();
3207 assert((VT == MVT::i32 || VT == MVT::i64) &&
3208 "Caller guarantees VT is one of i32 or i64");
3209 (void)VT;
3210
3211 uint64_t AndImm;
3212 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3213 return false;
3214
3215 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3216 // 1) ((AndImm & (1 << POS)) == 0)
3217 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3218 //
3219 // 1) and 2) don't agree so something must be wrong (e.g., in
3220 // 'SelectionDAG::computeKnownBits')
3221 assert((~AndImm & NonZeroBits) == 0 &&
3222 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3223
3224 SDValue AndOp0 = Op.getOperand(0);
3225
3226 uint64_t ShlImm;
3227 SDValue ShlOp0;
3228 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3229 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3230 ShlOp0 = AndOp0.getOperand(0);
3231 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3232 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3233 ShlImm)) {
3234 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3235
3236 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3237 SDValue ShlVal = AndOp0.getOperand(0);
3238
3239 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3240 // expect VT to be MVT::i32.
3241 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3242
3243 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3244 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3245 } else
3246 return false;
3247
3248 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3249 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3250 // AndOp0+AND.
3251 if (!BiggerPattern && !AndOp0.hasOneUse())
3252 return false;
3253
3254 DstLSB = llvm::countr_zero(NonZeroBits);
3255 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3256
3257 // Bail out on large Width. This happens when no proper combining / constant
3258 // folding was performed.
3259 if (Width >= (int)VT.getSizeInBits()) {
3260 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3261 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3262 // "val".
3263 // If VT is i32, Width >= 32 means:
3264 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3265 // demands at least 'Width' bits (after the dag-combiner). Together with the
3266 // `any_extend` Op (undefined higher bits), this indicates a missed
3267 // combination when lowering the 'and' IR instruction to a machine IR one.
3268 LLVM_DEBUG(
3269 dbgs()
3270 << "Found large Width in bit-field-positioning -- this indicates no "
3271 "proper combining / constant folding was performed\n");
3272 return false;
3273 }
3274
3275 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3276 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3277 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3278 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3279 // which case it is not profitable to insert an extra shift.
3280 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3281 return false;
3282
3283 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3284 return true;
3285}
3286
3287 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3288// UBFIZ.
3289 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3290 SDValue &Src, int &DstLSB,
3291 int &Width) {
3292 // Caller should have verified that N is a left shift with constant shift
3293 // amount; asserts that.
3294 assert(Op.getOpcode() == ISD::SHL &&
3295 "Op.getNode() should be a SHL node to call this function");
3296 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3297 "Op.getNode() should shift ShlImm to call this function");
3298
3299 uint64_t AndImm = 0;
3300 SDValue Op0 = Op.getOperand(0);
3301 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3302 return false;
3303
3304 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3305 if (isMask_64(ShiftedAndImm)) {
3306 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3307 // should end with Mask, and could be prefixed with random bits if those
3308 // bits are shifted out.
3309 //
3310 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3311 // the AND result corresponding to those bits are shifted out, so it's fine
3312 // to not extract them.
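//
// For example (illustrative values): for a 64-bit 'val', the node
// (shl (and val, 0xff), 8) gives ShiftedAndImm == 0xff, so Width = 8,
// DstLSB = 8 and Src = val, i.e. the node is equivalent to
// UBFIZ <Xd>, <Xval>, #8, #8.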
3313 Width = llvm::countr_one(ShiftedAndImm);
3314 DstLSB = ShlImm;
3315 Src = Op0.getOperand(0);
3316 return true;
3317 }
3318 return false;
3319}
3320
3321 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3322 bool BiggerPattern,
3323 const uint64_t NonZeroBits,
3324 SDValue &Src, int &DstLSB,
3325 int &Width) {
3326 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3327
3328 EVT VT = Op.getValueType();
3329 assert((VT == MVT::i32 || VT == MVT::i64) &&
3330 "Caller guarantees that type is i32 or i64");
3331 (void)VT;
3332
3333 uint64_t ShlImm;
3334 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3335 return false;
3336
3337 if (!BiggerPattern && !Op.hasOneUse())
3338 return false;
3339
3340 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3341 return true;
3342
3343 DstLSB = llvm::countr_zero(NonZeroBits);
3344 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3345
3346 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3347 return false;
3348
3349 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3350 return true;
3351}
3352
3353static bool isShiftedMask(uint64_t Mask, EVT VT) {
3354 assert(VT == MVT::i32 || VT == MVT::i64);
3355 if (VT == MVT::i32)
3356 return isShiftedMask_32(Mask);
3357 return isShiftedMask_64(Mask);
3358}
3359
3360// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3361// inserted only sets known zero bits.
3362 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3363 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3364
3365 EVT VT = N->getValueType(0);
3366 if (VT != MVT::i32 && VT != MVT::i64)
3367 return false;
3368
3369 unsigned BitWidth = VT.getSizeInBits();
3370
3371 uint64_t OrImm;
3372 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3373 return false;
3374
3375 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3376 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3377 // performance neutral.
3378 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3379 return false;
3380
3381 uint64_t MaskImm;
3382 SDValue And = N->getOperand(0);
3383 // Must be a single use AND with an immediate operand.
3384 if (!And.hasOneUse() ||
3385 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3386 return false;
3387
3388 // Compute the Known Zero for the AND as this allows us to catch more general
3389 // cases than just looking for AND with imm.
3390 KnownBits Known = CurDAG->computeKnownBits(And);
3391
3392 // Non-zero in the sense that they're not provably zero, which is the key
3393 // point if we want to use this value.
3394 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3395
3396 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3397 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3398 return false;
3399
3400 // The bits being inserted must only set those bits that are known to be zero.
3401 if ((OrImm & NotKnownZero) != 0) {
3402 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3403 // currently handle this case.
3404 return false;
3405 }
3406
3407 // BFI/BFXIL dst, src, #lsb, #width.
3408 int LSB = llvm::countr_one(NotKnownZero);
3409 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3410
3411 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
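// For example (illustrative values): on i32, LSB = 8 and Width = 8 give
// ImmR = (32 - 8) % 32 = 24 and ImmS = 7, i.e. the BFM encoding of
// "BFI Wd, Wn, #8, #8".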
3412 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3413 unsigned ImmS = Width - 1;
3414
3415 // If we're creating a BFI instruction avoid cases where we need more
3416 // instructions to materialize the BFI constant as compared to the original
3417 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3418 // should be no worse in this case.
3419 bool IsBFI = LSB != 0;
3420 uint64_t BFIImm = OrImm >> LSB;
3421 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3422 // We have a BFI instruction and we know the constant can't be materialized
3423 // with a ORR-immediate with the zero register.
3424 unsigned OrChunks = 0, BFIChunks = 0;
3425 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3426 if (((OrImm >> Shift) & 0xFFFF) != 0)
3427 ++OrChunks;
3428 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3429 ++BFIChunks;
3430 }
3431 if (BFIChunks > OrChunks)
3432 return false;
3433 }
3434
3435 // Materialize the constant to be inserted.
3436 SDLoc DL(N);
3437 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3438 SDNode *MOVI = CurDAG->getMachineNode(
3439 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3440
3441 // Create the BFI/BFXIL instruction.
3442 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3443 CurDAG->getTargetConstant(ImmR, DL, VT),
3444 CurDAG->getTargetConstant(ImmS, DL, VT)};
3445 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3446 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3447 return true;
3448}
3449
3450 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3451 SDValue &ShiftedOperand,
3452 uint64_t &EncodedShiftImm) {
3453 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3454 if (!Dst.hasOneUse())
3455 return false;
3456
3457 EVT VT = Dst.getValueType();
3458 assert((VT == MVT::i32 || VT == MVT::i64) &&
3459 "Caller should guarantee that VT is one of i32 or i64");
3460 const unsigned SizeInBits = VT.getSizeInBits();
3461
3462 SDLoc DL(Dst.getNode());
3463 uint64_t AndImm, ShlImm;
3464 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3465 isShiftedMask_64(AndImm)) {
3466 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3467 SDValue DstOp0 = Dst.getOperand(0);
3468 if (!DstOp0.hasOneUse())
3469 return false;
3470
3471 // An example to illustrate the transformation
3472 // From:
3473 // lsr x8, x1, #1
3474 // and x8, x8, #0x3f80
3475 // bfxil x8, x1, #0, #7
3476 // To:
3477 // and x8, x23, #0x7f
3478 // ubfx x9, x23, #8, #7
3479 // orr x23, x8, x9, lsl #7
3480 //
3481 // The number of instructions remains the same, but ORR is faster than BFXIL
3482 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3483 // the dependency chain is improved after the transformation.
3484 uint64_t SrlImm;
3485 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3486 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3487 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3488 unsigned MaskWidth =
3489 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3490 unsigned UBFMOpc =
3491 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3492 SDNode *UBFMNode = CurDAG->getMachineNode(
3493 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3494 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3495 VT),
3496 CurDAG->getTargetConstant(
3497 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3498 ShiftedOperand = SDValue(UBFMNode, 0);
3499 EncodedShiftImm = AArch64_AM::getShifterImm(
3500 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3501 return true;
3502 }
3503 }
3504 return false;
3505 }
3506
3507 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3508 ShiftedOperand = Dst.getOperand(0);
3509 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3510 return true;
3511 }
3512
3513 uint64_t SrlImm;
3514 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3515 ShiftedOperand = Dst.getOperand(0);
3516 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3517 return true;
3518 }
3519 return false;
3520}
3521
3522// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3523// the operands and select it to AArch64::ORR with shifted registers if
3524// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3525static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3526 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3527 const bool BiggerPattern) {
3528 EVT VT = N->getValueType(0);
3529 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3530 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3531 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3532 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3533 assert((VT == MVT::i32 || VT == MVT::i64) &&
3534 "Expect result type to be i32 or i64 since N is combinable to BFM");
3535 SDLoc DL(N);
3536
3537 // Bail out if BFM simplifies away one node in BFM Dst.
3538 if (OrOpd1 != Dst)
3539 return false;
3540
3541 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3542 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3543 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3544 if (BiggerPattern) {
3545 uint64_t SrcAndImm;
3546 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3547 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3548 // OrOpd0 = AND Src, #Mask
3549 // So BFM simplifies away one AND node from Src and doesn't simplify away
3550 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3551 // one node (from Rd), ORR is better since it has higher throughput and
3552 // smaller latency than BFM on many AArch64 processors (and for the rest
3553 // ORR is at least as good as BFM).
3554 SDValue ShiftedOperand;
3555 uint64_t EncodedShiftImm;
3556 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3557 EncodedShiftImm)) {
3558 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3559 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3560 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3561 return true;
3562 }
3563 }
3564 return false;
3565 }
3566
3567 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3568
3569 uint64_t ShlImm;
3570 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3571 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3572 SDValue Ops[] = {
3573 Dst, Src,
3574 CurDAG->getTargetConstant(
3575 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3576 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3577 return true;
3578 }
3579
3580 // Select the following pattern to left-shifted operand rather than BFI.
3581 // %val1 = op ..
3582 // %val2 = shl %val1, #imm
3583 // %res = or %val1, %val2
3584 //
3585 // If N is selected to be BFI, we know that
3586 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3587 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3588 //
3589 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
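// For example (illustrative values): with
// %val2 = shl %val1, 4
// %res = or %val1, %val2
// this selects "orr w_res, w_val1, w_val1, lsl #4" instead of a BFI.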
3590 if (OrOpd0.getOperand(0) == OrOpd1) {
3591 SDValue Ops[] = {
3592 OrOpd1, OrOpd1,
3593 CurDAG->getTargetConstant(
3594 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3595 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3596 return true;
3597 }
3598 }
3599
3600 uint64_t SrlImm;
3601 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3602 // Select the following pattern to right-shifted operand rather than BFXIL.
3603 // %val1 = op ..
3604 // %val2 = lshr %val1, #imm
3605 // %res = or %val1, %val2
3606 //
3607 // If N is selected to be BFXIL, we know that
3608 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3609 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3610 //
3611 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3612 if (OrOpd0.getOperand(0) == OrOpd1) {
3613 SDValue Ops[] = {
3614 OrOpd1, OrOpd1,
3615 CurDAG->getTargetConstant(
3616 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3617 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3618 return true;
3619 }
3620 }
3621
3622 return false;
3623}
3624
3625static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3626 SelectionDAG *CurDAG) {
3627 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3628
3629 EVT VT = N->getValueType(0);
3630 if (VT != MVT::i32 && VT != MVT::i64)
3631 return false;
3632
3633 unsigned BitWidth = VT.getSizeInBits();
3634
3635 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3636 // have the expected shape. Try to undo that.
3637
3638 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3639 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3640
3641 // Given a OR operation, check if we have the following pattern
3642 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3643 // isBitfieldExtractOp)
3644 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3645 // countTrailingZeros(mask2) == imm2 - imm + 1
3646 // f = d | c
3647 // if yes, replace the OR instruction with:
3648 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3649
3650 // OR is commutative, check all combinations of operand order and values of
3651 // BiggerPattern, i.e.
3652 // Opd0, Opd1, BiggerPattern=false
3653 // Opd1, Opd0, BiggerPattern=false
3654 // Opd0, Opd1, BiggerPattern=true
3655 // Opd1, Opd0, BiggerPattern=true
3656 // Several of these combinations may match, so check with BiggerPattern=false
3657 // first since that will produce better results by matching more instructions
3658 // and/or inserting fewer extra instructions.
3659 for (int I = 0; I < 4; ++I) {
3660
3661 SDValue Dst, Src;
3662 unsigned ImmR, ImmS;
3663 bool BiggerPattern = I / 2;
3664 SDValue OrOpd0Val = N->getOperand(I % 2);
3665 SDNode *OrOpd0 = OrOpd0Val.getNode();
3666 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3667 SDNode *OrOpd1 = OrOpd1Val.getNode();
3668
3669 unsigned BFXOpc;
3670 int DstLSB, Width;
3671 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3672 NumberOfIgnoredLowBits, BiggerPattern)) {
3673 // Check that the returned opcode is compatible with the pattern,
3674 // i.e., same type and zero extended (U and not S)
3675 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3676 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3677 continue;
3678
3679 // Compute the width of the bitfield insertion
3680 DstLSB = 0;
3681 Width = ImmS - ImmR + 1;
3682 // FIXME: This constraint is to catch bitfield insertion only; we may
3683 // want to widen the pattern if we want to handle the general bitfield
3684 // move case.
3685 if (Width <= 0)
3686 continue;
3687
3688 // If the mask on the insertee is correct, we have a BFXIL operation. We
3689 // can share the ImmR and ImmS values from the already-computed UBFM.
3690 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3691 BiggerPattern,
3692 Src, DstLSB, Width)) {
3693 ImmR = (BitWidth - DstLSB) % BitWidth;
3694 ImmS = Width - 1;
3695 } else
3696 continue;
3697
3698 // Check the second part of the pattern
3699 EVT VT = OrOpd1Val.getValueType();
3700 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3701
3702 // Compute the Known Zero for the candidate of the first operand.
3703 // This allows us to catch more general cases than just looking for
3704 // an AND with imm. Indeed, simplify-demanded-bits may have removed
3705 // the AND instruction because it proves it was useless.
3706 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3707
3708 // Check if there is enough room for the second operand to appear
3709 // in the first one
3710 APInt BitsToBeInserted =
3711 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3712
3713 if ((BitsToBeInserted & ~Known.Zero) != 0)
3714 continue;
3715
3716 // Set the first operand
3717 uint64_t Imm;
3718 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3719 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3720 // In that case, we can eliminate the AND
3721 Dst = OrOpd1->getOperand(0);
3722 else
3723 // Maybe the AND has been removed by simplify-demanded-bits
3724 // or is useful because it discards more bits
3725 Dst = OrOpd1Val;
3726
3727 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3728 // with shifted operand is more efficient.
3729 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3730 BiggerPattern))
3731 return true;
3732
3733 // both parts match
3734 SDLoc DL(N);
3735 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3736 CurDAG->getTargetConstant(ImmS, DL, VT)};
3737 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3738 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3739 return true;
3740 }
3741
3742 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3743 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3744 // mask (e.g., 0x000ffff0).
3745 uint64_t Mask0Imm, Mask1Imm;
3746 SDValue And0 = N->getOperand(0);
3747 SDValue And1 = N->getOperand(1);
3748 if (And0.hasOneUse() && And1.hasOneUse() &&
3749 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3750 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3751 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3752 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3753
3754 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3755 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3756 // bits to be inserted.
3757 if (isShiftedMask(Mask0Imm, VT)) {
3758 std::swap(And0, And1);
3759 std::swap(Mask0Imm, Mask1Imm);
3760 }
3761
3762 SDValue Src = And1->getOperand(0);
3763 SDValue Dst = And0->getOperand(0);
3764 unsigned LSB = llvm::countr_zero(Mask1Imm);
3765 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3766
3767 // The BFXIL inserts the low-order bits from a source register, so right
3768 // shift the needed bits into place.
3769 SDLoc DL(N);
3770 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3771 uint64_t LsrImm = LSB;
3772 if (Src->hasOneUse() &&
3773 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3774 (LsrImm + LSB) < BitWidth) {
3775 Src = Src->getOperand(0);
3776 LsrImm += LSB;
3777 }
3778
3779 SDNode *LSR = CurDAG->getMachineNode(
3780 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3781 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3782
3783 // BFXIL is an alias of BFM, so translate to BFM operands.
3784 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3785 unsigned ImmS = Width - 1;
3786
3787 // Create the BFXIL instruction.
3788 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3789 CurDAG->getTargetConstant(ImmR, DL, VT),
3790 CurDAG->getTargetConstant(ImmS, DL, VT)};
3791 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3792 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3793 return true;
3794 }
3795
3796 return false;
3797}
3798
3799bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3800 if (N->getOpcode() != ISD::OR)
3801 return false;
3802
3803 APInt NUsefulBits;
3804 getUsefulBits(SDValue(N, 0), NUsefulBits);
3805
3806 // If none of the bits are useful, just return UNDEF.
3807 if (!NUsefulBits) {
3808 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3809 return true;
3810 }
3811
3812 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3813 return true;
3814
3815 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3816}
3817
3818 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3819/// equivalent of a left shift by a constant amount followed by an and masking
3820/// out a contiguous set of bits.
3821bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3822 if (N->getOpcode() != ISD::AND)
3823 return false;
3824
3825 EVT VT = N->getValueType(0);
3826 if (VT != MVT::i32 && VT != MVT::i64)
3827 return false;
3828
3829 SDValue Op0;
3830 int DstLSB, Width;
3831 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3832 Op0, DstLSB, Width))
3833 return false;
3834
3835 // ImmR is the rotate right amount.
3836 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3837 // ImmS is the most significant bit of the source to be moved.
3838 unsigned ImmS = Width - 1;
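// For example (illustrative values): the i32 node (and (shl x, 8), 0xff00)
// gives DstLSB = 8 and Width = 8, so ImmR = 24 and ImmS = 7, which is the UBFM
// encoding of "UBFIZ Wd, Wx, #8, #8".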
3839
3840 SDLoc DL(N);
3841 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3842 CurDAG->getTargetConstant(ImmS, DL, VT)};
3843 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3844 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3845 return true;
3846}
3847
3848/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3849/// variable shift/rotate instructions.
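///
/// For example (illustrative values): for an i64 shift (shl x, (and amt, 63)),
/// the AND is redundant because LSLV only uses the low 6 bits of the shift
/// amount, so the node is selected directly as "lslv x0, x1, x2".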
3850bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3851 EVT VT = N->getValueType(0);
3852
3853 unsigned Opc;
3854 switch (N->getOpcode()) {
3855 case ISD::ROTR:
3856 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3857 break;
3858 case ISD::SHL:
3859 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3860 break;
3861 case ISD::SRL:
3862 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3863 break;
3864 case ISD::SRA:
3865 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3866 break;
3867 default:
3868 return false;
3869 }
3870
3871 uint64_t Size;
3872 uint64_t Bits;
3873 if (VT == MVT::i32) {
3874 Bits = 5;
3875 Size = 32;
3876 } else if (VT == MVT::i64) {
3877 Bits = 6;
3878 Size = 64;
3879 } else
3880 return false;
3881
3882 SDValue ShiftAmt = N->getOperand(1);
3883 SDLoc DL(N);
3884 SDValue NewShiftAmt;
3885
3886 // Skip over an extend of the shift amount.
3887 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3888 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3889 ShiftAmt = ShiftAmt->getOperand(0);
3890
3891 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3892 SDValue Add0 = ShiftAmt->getOperand(0);
3893 SDValue Add1 = ShiftAmt->getOperand(1);
3894 uint64_t Add0Imm;
3895 uint64_t Add1Imm;
3896 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3897 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3898 // to avoid the ADD/SUB.
3899 NewShiftAmt = Add0;
3900 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3901 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3902 (Add0Imm % Size == 0)) {
3903 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3904 // to generate a NEG instead of a SUB from a constant.
3905 unsigned NegOpc;
3906 unsigned ZeroReg;
3907 EVT SubVT = ShiftAmt->getValueType(0);
3908 if (SubVT == MVT::i32) {
3909 NegOpc = AArch64::SUBWrr;
3910 ZeroReg = AArch64::WZR;
3911 } else {
3912 assert(SubVT == MVT::i64);
3913 NegOpc = AArch64::SUBXrr;
3914 ZeroReg = AArch64::XZR;
3915 }
3916 SDValue Zero =
3917 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3918 MachineSDNode *Neg =
3919 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3920 NewShiftAmt = SDValue(Neg, 0);
3921 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3922 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3923 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3924 // to generate a NOT instead of a SUB from a constant.
3925 unsigned NotOpc;
3926 unsigned ZeroReg;
3927 EVT SubVT = ShiftAmt->getValueType(0);
3928 if (SubVT == MVT::i32) {
3929 NotOpc = AArch64::ORNWrr;
3930 ZeroReg = AArch64::WZR;
3931 } else {
3932 assert(SubVT == MVT::i64);
3933 NotOpc = AArch64::ORNXrr;
3934 ZeroReg = AArch64::XZR;
3935 }
3936 SDValue Zero =
3937 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3938 MachineSDNode *Not =
3939 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3940 NewShiftAmt = SDValue(Not, 0);
3941 } else
3942 return false;
3943 } else {
3944 // If the shift amount is masked with an AND, check that the mask covers the
3945 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3946 // the AND.
3947 uint64_t MaskImm;
3948 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3949 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3950 return false;
3951
3952 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3953 return false;
3954
3955 NewShiftAmt = ShiftAmt->getOperand(0);
3956 }
3957
3958 // Narrow/widen the shift amount to match the size of the shift operation.
3959 if (VT == MVT::i32)
3960 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3961 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3962 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3963 MachineSDNode *Ext = CurDAG->getMachineNode(
3964 AArch64::SUBREG_TO_REG, DL, VT,
3965 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3966 NewShiftAmt = SDValue(Ext, 0);
3967 }
3968
3969 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3970 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3971 return true;
3972}
3973
3974 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3975 SDValue &FixedPos,
3976 unsigned RegWidth,
3977 bool isReciprocal) {
3978 APFloat FVal(0.0);
3979 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3980 FVal = CN->getValueAPF();
3981 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3982 // Some otherwise illegal constants are allowed in this case.
3983 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3984 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3985 return false;
3986
3987 ConstantPoolSDNode *CN =
3988 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3989 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3990 } else
3991 return false;
3992
3993 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3994 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3995 // x-register.
3996 //
3997 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3998 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3999 // integers.
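//
// For example (illustrative values): for (fp_to_sint (fmul val, 256.0)) with a
// 32-bit destination, FVal = 256.0 gives IntVal = 256 and FBits = 8, so the
// pair can be selected as a single "fcvtzs w0, s0, #8".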
4000 bool IsExact;
4001
4002 if (isReciprocal)
4003 if (!FVal.getExactInverse(&FVal))
4004 return false;
4005
4006 // fbits is between 1 and 64 in the worst-case, which means the fmul
4007 // could have 2^64 as an actual operand. Need 65 bits of precision.
4008 APSInt IntVal(65, true);
4009 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4010
4011 // N.b. isPowerOf2 also checks for > 0.
4012 if (!IsExact || !IntVal.isPowerOf2())
4013 return false;
4014 unsigned FBits = IntVal.logBase2();
4015
4016 // Checks above should have guaranteed that we haven't lost information in
4017 // finding FBits, but it must still be in range.
4018 if (FBits == 0 || FBits > RegWidth) return false;
4019
4020 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4021 return true;
4022}
4023
4024bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4025 unsigned RegWidth) {
4026 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4027 false);
4028}
4029
4030bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4031 SDValue &FixedPos,
4032 unsigned RegWidth) {
4033 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4034 true);
4035}
4036
4037 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
4038 // of the string, obtains the integer values from them, and combines these
4039 // into a single value to be used in the MRS/MSR instruction.
4040 static int getIntOperandFromRegisterString(StringRef RegString) {
4041 SmallVector<StringRef, 5> Fields;
4042 RegString.split(Fields, ':');
4043
4044 if (Fields.size() == 1)
4045 return -1;
4046
4047 assert(Fields.size() == 5
4048 && "Invalid number of fields in read register string");
4049
4050 SmallVector<int, 5> Ops;
4051 bool AllIntFields = true;
4052
4053 for (StringRef Field : Fields) {
4054 unsigned IntField;
4055 AllIntFields &= !Field.getAsInteger(10, IntField);
4056 Ops.push_back(IntField);
4057 }
4058
4059 assert(AllIntFields &&
4060 "Unexpected non-integer value in special register string.");
4061 (void)AllIntFields;
4062
4063 // Need to combine the integer fields of the string into a single value
4064 // based on the bit encoding of MRS/MSR instruction.
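// For example (illustrative values): the string "3:0:4:2:2" yields
// (3 << 14) | (0 << 11) | (4 << 7) | (2 << 3) | 2 = 0xC212.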
4065 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4066 (Ops[3] << 3) | (Ops[4]);
4067}
4068
4069// Lower the read_register intrinsic to an MRS instruction node if the special
4070// register string argument is either of the form detailed in the ALCE (the
4071// form described in getIntOperandsFromRegisterString) or is a named register
4072// known by the MRS SysReg mapper.
4073bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4074 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4075 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4076 SDLoc DL(N);
4077
4078 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4079
4080 unsigned Opcode64Bit = AArch64::MRS;
4081 int Imm = getIntOperandFromRegisterString(RegString->getString());
4082 if (Imm == -1) {
4083 // No match, Use the sysreg mapper to map the remaining possible strings to
4084 // the value for the register to be used for the instruction operand.
4085 const auto *TheReg =
4086 AArch64SysReg::lookupSysRegByName(RegString->getString());
4087 if (TheReg && TheReg->Readable &&
4088 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4089 Imm = TheReg->Encoding;
4090 else
4091 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4092
4093 if (Imm == -1) {
4094 // Still no match, see if this is "pc" or give up.
4095 if (!ReadIs128Bit && RegString->getString() == "pc") {
4096 Opcode64Bit = AArch64::ADR;
4097 Imm = 0;
4098 } else {
4099 return false;
4100 }
4101 }
4102 }
4103
4104 SDValue InChain = N->getOperand(0);
4105 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4106 if (!ReadIs128Bit) {
4107 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4108 {SysRegImm, InChain});
4109 } else {
4110 SDNode *MRRS = CurDAG->getMachineNode(
4111 AArch64::MRRS, DL,
4112 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4113 {SysRegImm, InChain});
4114
4115 // Sysregs are not endian. The even register always contains the low half
4116 // of the register.
4117 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4118 SDValue(MRRS, 0));
4119 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4120 SDValue(MRRS, 0));
4121 SDValue OutChain = SDValue(MRRS, 1);
4122
4123 ReplaceUses(SDValue(N, 0), Lo);
4124 ReplaceUses(SDValue(N, 1), Hi);
4125 ReplaceUses(SDValue(N, 2), OutChain);
4126 };
4127 return true;
4128}
4129
4130// Lower the write_register intrinsic to an MSR instruction node if the special
4131// register string argument is either of the form detailed in the ALCE (the
4132// form described in getIntOperandsFromRegisterString) or is a named register
4133// known by the MSR SysReg mapper.
4134bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4135 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4136 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4137 SDLoc DL(N);
4138
4139 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4140
4141 if (!WriteIs128Bit) {
4142 // Check if the register was one of those allowed as the pstatefield value
4143 // in the MSR (immediate) instruction. To accept the values allowed in the
4144 // pstatefield for the MSR (immediate) instruction, we also require that an
4145 // immediate value has been provided as an argument, we know that this is
4146 // the case as it has been ensured by semantic checking.
4147 auto trySelectPState = [&](auto PMapper, unsigned State) {
4148 if (PMapper) {
4149 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4150 "Expected a constant integer expression.");
4151 unsigned Reg = PMapper->Encoding;
4152 uint64_t Immed = N->getConstantOperandVal(2);
4153 CurDAG->SelectNodeTo(
4154 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4155 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4156 return true;
4157 }
4158 return false;
4159 };
4160
4161 if (trySelectPState(
4162 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4163 AArch64::MSRpstateImm4))
4164 return true;
4165 if (trySelectPState(
4166 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4167 AArch64::MSRpstateImm1))
4168 return true;
4169 }
4170
4171 int Imm = getIntOperandFromRegisterString(RegString->getString());
4172 if (Imm == -1) {
4173 // Use the sysreg mapper to attempt to map the remaining possible strings
4174 // to the value for the register to be used for the MSR (register)
4175 // instruction operand.
4176 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4177 if (TheReg && TheReg->Writeable &&
4178 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4179 Imm = TheReg->Encoding;
4180 else
4181 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4182
4183 if (Imm == -1)
4184 return false;
4185 }
4186
4187 SDValue InChain = N->getOperand(0);
4188 if (!WriteIs128Bit) {
4189 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4190 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4191 N->getOperand(2), InChain);
4192 } else {
4193 // No endian swap. The lower half always goes into the even subreg, and the
4194 // higher half always into the odd subreg.
4195 SDNode *Pair = CurDAG->getMachineNode(
4196 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4197 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4198 MVT::i32),
4199 N->getOperand(2),
4200 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4201 N->getOperand(3),
4202 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4203
4204 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4205 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4206 SDValue(Pair, 0), InChain);
4207 }
4208
4209 return true;
4210}
4211
4212/// We've got special pseudo-instructions for these
4213bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4214 unsigned Opcode;
4215 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4216
4217 // Leave IR for LSE if subtarget supports it.
4218 if (Subtarget->hasLSE()) return false;
4219
4220 if (MemTy == MVT::i8)
4221 Opcode = AArch64::CMP_SWAP_8;
4222 else if (MemTy == MVT::i16)
4223 Opcode = AArch64::CMP_SWAP_16;
4224 else if (MemTy == MVT::i32)
4225 Opcode = AArch64::CMP_SWAP_32;
4226 else if (MemTy == MVT::i64)
4227 Opcode = AArch64::CMP_SWAP_64;
4228 else
4229 llvm_unreachable("Unknown AtomicCmpSwap type");
4230
4231 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4232 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4233 N->getOperand(0)};
4234 SDNode *CmpSwap = CurDAG->getMachineNode(
4235 Opcode, SDLoc(N),
4236 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4237
4238 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4239 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4240
4241 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4242 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4243 CurDAG->RemoveDeadNode(N);
4244
4245 return true;
4246}
4247
4248bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4249 SDValue &Shift, bool Negate) {
4250 if (!isa<ConstantSDNode>(N))
4251 return false;
4252
4253 SDLoc DL(N);
4254 APInt Val =
4255 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4256
4257 if (Negate)
4258 Val = -Val;
4259
4260 switch (VT.SimpleTy) {
4261 case MVT::i8:
4262 // All immediates are supported.
4263 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4264 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4265 return true;
4266 case MVT::i16:
4267 case MVT::i32:
4268 case MVT::i64:
4269 // Support 8bit unsigned immediates.
4270 if ((Val & ~0xff) == 0) {
4271 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4272 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4273 return true;
4274 }
4275 // Support 16bit unsigned immediates that are a multiple of 256.
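// For example (illustrative values): Val = 0x3000 is encoded as Imm = 0x30
// with Shift = 8, i.e. "add z0.h, z0.h, #48, lsl #8".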
4276 if ((Val & ~0xff00) == 0) {
4277 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4278 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4279 return true;
4280 }
4281 break;
4282 default:
4283 break;
4284 }
4285
4286 return false;
4287}
4288
4289bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4290 SDValue &Imm, SDValue &Shift,
4291 bool Negate) {
4292 if (!isa<ConstantSDNode>(N))
4293 return false;
4294
4295 SDLoc DL(N);
4296 int64_t Val = cast<ConstantSDNode>(N)
4297 ->getAPIntValue()
4298 .trunc(VT.getFixedSizeInBits())
4299 .getSExtValue();
4300
4301 if (Negate)
4302 Val = -Val;
4303
4304 // Signed saturating instructions treat their immediate operand as unsigned,
4305 // whereas the related intrinsics define their operands to be signed. This
4306 // means we can only use the immediate form when the operand is non-negative.
4307 if (Val < 0)
4308 return false;
4309
4310 switch (VT.SimpleTy) {
4311 case MVT::i8:
4312 // All positive immediates are supported.
4313 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4314 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4315 return true;
4316 case MVT::i16:
4317 case MVT::i32:
4318 case MVT::i64:
4319 // Support 8bit positive immediates.
4320 if (Val <= 255) {
4321 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4322 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4323 return true;
4324 }
4325 // Support 16bit positive immediates that are a multiple of 256.
4326 if (Val <= 65280 && Val % 256 == 0) {
4327 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4328 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4329 return true;
4330 }
4331 break;
4332 default:
4333 break;
4334 }
4335
4336 return false;
4337}
4338
4339bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4340 SDValue &Shift) {
4341 if (!isa<ConstantSDNode>(N))
4342 return false;
4343
4344 SDLoc DL(N);
4345 int64_t Val = cast<ConstantSDNode>(N)
4346 ->getAPIntValue()
4347 .trunc(VT.getFixedSizeInBits())
4348 .getSExtValue();
4349 int32_t ImmVal, ShiftVal;
4350 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4351 ShiftVal))
4352 return false;
4353
4354 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4355 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4356 return true;
4357}
4358
4359bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4360 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4361 int64_t ImmVal = CNode->getSExtValue();
4362 SDLoc DL(N);
4363 if (ImmVal >= -128 && ImmVal < 128) {
4364 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4365 return true;
4366 }
4367 }
4368 return false;
4369}
4370
4371bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4372 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4373 uint64_t ImmVal = CNode->getZExtValue();
4374
4375 switch (VT.SimpleTy) {
4376 case MVT::i8:
4377 ImmVal &= 0xFF;
4378 break;
4379 case MVT::i16:
4380 ImmVal &= 0xFFFF;
4381 break;
4382 case MVT::i32:
4383 ImmVal &= 0xFFFFFFFF;
4384 break;
4385 case MVT::i64:
4386 break;
4387 default:
4388 llvm_unreachable("Unexpected type");
4389 }
4390
4391 if (ImmVal < 256) {
4392 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4393 return true;
4394 }
4395 }
4396 return false;
4397}
4398
4399bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4400 bool Invert) {
4401 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4402 uint64_t ImmVal = CNode->getZExtValue();
4403 SDLoc DL(N);
4404
4405 if (Invert)
4406 ImmVal = ~ImmVal;
4407
4408 // Shift mask depending on type size.
4409 switch (VT.SimpleTy) {
4410 case MVT::i8:
4411 ImmVal &= 0xFF;
4412 ImmVal |= ImmVal << 8;
4413 ImmVal |= ImmVal << 16;
4414 ImmVal |= ImmVal << 32;
4415 break;
4416 case MVT::i16:
4417 ImmVal &= 0xFFFF;
4418 ImmVal |= ImmVal << 16;
4419 ImmVal |= ImmVal << 32;
4420 break;
4421 case MVT::i32:
4422 ImmVal &= 0xFFFFFFFF;
4423 ImmVal |= ImmVal << 32;
4424 break;
4425 case MVT::i64:
4426 break;
4427 default:
4428 llvm_unreachable("Unexpected type");
4429 }
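// For example (illustrative values): for an i8 element, ImmVal = 0x0f is
// replicated to 0x0f0f0f0f0f0f0f0f before being encoded as a 64-bit logical
// immediate.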
4430
4431 uint64_t encoding;
4432 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4433 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4434 return true;
4435 }
4436 }
4437 return false;
4438}
4439
4440// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4441// Rather than attempt to normalise everything we can sometimes saturate the
4442// shift amount during selection. This function also allows for consistent
4443// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4444// required by the instructions.
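// For example (illustrative bounds, assuming Low = 1 and High = 64 for an i64
// element): a shift amount of 70 with AllowSaturation set is clamped to 64
// rather than rejected.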
4445bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4446 uint64_t High, bool AllowSaturation,
4447 SDValue &Imm) {
4448 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4449 uint64_t ImmVal = CN->getZExtValue();
4450
4451 // Reject shift amounts that are too small.
4452 if (ImmVal < Low)
4453 return false;
4454
4455 // Reject or saturate shift amounts that are too big.
4456 if (ImmVal > High) {
4457 if (!AllowSaturation)
4458 return false;
4459 ImmVal = High;
4460 }
4461
4462 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4463 return true;
4464 }
4465
4466 return false;
4467}
4468
4469bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4470 // tagp(FrameIndex, IRGstack, tag_offset):
4471 // since the offset between FrameIndex and IRGstack is a compile-time
4472 // constant, this can be lowered to a single ADDG instruction.
4473 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4474 return false;
4475 }
4476
4477 SDValue IRG_SP = N->getOperand(2);
4478 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4479 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4480 return false;
4481 }
4482
4483 const TargetLowering *TLI = getTargetLowering();
4484 SDLoc DL(N);
4485 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4486 SDValue FiOp = CurDAG->getTargetFrameIndex(
4487 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4488 int TagOffset = N->getConstantOperandVal(3);
4489
4490 SDNode *Out = CurDAG->getMachineNode(
4491 AArch64::TAGPstack, DL, MVT::i64,
4492 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4493 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4494 ReplaceNode(N, Out);
4495 return true;
4496}
4497
4498void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4499 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4500 "llvm.aarch64.tagp third argument must be an immediate");
4501 if (trySelectStackSlotTagP(N))
4502 return;
4503 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4504 // compile-time constant, not just for stack allocations.
4505
4506 // General case for unrelated pointers in Op1 and Op2.
4507 SDLoc DL(N);
4508 int TagOffset = N->getConstantOperandVal(3);
4509 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4510 {N->getOperand(1), N->getOperand(2)});
4511 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4512 {SDValue(N1, 0), N->getOperand(2)});
4513 SDNode *N3 = CurDAG->getMachineNode(
4514 AArch64::ADDG, DL, MVT::i64,
4515 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4516 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4517 ReplaceNode(N, N3);
4518}
4519
4520bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4521 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4522
4523 // Bail when not a "cast" like insert_subvector.
4524 if (N->getConstantOperandVal(2) != 0)
4525 return false;
4526 if (!N->getOperand(0).isUndef())
4527 return false;
4528
4529 // Bail when normal isel should do the job.
4530 EVT VT = N->getValueType(0);
4531 EVT InVT = N->getOperand(1).getValueType();
4532 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4533 return false;
4534 if (InVT.getSizeInBits() <= 128)
4535 return false;
4536
4537 // NOTE: We can only get here when doing fixed length SVE code generation.
4538 // We do manual selection because the types involved are not linked to real
4539 // registers (despite being legal) and must be coerced into SVE registers.
4540
4541 assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4542 "Expected to insert into a packed scalable vector!");
4543
4544 SDLoc DL(N);
4545 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4546 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4547 N->getOperand(1), RC));
4548 return true;
4549}
4550
4551bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4552 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4553
4554 // Bail when not a "cast" like extract_subvector.
4555 if (N->getConstantOperandVal(1) != 0)
4556 return false;
4557
4558 // Bail when normal isel can do the job.
4559 EVT VT = N->getValueType(0);
4560 EVT InVT = N->getOperand(0).getValueType();
4561 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4562 return false;
4563 if (VT.getSizeInBits() <= 128)
4564 return false;
4565
4566 // NOTE: We can only get here when doing fixed length SVE code generation.
4567 // We do manual selection because the types involved are not linked to real
4568 // registers (despite being legal) and must be coerced into SVE registers.
4569
4569 
4570 assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4571 "Expected to extract from a packed scalable vector!");
4572
4573 SDLoc DL(N);
4574 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4575 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4576 N->getOperand(0), RC));
4577 return true;
4578}
4579
4580bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4581 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4582
4583 SDValue N0 = N->getOperand(0);
4584 SDValue N1 = N->getOperand(1);
4585
4586 EVT VT = N->getValueType(0);
4587 SDLoc DL(N);
4588
4589 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4590 // Rotate by a constant is a funnel shift in IR, which is expanded to
4591 // an OR with shifted operands.
4592 // We do the following transform:
4593 // OR N0, N1 -> xar (x, y, imm)
4594 // Where:
4595 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4596 // N0 = SHL_PRED true, V, splat(bits-imm)
4597 // V = (xor x, y)
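// For example (illustrative values): with nxv2i64 operands and imm = 3,
// N1 = srl(V, 3) and N0 = shl(V, 61) (61 + 3 == 64), and the node is selected
// as "xar z0.d, z0.d, z1.d, #3".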
4598 if (VT.isScalableVector() &&
4599 (Subtarget->hasSVE2() ||
4600 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4601 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4602 N1.getOpcode() != AArch64ISD::SRL_PRED)
4603 std::swap(N0, N1);
4604 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4605 N1.getOpcode() != AArch64ISD::SRL_PRED)
4606 return false;
4607
4608 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4609 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4610 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4611 return false;
4612
4613 if (N0.getOperand(1) != N1.getOperand(1))
4614 return false;
4615
4616 SDValue R1, R2;
4617 bool IsXOROperand = true;
4618 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4619 IsXOROperand = false;
4620 } else {
4621 R1 = N0.getOperand(1).getOperand(0);
4622 R2 = N1.getOperand(1).getOperand(1);
4623 }
4624
4625 APInt ShlAmt, ShrAmt;
4626 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4627 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4628 return false;
4629
4630 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4631 return false;
4632
4633 if (!IsXOROperand) {
4634 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4635 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4636 SDValue MOVIV = SDValue(MOV, 0);
4637
4638 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4639 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4640 VT, Zero, MOVIV, ZSub);
4641
4642 R1 = N1->getOperand(1);
4643 R2 = SDValue(SubRegToReg, 0);
4644 }
4645
4646 SDValue Imm =
4647 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4648
4649 SDValue Ops[] = {R1, R2, Imm};
4650 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4651 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4652 AArch64::XAR_ZZZI_D})) {
4653 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4654 return true;
4655 }
4656 return false;
4657 }
4658
4659 // We have Neon SHA3 XAR operation for v2i64 but for types
4660 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4661 // is available.
4662 EVT SVT;
4663 switch (VT.getSimpleVT().SimpleTy) {
4664 case MVT::v4i32:
4665 case MVT::v2i32:
4666 SVT = MVT::nxv4i32;
4667 break;
4668 case MVT::v8i16:
4669 case MVT::v4i16:
4670 SVT = MVT::nxv8i16;
4671 break;
4672 case MVT::v16i8:
4673 case MVT::v8i8:
4674 SVT = MVT::nxv16i8;
4675 break;
4676 case MVT::v2i64:
4677 case MVT::v1i64:
4678 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4679 break;
4680 default:
4681 return false;
4682 }
4683
4684 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4685 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4686 return false;
4687
4688 if (N0->getOpcode() != AArch64ISD::VSHL ||
4689 N1->getOpcode() != AArch64ISD::VLSHR)
4690 return false;
4691
4692 if (N0->getOperand(0) != N1->getOperand(0))
4693 return false;
4694
4695 SDValue R1, R2;
4696 bool IsXOROperand = true;
4697 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4698 IsXOROperand = false;
4699 } else {
4700 SDValue XOR = N0.getOperand(0);
4701 R1 = XOR.getOperand(0);
4702 R2 = XOR.getOperand(1);
4703 }
4704
4705 unsigned HsAmt = N0.getConstantOperandVal(1);
4706 unsigned ShAmt = N1.getConstantOperandVal(1);
4707
4708 SDValue Imm = CurDAG->getTargetConstant(
4709 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4710
4711 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4712 if (ShAmt + HsAmt != VTSizeInBits)
4713 return false;
4714
4715 if (!IsXOROperand) {
4716 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4717 SDNode *MOV =
4718 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4719 SDValue MOVIV = SDValue(MOV, 0);
4720
4721 R1 = N1->getOperand(0);
4722 R2 = MOVIV;
4723 }
4724
4725 if (SVT != VT) {
4726 SDValue Undef =
4727 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4728
4729 if (SVT.isScalableVector() && VT.is64BitVector()) {
4730 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4731
4732 SDValue UndefQ = SDValue(
4733 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4734 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4735
4736 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4737 UndefQ, R1, DSub),
4738 0);
4739 if (R2.getValueType() == VT)
4740 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4741 UndefQ, R2, DSub),
4742 0);
4743 }
4744
4745 SDValue SubReg = CurDAG->getTargetConstant(
4746 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4747
4748 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4749 R1, SubReg),
4750 0);
4751
4752 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4753 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4754 Undef, R2, SubReg),
4755 0);
4756 }
4757
4758 SDValue Ops[] = {R1, R2, Imm};
4759 SDNode *XAR = nullptr;
4760
4761 if (SVT.isScalableVector()) {
4762 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4763 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4764 AArch64::XAR_ZZZI_D}))
4765 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4766 } else {
4767 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4768 }
4769
4770 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4771
4772 if (SVT != VT) {
4773 if (VT.is64BitVector() && SVT.isScalableVector()) {
4774 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4775
4776 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4777 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4778 SDValue(XAR, 0), ZSub);
4779
4780 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4781 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4782 SDValue(Q, 0), DSub);
4783 } else {
4784 SDValue SubReg = CurDAG->getTargetConstant(
4785 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4786 MVT::i32);
4787 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4788 SDValue(XAR, 0), SubReg);
4789 }
4790 }
4791 ReplaceNode(N, XAR);
4792 return true;
4793}
4794
4795void AArch64DAGToDAGISel::Select(SDNode *Node) {
4796 // If we have a custom node, we already have selected!
4797 if (Node->isMachineOpcode()) {
4798 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4799 Node->setNodeId(-1);
4800 return;
4801 }
4802
4803 // Handle a few custom selection cases.
4804 EVT VT = Node->getValueType(0);
4805
4806 switch (Node->getOpcode()) {
4807 default:
4808 break;
4809
4810 case ISD::ATOMIC_CMP_SWAP:
4811 if (SelectCMP_SWAP(Node))
4812 return;
4813 break;
4814
4815 case ISD::READ_REGISTER:
4816 case AArch64ISD::MRRS:
4817 if (tryReadRegister(Node))
4818 return;
4819 break;
4820
4821 case ISD::WRITE_REGISTER:
4822 case AArch64ISD::MSRR:
4823 if (tryWriteRegister(Node))
4824 return;
4825 break;
4826
4827 case ISD::LOAD: {
4828 // Try to select as an indexed load. Fall through to normal processing
4829 // if we can't.
4830 if (tryIndexedLoad(Node))
4831 return;
4832 break;
4833 }
4834
4835 case ISD::SRL:
4836 case ISD::AND:
4837 case ISD::SRA:
4838 case ISD::SIGN_EXTEND_INREG:
4839 if (tryBitfieldExtractOp(Node))
4840 return;
4841 if (tryBitfieldInsertInZeroOp(Node))
4842 return;
4843 [[fallthrough]];
4844 case ISD::ROTR:
4845 case ISD::SHL:
4846 if (tryShiftAmountMod(Node))
4847 return;
4848 break;
4849
4850 case ISD::SIGN_EXTEND:
4851 if (tryBitfieldExtractOpFromSExt(Node))
4852 return;
4853 break;
4854
4855 case ISD::OR:
4856 if (tryBitfieldInsertOp(Node))
4857 return;
4858 if (trySelectXAR(Node))
4859 return;
4860 break;
4861
4862 case ISD::EXTRACT_SUBVECTOR: {
4863 if (trySelectCastScalableToFixedLengthVector(Node))
4864 return;
4865 break;
4866 }
4867
4868 case ISD::INSERT_SUBVECTOR: {
4869 if (trySelectCastFixedLengthToScalableVector(Node))
4870 return;
4871 break;
4872 }
4873
4874 case ISD::Constant: {
4875 // Materialize zero constants as copies from WZR/XZR. This allows
4876 // the coalescer to propagate these into other instructions.
4877 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4878 if (ConstNode->isZero()) {
4879 if (VT == MVT::i32) {
4880 SDValue New = CurDAG->getCopyFromReg(
4881 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4882 ReplaceNode(Node, New.getNode());
4883 return;
4884 } else if (VT == MVT::i64) {
4885 SDValue New = CurDAG->getCopyFromReg(
4886 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4887 ReplaceNode(Node, New.getNode());
4888 return;
4889 }
4890 }
4891 break;
4892 }
4893
4894 case ISD::FrameIndex: {
4895 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4896 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4897 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4898 const TargetLowering *TLI = getTargetLowering();
4899 SDValue TFI = CurDAG->getTargetFrameIndex(
4900 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4901 SDLoc DL(Node);
4902 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4903 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4904 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4905 return;
4906 }
4907 case ISD::INTRINSIC_W_CHAIN: {
4908 unsigned IntNo = Node->getConstantOperandVal(1);
4909 switch (IntNo) {
4910 default:
4911 break;
4912 case Intrinsic::aarch64_gcsss: {
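      // GCS stack switch: emit the GCSSS1/GCSSS2 instruction pair, with
      // GCSSS2 consuming XZR and the chain produced by GCSSS1.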
4913 SDLoc DL(Node);
4914 SDValue Chain = Node->getOperand(0);
4915 SDValue Val = Node->getOperand(2);
4916 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4917 SDNode *SS1 =
4918 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4919 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4920 MVT::Other, Zero, SDValue(SS1, 0));
4921 ReplaceNode(Node, SS2);
4922 return;
4923 }
4924 case Intrinsic::aarch64_ldaxp:
4925 case Intrinsic::aarch64_ldxp: {
4926 unsigned Op =
4927 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4928 SDValue MemAddr = Node->getOperand(2);
4929 SDLoc DL(Node);
4930 SDValue Chain = Node->getOperand(0);
4931
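      // Both exclusive-pair loads return two i64 values plus a chain.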
4932 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4933 MVT::Other, MemAddr, Chain);
4934
4935 // Transfer memoperands.
4936 MachineMemOperand *MemOp =
4937 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4938 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4939 ReplaceNode(Node, Ld);
4940 return;
4941 }
4942 case Intrinsic::aarch64_stlxp:
4943 case Intrinsic::aarch64_stxp: {
4944 unsigned Op =
4945 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4946 SDLoc DL(Node);
4947 SDValue Chain = Node->getOperand(0);
4948 SDValue ValLo = Node->getOperand(2);
4949 SDValue ValHi = Node->getOperand(3);
4950 SDValue MemAddr = Node->getOperand(4);
4951
4952 // Place arguments in the right order.
4953 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4954
4955 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4956 // Transfer memoperands.
4957 MachineMemOperand *MemOp =
4958 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4959 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4960
4961 ReplaceNode(Node, St);
4962 return;
4963 }
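    // The NEON structured loads below dispatch on the result element type to
    // the matching LD1/LD2/LD3/LD4 variant, extracting results via dsub0 for
    // 64-bit and qsub0 for 128-bit vectors.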
4964 case Intrinsic::aarch64_neon_ld1x2:
4965 if (VT == MVT::v8i8) {
4966 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4967 return;
4968 } else if (VT == MVT::v16i8) {
4969 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4970 return;
4971 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4972 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4973 return;
4974 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4975 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4976 return;
4977 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4978 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4979 return;
4980 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4981 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4982 return;
4983 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4984 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4985 return;
4986 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4987 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4988 return;
4989 }
4990 break;
4991 case Intrinsic::aarch64_neon_ld1x3:
4992 if (VT == MVT::v8i8) {
4993 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4994 return;
4995 } else if (VT == MVT::v16i8) {
4996 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4997 return;
4998 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4999 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5000 return;
5001 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5002 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5003 return;
5004 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5005 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5006 return;
5007 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5008 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5009 return;
5010 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5011 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5012 return;
5013 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5014 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5015 return;
5016 }
5017 break;
5018 case Intrinsic::aarch64_neon_ld1x4:
5019 if (VT == MVT::v8i8) {
5020 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5021 return;
5022 } else if (VT == MVT::v16i8) {
5023 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5024 return;
5025 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5026 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5027 return;
5028 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5029 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5030 return;
5031 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5032 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5033 return;
5034 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5035 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5036 return;
5037 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5038 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5039 return;
5040 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5041 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5042 return;
5043 }
5044 break;
5045 case Intrinsic::aarch64_neon_ld2:
5046 if (VT == MVT::v8i8) {
5047 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5048 return;
5049 } else if (VT == MVT::v16i8) {
5050 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5051 return;
5052 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5053 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5054 return;
5055 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5056 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5057 return;
5058 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5059 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5060 return;
5061 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5062 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5063 return;
5064 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5065 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5066 return;
5067 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5068 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5069 return;
5070 }
5071 break;
5072 case Intrinsic::aarch64_neon_ld3:
5073 if (VT == MVT::v8i8) {
5074 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5075 return;
5076 } else if (VT == MVT::v16i8) {
5077 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5078 return;
5079 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5080 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5081 return;
5082 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5083 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5084 return;
5085 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5086 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5087 return;
5088 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5089 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5090 return;
5091 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5092 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5093 return;
5094 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5095 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5096 return;
5097 }
5098 break;
5099 case Intrinsic::aarch64_neon_ld4:
5100 if (VT == MVT::v8i8) {
5101 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5102 return;
5103 } else if (VT == MVT::v16i8) {
5104 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5105 return;
5106 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5107 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5108 return;
5109 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5110 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5111 return;
5112 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5113 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5114 return;
5115 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5116 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5117 return;
5118 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5119 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5120 return;
5121 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5122 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5123 return;
5124 }
5125 break;
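    // The ld2r/ld3r/ld4r intrinsics load a single structure and replicate it
    // across all lanes (LD2R/LD3R/LD4R).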
5126 case Intrinsic::aarch64_neon_ld2r:
5127 if (VT == MVT::v8i8) {
5128 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5129 return;
5130 } else if (VT == MVT::v16i8) {
5131 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5132 return;
5133 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5134 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5135 return;
5136 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5137 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5138 return;
5139 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5140 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5141 return;
5142 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5143 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5144 return;
5145 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5146 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5147 return;
5148 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5149 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5150 return;
5151 }
5152 break;
5153 case Intrinsic::aarch64_neon_ld3r:
5154 if (VT == MVT::v8i8) {
5155 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5156 return;
5157 } else if (VT == MVT::v16i8) {
5158 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5159 return;
5160 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5161 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5162 return;
5163 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5164 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5165 return;
5166 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5167 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5168 return;
5169 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5170 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5171 return;
5172 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5173 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5174 return;
5175 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5176 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5177 return;
5178 }
5179 break;
5180 case Intrinsic::aarch64_neon_ld4r:
5181 if (VT == MVT::v8i8) {
5182 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5183 return;
5184 } else if (VT == MVT::v16i8) {
5185 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5186 return;
5187 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5188 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5189 return;
5190 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5191 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5192 return;
5193 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5194 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5195 return;
5196 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5197 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5198 return;
5199 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5200 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5201 return;
5202 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5203 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5204 return;
5205 }
5206 break;
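    // Lane loads are selected by element size only; the 64-bit and 128-bit
    // vector forms share the same opcode.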
5207 case Intrinsic::aarch64_neon_ld2lane:
5208 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5209 SelectLoadLane(Node, 2, AArch64::LD2i8);
5210 return;
5211 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5212 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5213 SelectLoadLane(Node, 2, AArch64::LD2i16);
5214 return;
5215 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5216 VT == MVT::v2f32) {
5217 SelectLoadLane(Node, 2, AArch64::LD2i32);
5218 return;
5219 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5220 VT == MVT::v1f64) {
5221 SelectLoadLane(Node, 2, AArch64::LD2i64);
5222 return;
5223 }
5224 break;
5225 case Intrinsic::aarch64_neon_ld3lane:
5226 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5227 SelectLoadLane(Node, 3, AArch64::LD3i8);
5228 return;
5229 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5230 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5231 SelectLoadLane(Node, 3, AArch64::LD3i16);
5232 return;
5233 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5234 VT == MVT::v2f32) {
5235 SelectLoadLane(Node, 3, AArch64::LD3i32);
5236 return;
5237 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5238 VT == MVT::v1f64) {
5239 SelectLoadLane(Node, 3, AArch64::LD3i64);
5240 return;
5241 }
5242 break;
5243 case Intrinsic::aarch64_neon_ld4lane:
5244 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5245 SelectLoadLane(Node, 4, AArch64::LD4i8);
5246 return;
5247 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5248 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5249 SelectLoadLane(Node, 4, AArch64::LD4i16);
5250 return;
5251 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5252 VT == MVT::v2f32) {
5253 SelectLoadLane(Node, 4, AArch64::LD4i32);
5254 return;
5255 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5256 VT == MVT::v1f64) {
5257 SelectLoadLane(Node, 4, AArch64::LD4i64);
5258 return;
5259 }
5260 break;
5261 case Intrinsic::aarch64_ld64b:
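      // LD64B loads 64 bytes into eight consecutive X registers, extracted
      // here starting at the x8sub_0 sub-register.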
5262 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5263 return;
5264 case Intrinsic::aarch64_sve_ld2q_sret: {
5265 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5266 return;
5267 }
5268 case Intrinsic::aarch64_sve_ld3q_sret: {
5269 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5270 return;
5271 }
5272 case Intrinsic::aarch64_sve_ld4q_sret: {
5273 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5274 return;
5275 }
5276 case Intrinsic::aarch64_sve_ld2_sret: {
5277 if (VT == MVT::nxv16i8) {
5278 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5279 true);
5280 return;
5281 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5282 VT == MVT::nxv8bf16) {
5283 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5284 true);
5285 return;
5286 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5287 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5288 true);
5289 return;
5290 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5291 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5292 true);
5293 return;
5294 }
5295 break;
5296 }
5297 case Intrinsic::aarch64_sve_ld1_pn_x2: {
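      // With SME2 in streaming mode use the *_PSEUDO multi-vector forms;
      // otherwise the native two-register LD1 needs SVE2p1. Bail out if
      // neither feature is available.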
5298 if (VT == MVT::nxv16i8) {
5299 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5300 SelectContiguousMultiVectorLoad(
5301 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5302 else if (Subtarget->hasSVE2p1())
5303 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5304 AArch64::LD1B_2Z);
5305 else
5306 break;
5307 return;
5308 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5309 VT == MVT::nxv8bf16) {
5310 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5311 SelectContiguousMultiVectorLoad(
5312 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5313 else if (Subtarget->hasSVE2p1())
5314 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5315 AArch64::LD1H_2Z);
5316 else
5317 break;
5318 return;
5319 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5320 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5321 SelectContiguousMultiVectorLoad(
5322 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5323 else if (Subtarget->hasSVE2p1())
5324 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5325 AArch64::LD1W_2Z);
5326 else
5327 break;
5328 return;
5329 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5330 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5331 SelectContiguousMultiVectorLoad(
5332 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5333 else if (Subtarget->hasSVE2p1())
5334 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5335 AArch64::LD1D_2Z);
5336 else
5337 break;
5338 return;
5339 }
5340 break;
5341 }
5342 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5343 if (VT == MVT::nxv16i8) {
5344 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5345 SelectContiguousMultiVectorLoad(
5346 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5347 else if (Subtarget->hasSVE2p1())
5348 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5349 AArch64::LD1B_4Z);
5350 else
5351 break;
5352 return;
5353 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5354 VT == MVT::nxv8bf16) {
5355 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5356 SelectContiguousMultiVectorLoad(
5357 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5358 else if (Subtarget->hasSVE2p1())
5359 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5360 AArch64::LD1H_4Z);
5361 else
5362 break;
5363 return;
5364 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5365 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5366 SelectContiguousMultiVectorLoad(
5367 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5368 else if (Subtarget->hasSVE2p1())
5369 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5370 AArch64::LD1W_4Z);
5371 else
5372 break;
5373 return;
5374 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5375 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5376 SelectContiguousMultiVectorLoad(
5377 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5378 else if (Subtarget->hasSVE2p1())
5379 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5380 AArch64::LD1D_4Z);
5381 else
5382 break;
5383 return;
5384 }
5385 break;
5386 }
5387 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5388 if (VT == MVT::nxv16i8) {
5389 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5390 SelectContiguousMultiVectorLoad(Node, 2, 0,
5391 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5392 AArch64::LDNT1B_2Z_PSEUDO);
5393 else if (Subtarget->hasSVE2p1())
5394 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5395 AArch64::LDNT1B_2Z);
5396 else
5397 break;
5398 return;
5399 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5400 VT == MVT::nxv8bf16) {
5401 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5402 SelectContiguousMultiVectorLoad(Node, 2, 1,
5403 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5404 AArch64::LDNT1H_2Z_PSEUDO);
5405 else if (Subtarget->hasSVE2p1())
5406 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5407 AArch64::LDNT1H_2Z);
5408 else
5409 break;
5410 return;
5411 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5412 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5413 SelectContiguousMultiVectorLoad(Node, 2, 2,
5414 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5415 AArch64::LDNT1W_2Z_PSEUDO);
5416 else if (Subtarget->hasSVE2p1())
5417 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5418 AArch64::LDNT1W_2Z);
5419 else
5420 break;
5421 return;
5422 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5423 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5424 SelectContiguousMultiVectorLoad(Node, 2, 3,
5425 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5426 AArch64::LDNT1D_2Z_PSEUDO);
5427 else if (Subtarget->hasSVE2p1())
5428 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5429 AArch64::LDNT1D_2Z);
5430 else
5431 break;
5432 return;
5433 }
5434 break;
5435 }
5436 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5437 if (VT == MVT::nxv16i8) {
5438 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5439 SelectContiguousMultiVectorLoad(Node, 4, 0,
5440 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5441 AArch64::LDNT1B_4Z_PSEUDO);
5442 else if (Subtarget->hasSVE2p1())
5443 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5444 AArch64::LDNT1B_4Z);
5445 else
5446 break;
5447 return;
5448 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5449 VT == MVT::nxv8bf16) {
5450 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5451 SelectContiguousMultiVectorLoad(Node, 4, 1,
5452 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5453 AArch64::LDNT1H_4Z_PSEUDO);
5454 else if (Subtarget->hasSVE2p1())
5455 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5456 AArch64::LDNT1H_4Z);
5457 else
5458 break;
5459 return;
5460 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5461 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5462 SelectContiguousMultiVectorLoad(Node, 4, 2,
5463 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5464 AArch64::LDNT1W_4Z_PSEUDO);
5465 else if (Subtarget->hasSVE2p1())
5466 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5467 AArch64::LDNT1W_4Z);
5468 else
5469 break;
5470 return;
5471 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5472 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5473 SelectContiguousMultiVectorLoad(Node, 4, 3,
5474 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5475 AArch64::LDNT1D_4Z_PSEUDO);
5476 else if (Subtarget->hasSVE2p1())
5477 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5478 AArch64::LDNT1D_4Z);
5479 else
5480 break;
5481 return;
5482 }
5483 break;
5484 }
5485 case Intrinsic::aarch64_sve_ld3_sret: {
5486 if (VT == MVT::nxv16i8) {
5487 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5488 true);
5489 return;
5490 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5491 VT == MVT::nxv8bf16) {
5492 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5493 true);
5494 return;
5495 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5496 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5497 true);
5498 return;
5499 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5500 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5501 true);
5502 return;
5503 }
5504 break;
5505 }
5506 case Intrinsic::aarch64_sve_ld4_sret: {
5507 if (VT == MVT::nxv16i8) {
5508 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5509 true);
5510 return;
5511 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5512 VT == MVT::nxv8bf16) {
5513 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5514 true);
5515 return;
5516 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5517 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5518 true);
5519 return;
5520 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5521 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5522 true);
5523 return;
5524 }
5525 break;
5526 }
5527 case Intrinsic::aarch64_sme_read_hor_vg2: {
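      // Read two consecutive horizontal ZA tile slices into a pair of Z
      // registers via the multi-vector MOVA forms.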
5528 if (VT == MVT::nxv16i8) {
5529 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5530 AArch64::MOVA_2ZMXI_H_B);
5531 return;
5532 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5533 VT == MVT::nxv8bf16) {
5534 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5535 AArch64::MOVA_2ZMXI_H_H);
5536 return;
5537 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5538 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5539 AArch64::MOVA_2ZMXI_H_S);
5540 return;
5541 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5542 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5543 AArch64::MOVA_2ZMXI_H_D);
5544 return;
5545 }
5546 break;
5547 }
5548 case Intrinsic::aarch64_sme_read_ver_vg2: {
5549 if (VT == MVT::nxv16i8) {
5550 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5551 AArch64::MOVA_2ZMXI_V_B);
5552 return;
5553 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5554 VT == MVT::nxv8bf16) {
5555 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5556 AArch64::MOVA_2ZMXI_V_H);
5557 return;
5558 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5559 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5560 AArch64::MOVA_2ZMXI_V_S);
5561 return;
5562 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5563 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5564 AArch64::MOVA_2ZMXI_V_D);
5565 return;
5566 }
5567 break;
5568 }
5569 case Intrinsic::aarch64_sme_read_hor_vg4: {
5570 if (VT == MVT::nxv16i8) {
5571 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5572 AArch64::MOVA_4ZMXI_H_B);
5573 return;
5574 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5575 VT == MVT::nxv8bf16) {
5576 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5577 AArch64::MOVA_4ZMXI_H_H);
5578 return;
5579 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5580 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5581 AArch64::MOVA_4ZMXI_H_S);
5582 return;
5583 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5584 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5585 AArch64::MOVA_4ZMXI_H_D);
5586 return;
5587 }
5588 break;
5589 }
5590 case Intrinsic::aarch64_sme_read_ver_vg4: {
5591 if (VT == MVT::nxv16i8) {
5592 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5593 AArch64::MOVA_4ZMXI_V_B);
5594 return;
5595 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5596 VT == MVT::nxv8bf16) {
5597 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5598 AArch64::MOVA_4ZMXI_V_H);
5599 return;
5600 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5601 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5602 AArch64::MOVA_4ZMXI_V_S);
5603 return;
5604 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5605 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5606 AArch64::MOVA_4ZMXI_V_D);
5607 return;
5608 }
5609 break;
5610 }
5611 case Intrinsic::aarch64_sme_read_vg1x2: {
5612 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5613 AArch64::MOVA_VG2_2ZMXI);
5614 return;
5615 }
5616 case Intrinsic::aarch64_sme_read_vg1x4: {
5617 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5618 AArch64::MOVA_VG4_4ZMXI);
5619 return;
5620 }
5621 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5622 if (VT == MVT::nxv16i8) {
5623 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5624 return;
5625 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5626 VT == MVT::nxv8bf16) {
5627 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5628 return;
5629 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5630 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5631 return;
5632 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5633 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5634 return;
5635 }
5636 break;
5637 }
5638 case Intrinsic::aarch64_sme_readz_vert_x2: {
5639 if (VT == MVT::nxv16i8) {
5640 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5641 return;
5642 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5643 VT == MVT::nxv8bf16) {
5644 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5645 return;
5646 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5647 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5648 return;
5649 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5650 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5651 return;
5652 }
5653 break;
5654 }
5655 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5656 if (VT == MVT::nxv16i8) {
5657 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5658 return;
5659 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5660 VT == MVT::nxv8bf16) {
5661 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5662 return;
5663 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5664 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5665 return;
5666 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5667 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5668 return;
5669 }
5670 break;
5671 }
5672 case Intrinsic::aarch64_sme_readz_vert_x4: {
5673 if (VT == MVT::nxv16i8) {
5674 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5675 return;
5676 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5677 VT == MVT::nxv8bf16) {
5678 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5679 return;
5680 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5681 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5682 return;
5683 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5684 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5685 return;
5686 }
5687 break;
5688 }
5689 case Intrinsic::aarch64_sme_readz_x2: {
5690 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5691 AArch64::ZA);
5692 return;
5693 }
5694 case Intrinsic::aarch64_sme_readz_x4: {
5695 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5696 AArch64::ZA);
5697 return;
5698 }
5699 case Intrinsic::swift_async_context_addr: {
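      // Materialize the address of the Swift async context as FP - 8 and
      // record that the frame address is taken.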
5700 SDLoc DL(Node);
5701 SDValue Chain = Node->getOperand(0);
5702 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5703 SDValue Res = SDValue(
5704 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5705 CurDAG->getTargetConstant(8, DL, MVT::i32),
5706 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5707 0);
5708 ReplaceUses(SDValue(Node, 0), Res);
5709 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5710 CurDAG->RemoveDeadNode(Node);
5711
5712 auto &MF = CurDAG->getMachineFunction();
5713 MF.getFrameInfo().setFrameAddressIsTaken(true);
5714 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5715 return;
5716 }
5717 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5719 Node->getValueType(0),
5720 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5721 AArch64::LUTI2_4ZTZI_S}))
5722 // Second Immediate must be <= 3:
5723 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5724 return;
5725 }
5726 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5728 Node->getValueType(0),
5729 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5730 // Second Immediate must be <= 1:
5731 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5732 return;
5733 }
5734 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5736 Node->getValueType(0),
5737 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5738 AArch64::LUTI2_2ZTZI_S}))
5739 // Second Immediate must be <= 7:
5740 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5741 return;
5742 }
5743 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5745 Node->getValueType(0),
5746 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5747 AArch64::LUTI4_2ZTZI_S}))
5748 // Second Immediate must be <= 3:
5749 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5750 return;
5751 }
5752 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5753 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5754 return;
5755 }
5756 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5758 Node->getValueType(0),
5759 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5760 SelectCVTIntrinsicFP8(Node, 2, Opc);
5761 return;
5762 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5764 Node->getValueType(0),
5765 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5766 SelectCVTIntrinsicFP8(Node, 2, Opc);
5767 return;
5768 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5770 Node->getValueType(0),
5771 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5772 SelectCVTIntrinsicFP8(Node, 2, Opc);
5773 return;
5774 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5776 Node->getValueType(0),
5777 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5778 SelectCVTIntrinsicFP8(Node, 2, Opc);
5779 return;
5780 }
5781 } break;
5782 case ISD::INTRINSIC_WO_CHAIN: {
5783 unsigned IntNo = Node->getConstantOperandVal(0);
5784 switch (IntNo) {
5785 default:
5786 break;
5787 case Intrinsic::aarch64_tagp:
5788 SelectTagP(Node);
5789 return;
5790
5791 case Intrinsic::ptrauth_auth:
5792 SelectPtrauthAuth(Node);
5793 return;
5794
5795 case Intrinsic::ptrauth_resign:
5796 SelectPtrauthResign(Node);
5797 return;
5798
5799 case Intrinsic::aarch64_neon_tbl2:
5800 SelectTable(Node, 2,
5801 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5802 false);
5803 return;
5804 case Intrinsic::aarch64_neon_tbl3:
5805 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5806 : AArch64::TBLv16i8Three,
5807 false);
5808 return;
5809 case Intrinsic::aarch64_neon_tbl4:
5810 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5811 : AArch64::TBLv16i8Four,
5812 false);
5813 return;
5814 case Intrinsic::aarch64_neon_tbx2:
5815 SelectTable(Node, 2,
5816 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5817 true);
5818 return;
5819 case Intrinsic::aarch64_neon_tbx3:
5820 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5821 : AArch64::TBXv16i8Three,
5822 true);
5823 return;
5824 case Intrinsic::aarch64_neon_tbx4:
5825 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5826 : AArch64::TBXv16i8Four,
5827 true);
5828 return;
5829 case Intrinsic::aarch64_sve_srshl_single_x2:
5831 Node->getValueType(0),
5832 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5833 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5834 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5835 return;
5836 case Intrinsic::aarch64_sve_srshl_single_x4:
5838 Node->getValueType(0),
5839 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5840 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5841 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5842 return;
5843 case Intrinsic::aarch64_sve_urshl_single_x2:
5845 Node->getValueType(0),
5846 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5847 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5848 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5849 return;
5850 case Intrinsic::aarch64_sve_urshl_single_x4:
5852 Node->getValueType(0),
5853 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5854 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5855 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5856 return;
5857 case Intrinsic::aarch64_sve_srshl_x2:
5859 Node->getValueType(0),
5860 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5861 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5862 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5863 return;
5864 case Intrinsic::aarch64_sve_srshl_x4:
5866 Node->getValueType(0),
5867 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5868 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5869 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5870 return;
5871 case Intrinsic::aarch64_sve_urshl_x2:
5873 Node->getValueType(0),
5874 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5875 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5876 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5877 return;
5878 case Intrinsic::aarch64_sve_urshl_x4:
5880 Node->getValueType(0),
5881 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5882 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5883 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5884 return;
5885 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5887 Node->getValueType(0),
5888 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5889 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5890 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5891 return;
5892 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5894 Node->getValueType(0),
5895 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5896 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5897 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5898 return;
5899 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5901 Node->getValueType(0),
5902 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5903 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5904 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5905 return;
5906 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5908 Node->getValueType(0),
5909 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5910 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5911 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5912 return;
5913 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5915 Node->getValueType(0),
5916 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5917 AArch64::FSCALE_2ZZ_D}))
5918 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5919 return;
5920 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5922 Node->getValueType(0),
5923 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5924 AArch64::FSCALE_4ZZ_D}))
5925 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5926 return;
5927 case Intrinsic::aarch64_sme_fp8_scale_x2:
5929 Node->getValueType(0),
5930 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5931 AArch64::FSCALE_2Z2Z_D}))
5932 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5933 return;
5934 case Intrinsic::aarch64_sme_fp8_scale_x4:
5936 Node->getValueType(0),
5937 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5938 AArch64::FSCALE_4Z4Z_D}))
5939 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5940 return;
5941 case Intrinsic::aarch64_sve_whilege_x2:
5943 Node->getValueType(0),
5944 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5945 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5946 SelectWhilePair(Node, Op);
5947 return;
5948 case Intrinsic::aarch64_sve_whilegt_x2:
5950 Node->getValueType(0),
5951 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5952 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5953 SelectWhilePair(Node, Op);
5954 return;
5955 case Intrinsic::aarch64_sve_whilehi_x2:
5957 Node->getValueType(0),
5958 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5959 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5960 SelectWhilePair(Node, Op);
5961 return;
5962 case Intrinsic::aarch64_sve_whilehs_x2:
5964 Node->getValueType(0),
5965 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5966 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5967 SelectWhilePair(Node, Op);
5968 return;
5969 case Intrinsic::aarch64_sve_whilele_x2:
5971 Node->getValueType(0),
5972 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5973 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5974 SelectWhilePair(Node, Op);
5975 return;
5976 case Intrinsic::aarch64_sve_whilelo_x2:
5978 Node->getValueType(0),
5979 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5980 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5981 SelectWhilePair(Node, Op);
5982 return;
5983 case Intrinsic::aarch64_sve_whilels_x2:
5985 Node->getValueType(0),
5986 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5987 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5988 SelectWhilePair(Node, Op);
5989 return;
5990 case Intrinsic::aarch64_sve_whilelt_x2:
5992 Node->getValueType(0),
5993 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5994 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5995 SelectWhilePair(Node, Op);
5996 return;
5997 case Intrinsic::aarch64_sve_smax_single_x2:
5999 Node->getValueType(0),
6000 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6001 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6002 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6003 return;
6004 case Intrinsic::aarch64_sve_umax_single_x2:
6006 Node->getValueType(0),
6007 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6008 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6009 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6010 return;
6011 case Intrinsic::aarch64_sve_fmax_single_x2:
6013 Node->getValueType(0),
6014 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6015 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6016 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6017 return;
6018 case Intrinsic::aarch64_sve_smax_single_x4:
6020 Node->getValueType(0),
6021 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6022 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6023 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6024 return;
6025 case Intrinsic::aarch64_sve_umax_single_x4:
6027 Node->getValueType(0),
6028 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6029 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6030 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6031 return;
6032 case Intrinsic::aarch64_sve_fmax_single_x4:
6034 Node->getValueType(0),
6035 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6036 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6037 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6038 return;
6039 case Intrinsic::aarch64_sve_smin_single_x2:
6041 Node->getValueType(0),
6042 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6043 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6044 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6045 return;
6046 case Intrinsic::aarch64_sve_umin_single_x2:
6048 Node->getValueType(0),
6049 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6050 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6051 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6052 return;
6053 case Intrinsic::aarch64_sve_fmin_single_x2:
6055 Node->getValueType(0),
6056 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6057 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6058 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6059 return;
6060 case Intrinsic::aarch64_sve_smin_single_x4:
6062 Node->getValueType(0),
6063 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6064 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6065 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6066 return;
6067 case Intrinsic::aarch64_sve_umin_single_x4:
6069 Node->getValueType(0),
6070 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6071 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6072 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6073 return;
6074 case Intrinsic::aarch64_sve_fmin_single_x4:
6076 Node->getValueType(0),
6077 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6078 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6079 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6080 return;
6081 case Intrinsic::aarch64_sve_smax_x2:
6083 Node->getValueType(0),
6084 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6085 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6086 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6087 return;
6088 case Intrinsic::aarch64_sve_umax_x2:
6090 Node->getValueType(0),
6091 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6092 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6093 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6094 return;
6095 case Intrinsic::aarch64_sve_fmax_x2:
6097 Node->getValueType(0),
6098 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6099 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6100 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6101 return;
6102 case Intrinsic::aarch64_sve_smax_x4:
6104 Node->getValueType(0),
6105 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6106 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6107 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6108 return;
6109 case Intrinsic::aarch64_sve_umax_x4:
6111 Node->getValueType(0),
6112 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6113 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6114 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6115 return;
6116 case Intrinsic::aarch64_sve_fmax_x4:
6118 Node->getValueType(0),
6119 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6120 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6121 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6122 return;
6123 case Intrinsic::aarch64_sme_famax_x2:
6125 Node->getValueType(0),
6126 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6127 AArch64::FAMAX_2Z2Z_D}))
6128 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6129 return;
6130 case Intrinsic::aarch64_sme_famax_x4:
6132 Node->getValueType(0),
6133 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6134 AArch64::FAMAX_4Z4Z_D}))
6135 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6136 return;
6137 case Intrinsic::aarch64_sme_famin_x2:
6139 Node->getValueType(0),
6140 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6141 AArch64::FAMIN_2Z2Z_D}))
6142 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6143 return;
6144 case Intrinsic::aarch64_sme_famin_x4:
6146 Node->getValueType(0),
6147 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6148 AArch64::FAMIN_4Z4Z_D}))
6149 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6150 return;
6151 case Intrinsic::aarch64_sve_smin_x2:
6153 Node->getValueType(0),
6154 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6155 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6156 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6157 return;
6158 case Intrinsic::aarch64_sve_umin_x2:
6160 Node->getValueType(0),
6161 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6162 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6163 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6164 return;
6165 case Intrinsic::aarch64_sve_fmin_x2:
6167 Node->getValueType(0),
6168 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6169 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6170 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6171 return;
6172 case Intrinsic::aarch64_sve_smin_x4:
6174 Node->getValueType(0),
6175 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6176 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6177 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6178 return;
6179 case Intrinsic::aarch64_sve_umin_x4:
6181 Node->getValueType(0),
6182 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6183 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6184 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6185 return;
6186 case Intrinsic::aarch64_sve_fmin_x4:
6188 Node->getValueType(0),
6189 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6190 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6191 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6192 return;
6193 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6195 Node->getValueType(0),
6196 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6197 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6198 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6199 return;
6200 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6202 Node->getValueType(0),
6203 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6204 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6205 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6206 return;
6207 case Intrinsic::aarch64_sve_fminnm_single_x2:
6209 Node->getValueType(0),
6210 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6211 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6212 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6213 return;
6214 case Intrinsic::aarch64_sve_fminnm_single_x4:
6216 Node->getValueType(0),
6217 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6218 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6219 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6220 return;
6221 case Intrinsic::aarch64_sve_fmaxnm_x2:
6223 Node->getValueType(0),
6224 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6225 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6226 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6227 return;
6228 case Intrinsic::aarch64_sve_fmaxnm_x4:
6230 Node->getValueType(0),
6231 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6232 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6233 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6234 return;
6235 case Intrinsic::aarch64_sve_fminnm_x2:
6237 Node->getValueType(0),
6238 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6239 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6240 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6241 return;
6242 case Intrinsic::aarch64_sve_fminnm_x4:
6244 Node->getValueType(0),
6245 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6246 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6247 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6248 return;
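    // Two- and four-register conversions between 32-bit integer and
    // floating-point elements (StoS forms).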
6249 case Intrinsic::aarch64_sve_fcvtzs_x2:
6250 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6251 return;
6252 case Intrinsic::aarch64_sve_scvtf_x2:
6253 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6254 return;
6255 case Intrinsic::aarch64_sve_fcvtzu_x2:
6256 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6257 return;
6258 case Intrinsic::aarch64_sve_ucvtf_x2:
6259 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6260 return;
6261 case Intrinsic::aarch64_sve_fcvtzs_x4:
6262 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6263 return;
6264 case Intrinsic::aarch64_sve_scvtf_x4:
6265 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6266 return;
6267 case Intrinsic::aarch64_sve_fcvtzu_x4:
6268 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6269 return;
6270 case Intrinsic::aarch64_sve_ucvtf_x4:
6271 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6272 return;
6273 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6274 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6275 return;
6276 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6277 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6278 return;
6279 case Intrinsic::aarch64_sve_sclamp_single_x2:
6281 Node->getValueType(0),
6282 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6283 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6284 SelectClamp(Node, 2, Op);
6285 return;
6286 case Intrinsic::aarch64_sve_uclamp_single_x2:
6288 Node->getValueType(0),
6289 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6290 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6291 SelectClamp(Node, 2, Op);
6292 return;
6293 case Intrinsic::aarch64_sve_fclamp_single_x2:
6295 Node->getValueType(0),
6296 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6297 AArch64::FCLAMP_VG2_2Z2Z_D}))
6298 SelectClamp(Node, 2, Op);
6299 return;
6300 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6301 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6302 return;
6303 case Intrinsic::aarch64_sve_sclamp_single_x4:
6305 Node->getValueType(0),
6306 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6307 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6308 SelectClamp(Node, 4, Op);
6309 return;
6310 case Intrinsic::aarch64_sve_uclamp_single_x4:
6312 Node->getValueType(0),
6313 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6314 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6315 SelectClamp(Node, 4, Op);
6316 return;
6317 case Intrinsic::aarch64_sve_fclamp_single_x4:
6319 Node->getValueType(0),
6320 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6321 AArch64::FCLAMP_VG4_4Z4Z_D}))
6322 SelectClamp(Node, 4, Op);
6323 return;
6324 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6325 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6326 return;
6327 case Intrinsic::aarch64_sve_add_single_x2:
6329 Node->getValueType(0),
6330 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6331 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6332 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6333 return;
6334 case Intrinsic::aarch64_sve_add_single_x4:
6336 Node->getValueType(0),
6337 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6338 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6339 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6340 return;
6341 case Intrinsic::aarch64_sve_zip_x2:
6343 Node->getValueType(0),
6344 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6345 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6346 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6347 return;
6348 case Intrinsic::aarch64_sve_zipq_x2:
6349 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6350 AArch64::ZIP_VG2_2ZZZ_Q);
6351 return;
6352 case Intrinsic::aarch64_sve_zip_x4:
6354 Node->getValueType(0),
6355 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6356 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6357 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6358 return;
6359 case Intrinsic::aarch64_sve_zipq_x4:
6360 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6361 AArch64::ZIP_VG4_4Z4Z_Q);
6362 return;
6363 case Intrinsic::aarch64_sve_uzp_x2:
6365 Node->getValueType(0),
6366 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6367 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6368 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6369 return;
6370 case Intrinsic::aarch64_sve_uzpq_x2:
6371 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6372 AArch64::UZP_VG2_2ZZZ_Q);
6373 return;
6374 case Intrinsic::aarch64_sve_uzp_x4:
6376 Node->getValueType(0),
6377 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6378 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6379 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6380 return;
6381 case Intrinsic::aarch64_sve_uzpq_x4:
6382 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6383 AArch64::UZP_VG4_4Z4Z_Q);
6384 return;
6385 case Intrinsic::aarch64_sve_sel_x2:
6387 Node->getValueType(0),
6388 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6389 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6390 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6391 return;
6392 case Intrinsic::aarch64_sve_sel_x4:
6394 Node->getValueType(0),
6395 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6396 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6397 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6398 return;
6399 case Intrinsic::aarch64_sve_frinta_x2:
6400 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6401 return;
6402 case Intrinsic::aarch64_sve_frinta_x4:
6403 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6404 return;
6405 case Intrinsic::aarch64_sve_frintm_x2:
6406 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6407 return;
6408 case Intrinsic::aarch64_sve_frintm_x4:
6409 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6410 return;
6411 case Intrinsic::aarch64_sve_frintn_x2:
6412 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6413 return;
6414 case Intrinsic::aarch64_sve_frintn_x4:
6415 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6416 return;
6417 case Intrinsic::aarch64_sve_frintp_x2:
6418 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6419 return;
6420 case Intrinsic::aarch64_sve_frintp_x4:
6421 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6422 return;
6423 case Intrinsic::aarch64_sve_sunpk_x2:
6425 Node->getValueType(0),
6426 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6427 AArch64::SUNPK_VG2_2ZZ_D}))
6428 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6429 return;
6430 case Intrinsic::aarch64_sve_uunpk_x2:
6432 Node->getValueType(0),
6433 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6434 AArch64::UUNPK_VG2_2ZZ_D}))
6435 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6436 return;
6437 case Intrinsic::aarch64_sve_sunpk_x4:
6439 Node->getValueType(0),
6440 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6441 AArch64::SUNPK_VG4_4Z2Z_D}))
6442 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6443 return;
6444 case Intrinsic::aarch64_sve_uunpk_x4:
6446 Node->getValueType(0),
6447 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6448 AArch64::UUNPK_VG4_4Z2Z_D}))
6449 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6450 return;
6451 case Intrinsic::aarch64_sve_pext_x2: {
6453 Node->getValueType(0),
6454 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6455 AArch64::PEXT_2PCI_D}))
6456 SelectPExtPair(Node, Op);
6457 return;
6458 }
6459 }
6460 break;
6461 }
6462 case ISD::INTRINSIC_VOID: {
6463 unsigned IntNo = Node->getConstantOperandVal(1);
6464 if (Node->getNumOperands() >= 3)
6465 VT = Node->getOperand(2)->getValueType(0);
6466 switch (IntNo) {
6467 default:
6468 break;
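    // The NEON store intrinsics below dispatch on the vector type of the data
    // operand: each element type/width combination maps to a single STn
    // machine opcode, with the v1i64/v1f64 forms falling back to the
    // equivalent ST1 encodings.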
6469 case Intrinsic::aarch64_neon_st1x2: {
6470 if (VT == MVT::v8i8) {
6471 SelectStore(Node, 2, AArch64::ST1Twov8b);
6472 return;
6473 } else if (VT == MVT::v16i8) {
6474 SelectStore(Node, 2, AArch64::ST1Twov16b);
6475 return;
6476 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6477 VT == MVT::v4bf16) {
6478 SelectStore(Node, 2, AArch64::ST1Twov4h);
6479 return;
6480 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6481 VT == MVT::v8bf16) {
6482 SelectStore(Node, 2, AArch64::ST1Twov8h);
6483 return;
6484 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6485 SelectStore(Node, 2, AArch64::ST1Twov2s);
6486 return;
6487 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6488 SelectStore(Node, 2, AArch64::ST1Twov4s);
6489 return;
6490 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6491 SelectStore(Node, 2, AArch64::ST1Twov2d);
6492 return;
6493 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6494 SelectStore(Node, 2, AArch64::ST1Twov1d);
6495 return;
6496 }
6497 break;
6498 }
6499 case Intrinsic::aarch64_neon_st1x3: {
6500 if (VT == MVT::v8i8) {
6501 SelectStore(Node, 3, AArch64::ST1Threev8b);
6502 return;
6503 } else if (VT == MVT::v16i8) {
6504 SelectStore(Node, 3, AArch64::ST1Threev16b);
6505 return;
6506 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6507 VT == MVT::v4bf16) {
6508 SelectStore(Node, 3, AArch64::ST1Threev4h);
6509 return;
6510 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6511 VT == MVT::v8bf16) {
6512 SelectStore(Node, 3, AArch64::ST1Threev8h);
6513 return;
6514 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6515 SelectStore(Node, 3, AArch64::ST1Threev2s);
6516 return;
6517 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6518 SelectStore(Node, 3, AArch64::ST1Threev4s);
6519 return;
6520 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6521 SelectStore(Node, 3, AArch64::ST1Threev2d);
6522 return;
6523 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6524 SelectStore(Node, 3, AArch64::ST1Threev1d);
6525 return;
6526 }
6527 break;
6528 }
6529 case Intrinsic::aarch64_neon_st1x4: {
6530 if (VT == MVT::v8i8) {
6531 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6532 return;
6533 } else if (VT == MVT::v16i8) {
6534 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6535 return;
6536 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6537 VT == MVT::v4bf16) {
6538 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6539 return;
6540 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6541 VT == MVT::v8bf16) {
6542 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6543 return;
6544 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6545 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6546 return;
6547 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6548 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6549 return;
6550 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6551 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6552 return;
6553 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6554 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6555 return;
6556 }
6557 break;
6558 }
6559 case Intrinsic::aarch64_neon_st2: {
6560 if (VT == MVT::v8i8) {
6561 SelectStore(Node, 2, AArch64::ST2Twov8b);
6562 return;
6563 } else if (VT == MVT::v16i8) {
6564 SelectStore(Node, 2, AArch64::ST2Twov16b);
6565 return;
6566 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6567 VT == MVT::v4bf16) {
6568 SelectStore(Node, 2, AArch64::ST2Twov4h);
6569 return;
6570 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6571 VT == MVT::v8bf16) {
6572 SelectStore(Node, 2, AArch64::ST2Twov8h);
6573 return;
6574 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6575 SelectStore(Node, 2, AArch64::ST2Twov2s);
6576 return;
6577 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6578 SelectStore(Node, 2, AArch64::ST2Twov4s);
6579 return;
6580 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6581 SelectStore(Node, 2, AArch64::ST2Twov2d);
6582 return;
6583 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6584 SelectStore(Node, 2, AArch64::ST1Twov1d);
6585 return;
6586 }
6587 break;
6588 }
6589 case Intrinsic::aarch64_neon_st3: {
6590 if (VT == MVT::v8i8) {
6591 SelectStore(Node, 3, AArch64::ST3Threev8b);
6592 return;
6593 } else if (VT == MVT::v16i8) {
6594 SelectStore(Node, 3, AArch64::ST3Threev16b);
6595 return;
6596 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6597 VT == MVT::v4bf16) {
6598 SelectStore(Node, 3, AArch64::ST3Threev4h);
6599 return;
6600 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6601 VT == MVT::v8bf16) {
6602 SelectStore(Node, 3, AArch64::ST3Threev8h);
6603 return;
6604 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6605 SelectStore(Node, 3, AArch64::ST3Threev2s);
6606 return;
6607 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6608 SelectStore(Node, 3, AArch64::ST3Threev4s);
6609 return;
6610 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6611 SelectStore(Node, 3, AArch64::ST3Threev2d);
6612 return;
6613 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6614 SelectStore(Node, 3, AArch64::ST1Threev1d);
6615 return;
6616 }
6617 break;
6618 }
6619 case Intrinsic::aarch64_neon_st4: {
6620 if (VT == MVT::v8i8) {
6621 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6622 return;
6623 } else if (VT == MVT::v16i8) {
6624 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6625 return;
6626 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6627 VT == MVT::v4bf16) {
6628 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6629 return;
6630 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6631 VT == MVT::v8bf16) {
6632 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6633 return;
6634 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6635 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6636 return;
6637 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6638 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6639 return;
6640 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6641 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6642 return;
6643 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6644 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6645 return;
6646 }
6647 break;
6648 }
6649 case Intrinsic::aarch64_neon_st2lane: {
6650 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6651 SelectStoreLane(Node, 2, AArch64::ST2i8);
6652 return;
6653 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6654 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6655 SelectStoreLane(Node, 2, AArch64::ST2i16);
6656 return;
6657 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6658 VT == MVT::v2f32) {
6659 SelectStoreLane(Node, 2, AArch64::ST2i32);
6660 return;
6661 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6662 VT == MVT::v1f64) {
6663 SelectStoreLane(Node, 2, AArch64::ST2i64);
6664 return;
6665 }
6666 break;
6667 }
6668 case Intrinsic::aarch64_neon_st3lane: {
6669 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6670 SelectStoreLane(Node, 3, AArch64::ST3i8);
6671 return;
6672 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6673 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6674 SelectStoreLane(Node, 3, AArch64::ST3i16);
6675 return;
6676 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6677 VT == MVT::v2f32) {
6678 SelectStoreLane(Node, 3, AArch64::ST3i32);
6679 return;
6680 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6681 VT == MVT::v1f64) {
6682 SelectStoreLane(Node, 3, AArch64::ST3i64);
6683 return;
6684 }
6685 break;
6686 }
6687 case Intrinsic::aarch64_neon_st4lane: {
6688 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6689 SelectStoreLane(Node, 4, AArch64::ST4i8);
6690 return;
6691 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6692 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6693 SelectStoreLane(Node, 4, AArch64::ST4i16);
6694 return;
6695 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6696 VT == MVT::v2f32) {
6697 SelectStoreLane(Node, 4, AArch64::ST4i32);
6698 return;
6699 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6700 VT == MVT::v1f64) {
6701 SelectStoreLane(Node, 4, AArch64::ST4i64);
6702 return;
6703 }
6704 break;
6705 }
6706 case Intrinsic::aarch64_sve_st2q: {
6707 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6708 return;
6709 }
6710 case Intrinsic::aarch64_sve_st3q: {
6711 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6712 return;
6713 }
6714 case Intrinsic::aarch64_sve_st4q: {
6715 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6716 return;
6717 }
6718 case Intrinsic::aarch64_sve_st2: {
6719 if (VT == MVT::nxv16i8) {
6720 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6721 return;
6722 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6723 VT == MVT::nxv8bf16) {
6724 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6725 return;
6726 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6727 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6728 return;
6729 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6730 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6731 return;
6732 }
6733 break;
6734 }
6735 case Intrinsic::aarch64_sve_st3: {
6736 if (VT == MVT::nxv16i8) {
6737 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6738 return;
6739 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6740 VT == MVT::nxv8bf16) {
6741 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6742 return;
6743 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6744 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6745 return;
6746 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6747 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6748 return;
6749 }
6750 break;
6751 }
6752 case Intrinsic::aarch64_sve_st4: {
6753 if (VT == MVT::nxv16i8) {
6754 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6755 return;
6756 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6757 VT == MVT::nxv8bf16) {
6758 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6759 return;
6760 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6761 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6762 return;
6763 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6764 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6765 return;
6766 }
6767 break;
6768 }
6769 }
6770 break;
6771 }
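  // The post-indexed load cases below share a pattern: the element type picks
  // the _POST instruction variant, and results that fit in 64 bits use the
  // dsub0 sub-register index while 128-bit results use qsub0.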
6772 case AArch64ISD::LD2post: {
6773 if (VT == MVT::v8i8) {
6774 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6775 return;
6776 } else if (VT == MVT::v16i8) {
6777 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6778 return;
6779 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6780 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6781 return;
6782 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6783 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6784 return;
6785 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6786 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6787 return;
6788 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6789 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6790 return;
6791 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6792 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6793 return;
6794 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6795 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6796 return;
6797 }
6798 break;
6799 }
6800 case AArch64ISD::LD3post: {
6801 if (VT == MVT::v8i8) {
6802 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6803 return;
6804 } else if (VT == MVT::v16i8) {
6805 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6806 return;
6807 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6808 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6809 return;
6810 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6811 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6812 return;
6813 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6814 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6815 return;
6816 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6817 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6818 return;
6819 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6820 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6821 return;
6822 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6823 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6824 return;
6825 }
6826 break;
6827 }
6828 case AArch64ISD::LD4post: {
6829 if (VT == MVT::v8i8) {
6830 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6831 return;
6832 } else if (VT == MVT::v16i8) {
6833 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6834 return;
6835 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6836 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6837 return;
6838 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6839 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6840 return;
6841 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6842 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6843 return;
6844 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6845 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6846 return;
6847 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6848 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6849 return;
6850 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6851 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6852 return;
6853 }
6854 break;
6855 }
6856 case AArch64ISD::LD1x2post: {
6857 if (VT == MVT::v8i8) {
6858 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6859 return;
6860 } else if (VT == MVT::v16i8) {
6861 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6862 return;
6863 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6864 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6865 return;
6866 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6867 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6868 return;
6869 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6870 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6871 return;
6872 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6873 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6874 return;
6875 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6876 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6877 return;
6878 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6879 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6880 return;
6881 }
6882 break;
6883 }
6884 case AArch64ISD::LD1x3post: {
6885 if (VT == MVT::v8i8) {
6886 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6887 return;
6888 } else if (VT == MVT::v16i8) {
6889 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6890 return;
6891 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6892 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6893 return;
6894 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6895 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6896 return;
6897 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6898 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6899 return;
6900 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6901 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6902 return;
6903 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6904 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6905 return;
6906 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6907 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6908 return;
6909 }
6910 break;
6911 }
6912 case AArch64ISD::LD1x4post: {
6913 if (VT == MVT::v8i8) {
6914 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6915 return;
6916 } else if (VT == MVT::v16i8) {
6917 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6918 return;
6919 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6920 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6921 return;
6922 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6923 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6924 return;
6925 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6926 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6927 return;
6928 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6929 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6930 return;
6931 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6932 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6933 return;
6934 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6935 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6936 return;
6937 }
6938 break;
6939 }
6940 case AArch64ISD::LD1DUPpost: {
6941 if (VT == MVT::v8i8) {
6942 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6943 return;
6944 } else if (VT == MVT::v16i8) {
6945 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6946 return;
6947 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6948 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6949 return;
6950 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6951 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6952 return;
6953 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6954 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6955 return;
6956 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6957 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6958 return;
6959 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6960 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6961 return;
6962 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6963 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6964 return;
6965 }
6966 break;
6967 }
6968 case AArch64ISD::LD2DUPpost: {
6969 if (VT == MVT::v8i8) {
6970 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6971 return;
6972 } else if (VT == MVT::v16i8) {
6973 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6974 return;
6975 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6976 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6977 return;
6978 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6979 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6980 return;
6981 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6982 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6983 return;
6984 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6985 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6986 return;
6987 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6988 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6989 return;
6990 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6991 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6992 return;
6993 }
6994 break;
6995 }
6996 case AArch64ISD::LD3DUPpost: {
6997 if (VT == MVT::v8i8) {
6998 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6999 return;
7000 } else if (VT == MVT::v16i8) {
7001 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7002 return;
7003 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7004 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7005 return;
7006 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7007 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7008 return;
7009 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7010 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7011 return;
7012 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7013 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7014 return;
7015 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7016 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7017 return;
7018 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7019 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7020 return;
7021 }
7022 break;
7023 }
7024 case AArch64ISD::LD4DUPpost: {
7025 if (VT == MVT::v8i8) {
7026 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7027 return;
7028 } else if (VT == MVT::v16i8) {
7029 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7030 return;
7031 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7032 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7033 return;
7034 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7035 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7036 return;
7037 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7038 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7039 return;
7040 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7041 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7042 return;
7043 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7044 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7045 return;
7046 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7047 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7048 return;
7049 }
7050 break;
7051 }
7052 case AArch64ISD::LD1LANEpost: {
7053 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7054 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7055 return;
7056 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7057 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7058 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7059 return;
7060 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7061 VT == MVT::v2f32) {
7062 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7063 return;
7064 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7065 VT == MVT::v1f64) {
7066 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7067 return;
7068 }
7069 break;
7070 }
7071 case AArch64ISD::LD2LANEpost: {
7072 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7073 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7074 return;
7075 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7076 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7077 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7078 return;
7079 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7080 VT == MVT::v2f32) {
7081 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7082 return;
7083 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7084 VT == MVT::v1f64) {
7085 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7086 return;
7087 }
7088 break;
7089 }
7090 case AArch64ISD::LD3LANEpost: {
7091 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7092 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7093 return;
7094 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7095 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7096 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7097 return;
7098 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7099 VT == MVT::v2f32) {
7100 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7101 return;
7102 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7103 VT == MVT::v1f64) {
7104 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7105 return;
7106 }
7107 break;
7108 }
7109 case AArch64ISD::LD4LANEpost: {
7110 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7111 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7112 return;
7113 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7114 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7115 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7116 return;
7117 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7118 VT == MVT::v2f32) {
7119 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7120 return;
7121 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7122 VT == MVT::v1f64) {
7123 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7124 return;
7125 }
7126 break;
7127 }
7128 case AArch64ISD::ST2post: {
7129 VT = Node->getOperand(1).getValueType();
7130 if (VT == MVT::v8i8) {
7131 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7132 return;
7133 } else if (VT == MVT::v16i8) {
7134 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7135 return;
7136 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7137 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7138 return;
7139 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7140 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7141 return;
7142 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7143 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7144 return;
7145 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7146 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7147 return;
7148 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7149 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7150 return;
7151 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7152 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7153 return;
7154 }
7155 break;
7156 }
7157 case AArch64ISD::ST3post: {
7158 VT = Node->getOperand(1).getValueType();
7159 if (VT == MVT::v8i8) {
7160 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7161 return;
7162 } else if (VT == MVT::v16i8) {
7163 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7164 return;
7165 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7166 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7167 return;
7168 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7169 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7170 return;
7171 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7172 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7173 return;
7174 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7175 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7176 return;
7177 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7178 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7179 return;
7180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7181 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7182 return;
7183 }
7184 break;
7185 }
7186 case AArch64ISD::ST4post: {
7187 VT = Node->getOperand(1).getValueType();
7188 if (VT == MVT::v8i8) {
7189 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7190 return;
7191 } else if (VT == MVT::v16i8) {
7192 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7193 return;
7194 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7195 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7196 return;
7197 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7198 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7199 return;
7200 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7201 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7202 return;
7203 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7204 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7205 return;
7206 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7207 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7208 return;
7209 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7210 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7211 return;
7212 }
7213 break;
7214 }
7215 case AArch64ISD::ST1x2post: {
7216 VT = Node->getOperand(1).getValueType();
7217 if (VT == MVT::v8i8) {
7218 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7219 return;
7220 } else if (VT == MVT::v16i8) {
7221 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7222 return;
7223 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7224 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7225 return;
7226 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7227 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7228 return;
7229 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7230 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7231 return;
7232 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7233 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7234 return;
7235 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7236 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7237 return;
7238 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7239 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7240 return;
7241 }
7242 break;
7243 }
7244 case AArch64ISD::ST1x3post: {
7245 VT = Node->getOperand(1).getValueType();
7246 if (VT == MVT::v8i8) {
7247 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7248 return;
7249 } else if (VT == MVT::v16i8) {
7250 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7251 return;
7252 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7253 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7254 return;
7255 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7256 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7257 return;
7258 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7259 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7260 return;
7261 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7262 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7263 return;
7264 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7265 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7266 return;
7267 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7268 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7269 return;
7270 }
7271 break;
7272 }
7273 case AArch64ISD::ST1x4post: {
7274 VT = Node->getOperand(1).getValueType();
7275 if (VT == MVT::v8i8) {
7276 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7277 return;
7278 } else if (VT == MVT::v16i8) {
7279 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7280 return;
7281 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7282 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7283 return;
7284 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7285 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7286 return;
7287 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7288 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7289 return;
7290 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7291 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7292 return;
7293 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7294 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7295 return;
7296 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7297 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7298 return;
7299 }
7300 break;
7301 }
7302 case AArch64ISD::ST2LANEpost: {
7303 VT = Node->getOperand(1).getValueType();
7304 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7305 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7306 return;
7307 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7308 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7309 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7310 return;
7311 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7312 VT == MVT::v2f32) {
7313 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7314 return;
7315 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7316 VT == MVT::v1f64) {
7317 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7318 return;
7319 }
7320 break;
7321 }
7322 case AArch64ISD::ST3LANEpost: {
7323 VT = Node->getOperand(1).getValueType();
7324 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7325 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7326 return;
7327 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7328 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7329 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7330 return;
7331 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7332 VT == MVT::v2f32) {
7333 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7334 return;
7335 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7336 VT == MVT::v1f64) {
7337 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::ST4LANEpost: {
7343 VT = Node->getOperand(1).getValueType();
7344 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7345 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7346 return;
7347 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7348 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7349 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7350 return;
7351 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7352 VT == MVT::v2f32) {
7353 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7354 return;
7355 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7356 VT == MVT::v1f64) {
7357 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7358 return;
7359 }
7360 break;
7361 }
7362 }
7363
7364 // Select the default instruction
7365 SelectCode(Node);
7366}
7367
7368/// createAArch64ISelDag - This pass converts a legalized DAG into an
7369/// AArch64-specific DAG, ready for instruction scheduling.
7370FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7371 CodeGenOptLevel OptLevel) {
7372 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7373}
7374
7375/// When \p PredVT is a scalable vector predicate in the form
7376/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7377/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7378/// structured vectors (NumVec >1), the output data type is
7379/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7380/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7381/// EVT.
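/// For example, MVT::nxv16i1 maps to MVT::nxv16i8 and MVT::nxv2i1 maps to
/// MVT::nxv2i64 (in both cases M x bits = 128); with NumVec = 2 the latter
/// becomes MVT::nxv4i64.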
7382static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7383 unsigned NumVec) {
7384 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7385 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7386 return EVT();
7387
7388 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7389 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7390 return EVT();
7391
7392 ElementCount EC = PredVT.getVectorElementCount();
7393 EVT ScalarVT =
7394 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7395 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7396
7397 return MemVT;
7398}
7399
7400/// Return the EVT of the data associated to a memory operation in \p
7401/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
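/// For example, an ISD::INTRINSIC_W_CHAIN node for aarch64_sve_ld2_sret whose
/// predicate operand has type MVT::nxv4i1 yields MVT::nxv8i32 (two registers
/// of four 32-bit elements each).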
7402static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7403 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7404 return MemIntr->getMemoryVT();
7405
7406 if (isa<MemSDNode>(Root)) {
7407 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7408
7409 EVT DataVT;
7410 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7411 DataVT = Load->getValueType(0);
7412 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7413 DataVT = Load->getValueType(0);
7414 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7415 DataVT = Store->getValue().getValueType();
7416 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7417 DataVT = Store->getValue().getValueType();
7418 else
7419 llvm_unreachable("Unexpected MemSDNode!");
7420
7421 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7422 }
7423
7424 const unsigned Opcode = Root->getOpcode();
7425 // For custom ISD nodes, we have to look at them individually to extract the
7426 // type of the data moved to/from memory.
7427 switch (Opcode) {
7428 case AArch64ISD::LD1_MERGE_ZERO:
7429 case AArch64ISD::LD1S_MERGE_ZERO:
7430 case AArch64ISD::LDNF1_MERGE_ZERO:
7431 case AArch64ISD::LDNF1S_MERGE_ZERO:
7432 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7433 case AArch64ISD::ST1_PRED:
7434 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7435 default:
7436 break;
7437 }
7438
7439 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7440 return EVT();
7441
7442 switch (Root->getConstantOperandVal(1)) {
7443 default:
7444 return EVT();
7445 case Intrinsic::aarch64_sme_ldr:
7446 case Intrinsic::aarch64_sme_str:
7447 return MVT::nxv16i8;
7448 case Intrinsic::aarch64_sve_prf:
7449 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7450 // width of the predicate.
7451 return getPackedVectorTypeFromPredicateType(
7452 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7453 case Intrinsic::aarch64_sve_ld2_sret:
7454 case Intrinsic::aarch64_sve_ld2q_sret:
7455 return getPackedVectorTypeFromPredicateType(
7456 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7457 case Intrinsic::aarch64_sve_st2q:
7458 return getPackedVectorTypeFromPredicateType(
7459 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7460 case Intrinsic::aarch64_sve_ld3_sret:
7461 case Intrinsic::aarch64_sve_ld3q_sret:
7462 return getPackedVectorTypeFromPredicateType(
7463 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7464 case Intrinsic::aarch64_sve_st3q:
7465 return getPackedVectorTypeFromPredicateType(
7466 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7467 case Intrinsic::aarch64_sve_ld4_sret:
7468 case Intrinsic::aarch64_sve_ld4q_sret:
7469 return getPackedVectorTypeFromPredicateType(
7470 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7471 case Intrinsic::aarch64_sve_st4q:
7472 return getPackedVectorTypeFromPredicateType(
7473 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7474 case Intrinsic::aarch64_sve_ld1udq:
7475 case Intrinsic::aarch64_sve_st1dq:
7476 return EVT(MVT::nxv1i64);
7477 case Intrinsic::aarch64_sve_ld1uwq:
7478 case Intrinsic::aarch64_sve_st1wq:
7479 return EVT(MVT::nxv1i32);
7480 }
7481}
7482
7483/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7484/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7485/// where Root is the memory access using N for its address.
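/// For example, with MemVT == MVT::nxv16i8 (16 bytes per vscale unit), an
/// address of the form (add Base, (vscale 32)) selects OffImm = 2, provided
/// 2 lies within [Min, Max].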
7486template <int64_t Min, int64_t Max>
7487bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7488 SDValue &Base,
7489 SDValue &OffImm) {
7490 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7491 const DataLayout &DL = CurDAG->getDataLayout();
7492 const MachineFrameInfo &MFI = MF->getFrameInfo();
7493
7494 if (N.getOpcode() == ISD::FrameIndex) {
7495 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7496 // We can only encode VL scaled offsets, so only fold in frame indexes
7497 // referencing SVE objects.
7499 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7500 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7501 return true;
7502 }
7503
7504 return false;
7505 }
7506
7507 if (MemVT == EVT())
7508 return false;
7509
7510 if (N.getOpcode() != ISD::ADD)
7511 return false;
7512
7513 SDValue VScale = N.getOperand(1);
7514 int64_t MulImm = std::numeric_limits<int64_t>::max();
7515 if (VScale.getOpcode() == ISD::VSCALE) {
7516 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7517 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7518 int64_t ByteOffset = C->getSExtValue();
7519 const auto KnownVScale =
7520 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7521
7522 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7523 return false;
7524
7525 MulImm = ByteOffset / KnownVScale;
7526 } else
7527 return false;
7528
7529 TypeSize TS = MemVT.getSizeInBits();
7530 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7531
7532 if ((MulImm % MemWidthBytes) != 0)
7533 return false;
7534
7535 int64_t Offset = MulImm / MemWidthBytes;
7536 if ((Offset < Min) || (Offset > Max))
7537 return false;
7538
7539 Base = N.getOperand(0);
7540 if (Base.getOpcode() == ISD::FrameIndex) {
7541 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7542 // We can only encode VL scaled offsets, so only fold in frame indexes
7543 // referencing SVE objects.
7545 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7546 }
7547
7548 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7549 return true;
7550}
7551
7552/// Select register plus register addressing mode for SVE, with scaled
7553/// offset.
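/// For example, with Scale == 2 (32-bit elements), (add x0, (shl x1, 2))
/// selects Base = x0 and Offset = x1, while (add x0, 256) materializes
/// 256 >> 2 == 64 into a register with MOVi64imm and uses that as the offset.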
7554bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7555 SDValue &Base,
7556 SDValue &Offset) {
7557 if (N.getOpcode() != ISD::ADD)
7558 return false;
7559
7560 // Process an ADD node.
7561 const SDValue LHS = N.getOperand(0);
7562 const SDValue RHS = N.getOperand(1);
7563
7564 // 8 bit data does not come with the SHL node, so it is treated
7565 // separately.
7566 if (Scale == 0) {
7567 Base = LHS;
7568 Offset = RHS;
7569 return true;
7570 }
7571
7572 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7573 int64_t ImmOff = C->getSExtValue();
7574 unsigned Size = 1 << Scale;
7575
7576 // To use the reg+reg addressing mode, the immediate must be a multiple of
7577 // the vector element's byte size.
7578 if (ImmOff % Size)
7579 return false;
7580
7581 SDLoc DL(N);
7582 Base = LHS;
7583 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7584 SDValue Ops[] = {Offset};
7585 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7586 Offset = SDValue(MI, 0);
7587 return true;
7588 }
7589
7590 // Check if the RHS is a shift node with a constant.
7591 if (RHS.getOpcode() != ISD::SHL)
7592 return false;
7593
7594 const SDValue ShiftRHS = RHS.getOperand(1);
7595 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7596 if (C->getZExtValue() == Scale) {
7597 Base = LHS;
7598 Offset = RHS.getOperand(0);
7599 return true;
7600 }
7601
7602 return false;
7603}
7604
7605bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7606 const AArch64TargetLowering *TLI =
7607 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7608
7609 return TLI->isAllActivePredicate(*CurDAG, N);
7610}
7611
7612bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7613 EVT VT = N.getValueType();
7614 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7615}
7616
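/// Select a tile-slice address of the form Base + Offset for SME. A constant
/// is accepted when it is positive, at most MaxSize and a multiple of Scale,
/// and is encoded as Constant / Scale; a bare constant uses a zero base, a
/// (add Reg, Constant) node splits into Reg plus the scaled constant, and
/// anything else falls back to Reg + 0.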
7617bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7618 SDValue &Base, SDValue &Offset,
7619 unsigned Scale) {
7620 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7621 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7622 int64_t ImmOff = C->getSExtValue();
7623 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7624 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7625 }
7626 return SDValue();
7627 };
7628
7629 if (SDValue C = MatchConstantOffset(N)) {
7630 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7631 Offset = C;
7632 return true;
7633 }
7634
7635 // Try to untangle an ADD node into a 'reg + offset'
7636 if (CurDAG->isBaseWithConstantOffset(N)) {
7637 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7638 Base = N.getOperand(0);
7639 Offset = C;
7640 return true;
7641 }
7642 }
7643
7644 // By default, just match reg + 0.
7645 Base = N;
7646 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7647 return true;
7648}
7649
7650bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7651 SDValue &Imm) {
7652 AArch64CC::CondCode CC =
7653 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7654 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7655 // Check conservatively if the immediate fits the valid range [0, 64).
7656 // Immediate variants for GE and HS definitely need to be decremented
7657 // when lowering the pseudos later, so an immediate of 1 would become 0.
7658 // For the inverse conditions LT and LO we don't know for sure if they
7659 // will need a decrement but should the decision be made to reverse the
7660 // branch condition, we again end up with the need to decrement.
7661 // The same argument holds for LE, LS, GT and HI and possibly
7662 // incremented immediates. This can lead to slightly less optimal
7663 // codegen, e.g. we never codegen the legal case
7664 // cblt w0, #63, A
7665 // because we could end up with the illegal case
7666 // cbge w0, #64, B
7667 // should the decision to reverse the branch direction be made. For the
7668 // lower bound cases this is no problem since we can express comparisons
7669 // against 0 with either tbz/tbnz or using wzr/xzr.
7670 uint64_t LowerBound = 0, UpperBound = 64;
7671 switch (CC) {
7672 case AArch64CC::GE:
7673 case AArch64CC::HS:
7674 case AArch64CC::LT:
7675 case AArch64CC::LO:
7676 LowerBound = 1;
7677 break;
7678 case AArch64CC::LE:
7679 case AArch64CC::LS:
7680 case AArch64CC::GT:
7681 case AArch64CC::HI:
7682 UpperBound = 63;
7683 break;
7684 default:
7685 break;
7686 }
7687
7688 if (CN->getAPIntValue().uge(LowerBound) &&
7689 CN->getAPIntValue().ult(UpperBound)) {
7690 SDLoc DL(N);
7691 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7692 return true;
7693 }
7694 }
7695
7696 return false;
7697}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5999
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1452
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
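The SDNode/SDValue accessors above are how a selector inspects the DAG before committing to a pattern. An illustrative, compile-only fragment (not taken from this file; the helper name is made up for the example) showing their typical use:

// Illustrative fragment; assumes the usual SelectionDAG headers.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstdint>
using namespace llvm;

// Returns true if V is an ISD::AND whose right-hand operand is a constant
// and which has a single user (a common profitability check).
static bool isAndWithConstantRHS(SDValue V, uint64_t &MaskOut) {
  if (V.getOpcode() != ISD::AND || !V.hasOneUse())
    return false;
  if (!isa<ConstantSDNode>(V.getOperand(1)))
    return false;
  MaskOut = V.getConstantOperandVal(1);
  return true;
}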
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
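The SelectionDAG helpers above (getTargetConstant, getMachineNode, and friends) are what Select() uses to materialize machine nodes. An illustrative fragment (not taken from this file) of the usual shape; the opcode parameter and operand layout are placeholders, not a real AArch64 pattern:

// Illustrative fragment; assumes the usual SelectionDAG headers.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDNode *emitExample(SelectionDAG &DAG, SDNode *N, unsigned MachineOpc) {
  SDLoc DL(N);
  // Immediates fed to MachineSDNodes must be target constants.
  SDValue Imm = DAG.getTargetConstant(0, DL, MVT::i32);
  SDValue Ops[] = {N->getOperand(0), Imm};
  return DAG.getMachineNode(MachineOpc, DL, N->getValueType(0), Ops);
}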
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
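A minimal sketch (not part of this file) of StringRef::split around the first separator, as one might use when parsing a dotted register name; the concrete string is only an illustration:

#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  llvm::StringRef Name("za3.d");
  auto [Base, Suffix] = Name.split('.');  // Split at the first '.'.
  assert(Base == "za3" && Suffix == "d");
  return 0;
}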
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
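The AArch64_AM helpers above encode and decode the immediate forms of logical and shifted-register instructions. A minimal sketch (not part of this file) using the signatures listed here; it assumes access to the target-internal header MCTargetDesc/AArch64AddressingModes.h, so it only builds inside the LLVM tree:

#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cassert>

int main() {
  // 0xff00 is a contiguous run of ones, so it is a valid 64-bit logical
  // immediate; decoding must round-trip the encoding.
  uint64_t Imm = 0xff00;
  assert(AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64));
  uint64_t Enc = 0;
  assert(AArch64_AM::processLogicalImmediate(Imm, /*RegSize=*/64, Enc));
  assert(AArch64_AM::decodeLogicalImmediate(Enc, /*regSize=*/64) == Imm);

  // Shifter immediates pack the shift kind and amount into one operand.
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL);
  assert(AArch64_AM::getShiftValue(Shifter) == 12);
  return 0;
}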
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:276
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
int countr_zero(T Val)
Count the number of zero bits from the least significant bit upward, stopping at the first set bit.
Definition bit.h:186
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1948
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
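The MathExtras predicates above are the scalar counterparts of the APInt queries earlier in this list and carry the same mask-matching logic. A minimal sketch (not part of this file) on a few concrete masks, assuming an LLVM development environment:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  // A contiguous run of ones not anchored at bit 0 is a "shifted mask".
  static_assert(llvm::isShiftedMask_64(0x000000ffff000000ULL));
  static_assert(!llvm::isShiftedMask_64(0));      // An empty run does not count.
  static_assert(llvm::isMask_64(0x00ffffff));     // Anchored at bit 0.

  static_assert(llvm::maskTrailingOnes<uint64_t>(12) == 0xfffULL);
  static_assert(llvm::isUInt<12>(4095) && !llvm::isUInt<12>(4096));

  assert(llvm::Log2_64(4096) == 12);
  assert(llvm::countr_zero(0x100u) == 8);
  assert(llvm::countr_one(0x0fu) == 4);
  return 0;
}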
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
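The EVT accessors above distinguish the fixed-length and scalable vector types this selector has to handle. A minimal sketch (not part of this file) building both kinds and querying them, assuming an LLVM development environment:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

int main() {
  llvm::LLVMContext Ctx;

  llvm::EVT Fixed = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
  assert(Fixed.isFixedLengthVector() && Fixed.is128BitVector());
  assert(Fixed.getVectorNumElements() == 4);
  assert(Fixed.getScalarSizeInBits() == 32);

  // <vscale x 2 x i64>: the size is only a known minimum at compile time.
  llvm::EVT Scalable =
      llvm::EVT::getVectorVT(Ctx, llvm::MVT::i64, 2, /*IsScalable=*/true);
  assert(Scalable.isScalableVector());
  assert(Scalable.getVectorMinNumElements() == 2);
  assert(Scalable.getSizeInBits().getKnownMinValue() == 128);

  // Same element count, different element type.
  llvm::EVT AsF64 = Scalable.changeVectorElementType(llvm::MVT::f64);
  assert(AsF64.getVectorElementType() == llvm::MVT::f64);
  return 0;
}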
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
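KnownBits is the result type of SelectionDAG::computeKnownBits listed earlier. A minimal sketch (not part of this file) that populates one by hand just to show the queries, assuming an LLVM development environment:

#include "llvm/Support/KnownBits.h"
#include <cassert>

int main() {
  llvm::KnownBits Known(32);
  Known.Zero.setHighBits(24);  // The top 24 bits are known to be zero...
  Known.One.setBit(0);         // ...and bit 0 is known to be one.

  assert(Known.getBitWidth() == 32);
  assert(Known.countMinLeadingZeros() == 24);
  assert(!Known.isUnknown());
  return 0;
}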
Matching combinators.