LLVM 22.0.0git
AArch64ISelDAGToDAG.cpp
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
60 return SelectionDAGISel::runOnMachineFunction(MF);
61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
80 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
82 return SelectShiftedRegister(N, false, Reg, Shift);
83 }
84 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
85 return SelectShiftedRegister(N, true, Reg, Shift);
86 }
87 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
88 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
89 }
90 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
91 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
92 }
93 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
94 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
95 }
96 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
97 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
98 }
99 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
100 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
101 }
102 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
103 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
104 }
105 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
106 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
107 }
108 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
109 return SelectAddrModeIndexed(N, 1, Base, OffImm);
110 }
111 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
112 return SelectAddrModeIndexed(N, 2, Base, OffImm);
113 }
114 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
115 return SelectAddrModeIndexed(N, 4, Base, OffImm);
116 }
117 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
118 return SelectAddrModeIndexed(N, 8, Base, OffImm);
119 }
120 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
121 return SelectAddrModeIndexed(N, 16, Base, OffImm);
122 }
123 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
124 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
125 }
126 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
127 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
128 }
129 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
130 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
131 }
132 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
133 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
134 }
135 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
136 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
137 }
138 template <unsigned Size, unsigned Max>
139 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
140 // Test if there is an appropriate addressing mode and check if the
141 // immediate fits.
142 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
143 if (Found) {
144 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
145 int64_t C = CI->getSExtValue();
146 if (C <= Max)
147 return true;
148 }
149 }
150
151 // Otherwise, base only, materialize address in register.
152 Base = N;
153 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
154 return true;
155 }
156
157 template<int Width>
158 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
159 SDValue &SignExtend, SDValue &DoShift) {
160 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
161 }
162
163 template<int Width>
164 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
165 SDValue &SignExtend, SDValue &DoShift) {
166 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
167 }
168
169 bool SelectExtractHigh(SDValue N, SDValue &Res) {
170 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
171 N = N->getOperand(0);
172 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
173 !isa<ConstantSDNode>(N->getOperand(1)))
174 return false;
175 EVT VT = N->getValueType(0);
176 EVT LVT = N->getOperand(0).getValueType();
177 unsigned Index = N->getConstantOperandVal(1);
178 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
179 Index != VT.getVectorNumElements())
180 return false;
181 Res = N->getOperand(0);
182 return true;
183 }
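 // Editor's note (illustrative sketch, not in the original source): this
 // matches e.g. (v4i16 extract_subvector (v8i16 X), 4) -- the upper 64-bit
 // half of a 128-bit vector -- and returns the full 128-bit source in Res so
 // that a high-half ("2") instruction variant can consume it directly.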
184
185 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
186 if (N.getOpcode() != AArch64ISD::VLSHR)
187 return false;
188 SDValue Op = N->getOperand(0);
189 EVT VT = Op.getValueType();
190 unsigned ShtAmt = N->getConstantOperandVal(1);
191 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
192 return false;
193
194 APInt Imm;
195 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
196 Imm = APInt(VT.getScalarSizeInBits(),
197 Op.getOperand(1).getConstantOperandVal(0)
198 << Op.getOperand(1).getConstantOperandVal(1));
199 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
200 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
201 Imm = APInt(VT.getScalarSizeInBits(),
202 Op.getOperand(1).getConstantOperandVal(0));
203 else
204 return false;
205
206 if (Imm != 1ULL << (ShtAmt - 1))
207 return false;
208
209 Res1 = Op.getOperand(0);
210 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
211 return true;
212 }
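 // Editor's note (illustrative sketch, not in the original source): this
 // recognises the rounding pattern (X + (1 << (s - 1))) >>u s. For a v8i16
 // input with s = 4 the addend must be the splatted constant 8; the match
 // then yields Res1 = X and Res2 = #4 so a rounding-shift instruction can be
 // formed from it.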
213
214 bool SelectDupZeroOrUndef(SDValue N) {
215 switch(N->getOpcode()) {
216 case ISD::UNDEF:
217 return true;
218 case AArch64ISD::DUP:
219 case ISD::SPLAT_VECTOR: {
220 auto Opnd0 = N->getOperand(0);
221 if (isNullConstant(Opnd0))
222 return true;
223 if (isNullFPConstant(Opnd0))
224 return true;
225 break;
226 }
227 default:
228 break;
229 }
230
231 return false;
232 }
233
234 bool SelectAny(SDValue) { return true; }
235
236 bool SelectDupZero(SDValue N) {
237 switch(N->getOpcode()) {
238 case AArch64ISD::DUP:
239 case ISD::SPLAT_VECTOR: {
240 auto Opnd0 = N->getOperand(0);
241 if (isNullConstant(Opnd0))
242 return true;
243 if (isNullFPConstant(Opnd0))
244 return true;
245 break;
246 }
247 }
248
249 return false;
250 }
251
252 template <MVT::SimpleValueType VT, bool Negate>
253 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
254 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
255 }
256
257 template <MVT::SimpleValueType VT, bool Negate>
258 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
259 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
260 }
261
262 template <MVT::SimpleValueType VT>
263 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
264 return SelectSVECpyDupImm(N, VT, Imm, Shift);
265 }
266
267 template <MVT::SimpleValueType VT, bool Invert = false>
268 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
269 return SelectSVELogicalImm(N, VT, Imm, Invert);
270 }
271
272 template <MVT::SimpleValueType VT>
273 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
274 return SelectSVEArithImm(N, VT, Imm);
275 }
276
277 template <unsigned Low, unsigned High, bool AllowSaturation = false>
278 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
279 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
280 }
281
282 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
283 if (N->getOpcode() != ISD::SPLAT_VECTOR)
284 return false;
285
286 EVT EltVT = N->getValueType(0).getVectorElementType();
287 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
288 /* High */ EltVT.getFixedSizeInBits(),
289 /* AllowSaturation */ true, Imm);
290 }
291
292 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
293 template<signed Min, signed Max, signed Scale, bool Shift>
294 bool SelectCntImm(SDValue N, SDValue &Imm) {
295 if (!isa<ConstantSDNode>(N))
296 return false;
297
298 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
299 if (Shift)
300 MulImm = 1LL << MulImm;
301
302 if ((MulImm % std::abs(Scale)) != 0)
303 return false;
304
305 MulImm /= Scale;
306 if ((MulImm >= Min) && (MulImm <= Max)) {
307 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
308 return true;
309 }
310
311 return false;
312 }
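 // Editor's note (illustrative sketch, not in the original source): with
 // template arguments such as <Min=1, Max=16, Scale=8, Shift=false>, a
 // VSCALE multiplier of 32 divides down to 32/8 = 4, so Imm becomes #4 --
 // matching e.g. "cnth x0, all, mul #4", which also computes VSCALE*32.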
313
314 template <signed Max, signed Scale>
315 bool SelectEXTImm(SDValue N, SDValue &Imm) {
316 if (!isa<ConstantSDNode>(N))
317 return false;
318
319 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
320
321 if (MulImm >= 0 && MulImm <= Max) {
322 MulImm *= Scale;
323 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
324 return true;
325 }
326
327 return false;
328 }
329
330 template <unsigned BaseReg, unsigned Max>
331 bool ImmToReg(SDValue N, SDValue &Imm) {
332 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
333 uint64_t C = CI->getZExtValue();
334
335 if (C > Max)
336 return false;
337
338 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
339 return true;
340 }
341 return false;
342 }
343
344 /// Form sequences of consecutive 64/128-bit registers for use in NEON
345 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
346 /// between 1 and 4 elements. If it contains a single element, that element is
347 /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
348 SDValue createDTuple(ArrayRef<SDValue> Vecs);
349 SDValue createQTuple(ArrayRef<SDValue> Vecs);
350 // Form a sequence of SVE registers for instructions using list of vectors,
351 // e.g. structured loads and stores (ldN, stN).
352 SDValue createZTuple(ArrayRef<SDValue> Vecs);
353
354 // Similar to above, except the register must start at a multiple of the
355 // tuple size, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
356 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
357
358 /// Generic helper for the createDTuple/createQTuple
359 /// functions. Those should almost always be called instead.
360 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
361 const unsigned SubRegs[]);
362
363 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
364
365 bool tryIndexedLoad(SDNode *N);
366
367 void SelectPtrauthAuth(SDNode *N);
368 void SelectPtrauthResign(SDNode *N);
369
370 bool trySelectStackSlotTagP(SDNode *N);
371 void SelectTagP(SDNode *N);
372
373 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
374 unsigned SubRegIdx);
375 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
378 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
380 unsigned Opc_rr, unsigned Opc_ri,
381 bool IsIntr = false);
382 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
383 unsigned Scale, unsigned Opc_ri,
384 unsigned Opc_rr);
385 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
386 bool IsZmMulti, unsigned Opcode,
387 bool HasPred = false);
388 void SelectPExtPair(SDNode *N, unsigned Opc);
389 void SelectWhilePair(SDNode *N, unsigned Opc);
390 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
391 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
394 bool IsTupleInput, unsigned Opc);
395 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
396
397 template <unsigned MaxIdx, unsigned Scale>
398 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
399 unsigned Op);
400 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
401 unsigned Op, unsigned MaxIdx, unsigned Scale,
402 unsigned BaseReg = 0);
403 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
404 /// SVE Reg+Imm addressing mode.
405 template <int64_t Min, int64_t Max>
406 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
407 SDValue &OffImm);
408 /// SVE Reg+Reg address mode.
409 template <unsigned Scale>
410 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
411 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
412 }
413
414 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
415 unsigned Opc, uint32_t MaxImm);
416
417 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
418
419 template <unsigned MaxIdx, unsigned Scale>
420 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
421 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
422 }
423
424 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
429 unsigned Opc_rr, unsigned Opc_ri);
430 std::tuple<unsigned, SDValue, SDValue>
431 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
432 const SDValue &OldBase, const SDValue &OldOffset,
433 unsigned Scale);
434
435 bool tryBitfieldExtractOp(SDNode *N);
436 bool tryBitfieldExtractOpFromSExt(SDNode *N);
437 bool tryBitfieldInsertOp(SDNode *N);
438 bool tryBitfieldInsertInZeroOp(SDNode *N);
439 bool tryShiftAmountMod(SDNode *N);
440
441 bool tryReadRegister(SDNode *N);
442 bool tryWriteRegister(SDNode *N);
443
444 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
445 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
446
447 bool trySelectXAR(SDNode *N);
448
449// Include the pieces autogenerated from the target description.
450#include "AArch64GenDAGISel.inc"
451
452private:
453 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
454 SDValue &Shift);
455 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
456 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
457 SDValue &OffImm) {
458 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
459 }
460 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
461 unsigned Size, SDValue &Base,
462 SDValue &OffImm);
463 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &Offset, SDValue &SignExtend,
469 SDValue &DoShift);
470 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
474 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
475 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
476 SDValue &Offset, SDValue &SignExtend);
477
478 template<unsigned RegWidth>
479 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
480 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
481 }
482
483 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
484
485 template<unsigned RegWidth>
486 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
487 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
488 }
489
490 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
491 unsigned Width);
492
493 bool SelectCMP_SWAP(SDNode *N);
494
495 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496 bool Negate);
497 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
500 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
501
502 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
503 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
504 bool AllowSaturation, SDValue &Imm);
505
506 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
507 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
508 SDValue &Offset);
509 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
510 SDValue &Offset, unsigned Scale = 1);
511
512 bool SelectAllActivePredicate(SDValue N);
513 bool SelectAnyPredicate(SDValue N);
514
515 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
516};
517
518class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
519public:
520 static char ID;
521 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
522 CodeGenOptLevel OptLevel)
523 : SelectionDAGISelLegacy(
524 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
525};
526} // end anonymous namespace
527
528char AArch64DAGToDAGISelLegacy::ID = 0;
529
530INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
531
532/// isIntImmediate - This method tests to see if the node is a constant
533/// operand. If so, Imm will receive the value.
534 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
535 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
536 Imm = C->getZExtValue();
537 return true;
538 }
539 return false;
540}
541
542// isIntImmediate - This method tests to see if N is a constant operand.
543// If so, Imm will receive the value.
544static bool isIntImmediate(SDValue N, uint64_t &Imm) {
545 return isIntImmediate(N.getNode(), Imm);
546}
547
548// isOpcWithIntImmediate - This method tests to see if the node has a specific
549// opcode and an immediate integer right operand.
550// If so, Imm will receive the value.
551static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
552 uint64_t &Imm) {
553 return N->getOpcode() == Opc &&
554 isIntImmediate(N->getOperand(1).getNode(), Imm);
555}
556
557// isIntImmediateEq - This method tests to see if N is a constant operand that
558// is equivalent to 'ImmExpected'.
559#ifndef NDEBUG
560static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
561 uint64_t Imm;
562 if (!isIntImmediate(N.getNode(), Imm))
563 return false;
564 return Imm == ImmExpected;
565}
566#endif
567
568bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
569 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
570 std::vector<SDValue> &OutOps) {
571 switch(ConstraintID) {
572 default:
573 llvm_unreachable("Unexpected asm memory constraint");
574 case InlineAsm::ConstraintCode::m:
575 case InlineAsm::ConstraintCode::o:
576 case InlineAsm::ConstraintCode::Q:
577 // We need to make sure that this one operand does not end up in XZR, thus
578 // require the address to be in a PointerRegClass register.
579 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
580 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
581 SDLoc dl(Op);
582 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
583 SDValue NewOp =
584 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
585 dl, Op.getValueType(),
586 Op, RC), 0);
587 OutOps.push_back(NewOp);
588 return false;
589 }
590 return true;
591}
592
593/// SelectArithImmed - Select an immediate value that can be represented as
594/// a 12-bit value shifted left by either 0 or 12. If so, return true with
595/// Val set to the 12-bit value and Shift set to the shifter operand.
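/// Editor's note (illustrative examples, not in the original source), tracing
/// the checks below:
///   Immed = 0x123    -> Val = 0x123, Shift = LSL #0  (e.g. add w0, w1, #0x123)
///   Immed = 0x123000 -> Val = 0x123, Shift = LSL #12
///   Immed = 0x123456 -> rejected: the low 12 bits are set and the value does
///                       not fit in 12 bits.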
596bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
597 SDValue &Shift) {
598 // This function is called from the addsub_shifted_imm ComplexPattern,
599 // which lists [imm] as the list of opcodes it's interested in; however,
600 // we still need to check whether the operand is actually an immediate
601 // here because the ComplexPattern opcode list is only used in
602 // root-level opcode matching.
603 if (!isa<ConstantSDNode>(N.getNode()))
604 return false;
605
606 uint64_t Immed = N.getNode()->getAsZExtVal();
607 unsigned ShiftAmt;
608
609 if (Immed >> 12 == 0) {
610 ShiftAmt = 0;
611 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
612 ShiftAmt = 12;
613 Immed = Immed >> 12;
614 } else
615 return false;
616
617 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
618 SDLoc dl(N);
619 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
620 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
621 return true;
622}
623
624/// SelectNegArithImmed - As above, but negates the value before trying to
625/// select it.
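/// Editor's note (illustrative example, not in the original source): for an
/// i32 constant of -16 (0xFFFFFFF0) the negation below yields 0x10, which then
/// selects as Val = 0x10, Shift = LSL #0 -- allowing, say, a subtraction of
/// -16 to be encoded as an addition of 16.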
626bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
627 SDValue &Shift) {
628 // This function is called from the addsub_shifted_imm ComplexPattern,
629 // which lists [imm] as the list of opcodes it's interested in; however,
630 // we still need to check whether the operand is actually an immediate
631 // here because the ComplexPattern opcode list is only used in
632 // root-level opcode matching.
633 if (!isa<ConstantSDNode>(N.getNode()))
634 return false;
635
636 // The immediate operand must be a 24-bit zero-extended immediate.
637 uint64_t Immed = N.getNode()->getAsZExtVal();
638
639 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
640 // have the opposite effect on the C flag, so this pattern mustn't match under
641 // those circumstances.
642 if (Immed == 0)
643 return false;
644
645 if (N.getValueType() == MVT::i32)
646 Immed = ~((uint32_t)Immed) + 1;
647 else
648 Immed = ~Immed + 1ULL;
649 if (Immed & 0xFFFFFFFFFF000000ULL)
650 return false;
651
652 Immed &= 0xFFFFFFULL;
653 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
654 Shift);
655}
656
657/// getShiftTypeForNode - Translate a shift node to the corresponding
658/// ShiftType value.
659 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
660 switch (N.getOpcode()) {
661 default:
662 return AArch64_AM::InvalidShiftExtend;
663 case ISD::SHL:
664 return AArch64_AM::LSL;
665 case ISD::SRL:
666 return AArch64_AM::LSR;
667 case ISD::SRA:
668 return AArch64_AM::ASR;
669 case ISD::ROTR:
670 return AArch64_AM::ROR;
671 }
672}
673
674 static bool isMemOpOrPrefetch(SDNode *N) {
675 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
676}
677
678/// Determine whether it is worth it to fold SHL into the addressing
679/// mode.
680 static bool isWorthFoldingSHL(SDValue V) {
681 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
682 // It is worth folding a logical shift of up to three places.
683 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
684 if (!CSD)
685 return false;
686 unsigned ShiftVal = CSD->getZExtValue();
687 if (ShiftVal > 3)
688 return false;
689
690 // Check if this particular node is reused in any non-memory related
691 // operation. If yes, do not try to fold this node into the address
692 // computation, since the computation will be kept.
693 const SDNode *Node = V.getNode();
694 for (SDNode *UI : Node->users())
695 if (!isMemOpOrPrefetch(UI))
696 for (SDNode *UII : UI->users())
697 if (!isMemOpOrPrefetch(UII))
698 return false;
699 return true;
700}
701
702/// Determine whether it is worth folding V into an extended register addressing
703/// mode.
704bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
705 // Trivial if we are optimizing for code size or if there is only
706 // one use of the value.
707 if (CurDAG->shouldOptForSize() || V.hasOneUse())
708 return true;
709
710 // If a subtarget has a slow shift, folding a shift into multiple loads
711 // costs additional micro-ops.
712 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
713 return false;
714
715 // Check whether we're going to emit the address arithmetic anyway because
716 // it's used by a non-address operation.
717 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
718 return true;
719 if (V.getOpcode() == ISD::ADD) {
720 const SDValue LHS = V.getOperand(0);
721 const SDValue RHS = V.getOperand(1);
722 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
723 return true;
724 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
725 return true;
726 }
727
728 // It hurts otherwise, since the value will be reused.
729 return false;
730}
731
732/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
733/// in order to select more shifted-register patterns.
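/// Editor's note (illustrative example, not in the original source): on i32,
///   (and (srl x, 2), 0x3FFFFFF8)    ; LowZBits = 3, MaskLen = 27
/// becomes (UBFMWri x, 5, 31) -- i.e. x >> 5 -- with an LSL #3 shifter
/// operand, which computes the same value (x >> 5) << 3.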
734bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
735 SDValue &Shift) {
736 EVT VT = N.getValueType();
737 if (VT != MVT::i32 && VT != MVT::i64)
738 return false;
739
740 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
741 return false;
742 SDValue LHS = N.getOperand(0);
743 if (!LHS->hasOneUse())
744 return false;
745
746 unsigned LHSOpcode = LHS->getOpcode();
747 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
748 return false;
749
750 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
751 if (!ShiftAmtNode)
752 return false;
753
754 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
755 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
756 if (!RHSC)
757 return false;
758
759 APInt AndMask = RHSC->getAPIntValue();
760 unsigned LowZBits, MaskLen;
761 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
762 return false;
763
764 unsigned BitWidth = N.getValueSizeInBits();
765 SDLoc DL(LHS);
766 uint64_t NewShiftC;
767 unsigned NewShiftOp;
768 if (LHSOpcode == ISD::SHL) {
769 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
770 // BitWidth != LowZBits + MaskLen doesn't match the pattern
771 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
772 return false;
773
774 NewShiftC = LowZBits - ShiftAmtC;
775 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
776 } else {
777 if (LowZBits == 0)
778 return false;
779
780 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
781 NewShiftC = LowZBits + ShiftAmtC;
782 if (NewShiftC >= BitWidth)
783 return false;
784
785 // SRA needs all high bits
786 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
787 return false;
788
789 // SRL high bits can be 0 or 1
790 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
791 return false;
792
793 if (LHSOpcode == ISD::SRL)
794 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
795 else
796 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
797 }
798
799 assert(NewShiftC < BitWidth && "Invalid shift amount");
800 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
801 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
802 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
803 NewShiftAmt, BitWidthMinus1),
804 0);
805 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
806 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
807 return true;
808}
809
810/// getExtendTypeForNode - Translate an extend node to the corresponding
811/// ExtendType value.
812 static AArch64_AM::ShiftExtendType
813getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
814 if (N.getOpcode() == ISD::SIGN_EXTEND ||
815 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
816 EVT SrcVT;
817 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
818 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
819 else
820 SrcVT = N.getOperand(0).getValueType();
821
822 if (!IsLoadStore && SrcVT == MVT::i8)
823 return AArch64_AM::SXTB;
824 else if (!IsLoadStore && SrcVT == MVT::i16)
825 return AArch64_AM::SXTH;
826 else if (SrcVT == MVT::i32)
827 return AArch64_AM::SXTW;
828 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
829
830 return AArch64_AM::InvalidShiftExtend;
831 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
832 N.getOpcode() == ISD::ANY_EXTEND) {
833 EVT SrcVT = N.getOperand(0).getValueType();
834 if (!IsLoadStore && SrcVT == MVT::i8)
835 return AArch64_AM::UXTB;
836 else if (!IsLoadStore && SrcVT == MVT::i16)
837 return AArch64_AM::UXTH;
838 else if (SrcVT == MVT::i32)
839 return AArch64_AM::UXTW;
840 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
841
842 return AArch64_AM::InvalidShiftExtend;
843 } else if (N.getOpcode() == ISD::AND) {
844 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
845 if (!CSD)
846 return AArch64_AM::InvalidShiftExtend;
847 uint64_t AndMask = CSD->getZExtValue();
848
849 switch (AndMask) {
850 default:
851 return AArch64_AM::InvalidShiftExtend;
852 case 0xFF:
853 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
854 case 0xFFFF:
855 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
856 case 0xFFFFFFFF:
857 return AArch64_AM::UXTW;
858 }
859 }
860
861 return AArch64_AM::InvalidShiftExtend;
862}
863
864/// Determine whether it is worth folding V into an extended register of an
865/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
866/// instruction, and the shift should be treated as worth folding even if it
867/// has multiple uses.
868bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
869 // Trivial if we are optimizing for code size or if there is only
870 // one use of the value.
871 if (CurDAG->shouldOptForSize() || V.hasOneUse())
872 return true;
873
874 // If a subtarget has a fastpath LSL we can fold a logical shift into
875 // the add/sub and save a cycle.
876 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
877 V.getConstantOperandVal(1) <= 4 &&
879 return true;
880
881 // It hurts otherwise, since the value will be reused.
882 return false;
883}
884
885/// SelectShiftedRegister - Select a "shifted register" operand. If the value
886/// is not shifted, set the Shift operand to default of "LSL 0". The logical
887/// instructions allow the shifted register to be rotated, but the arithmetic
888/// instructions do not. The AllowROR parameter specifies whether ROR is
889/// supported.
890bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
891 SDValue &Reg, SDValue &Shift) {
892 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
893 return true;
894
895 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
896 if (ShType == AArch64_AM::InvalidShiftExtend)
897 return false;
898 if (!AllowROR && ShType == AArch64_AM::ROR)
899 return false;
900
901 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
902 unsigned BitSize = N.getValueSizeInBits();
903 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
904 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
905
906 Reg = N.getOperand(0);
907 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
908 return isWorthFoldingALU(N, true);
909 }
910
911 return false;
912}
913
914/// Instructions that accept extend modifiers like UXTW expect the register
915/// being extended to be a GPR32, but the incoming DAG might be acting on a
916/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
917/// this is the case.
918 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
919 if (N.getValueType() == MVT::i32)
920 return N;
921
922 SDLoc dl(N);
923 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
924}
925
926// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
927template<signed Low, signed High, signed Scale>
928bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
929 if (!isa<ConstantSDNode>(N))
930 return false;
931
932 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
933 if ((MulImm % std::abs(Scale)) == 0) {
934 int64_t RDVLImm = MulImm / Scale;
935 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
936 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
937 return true;
938 }
939 }
940
941 return false;
942}
943
944// Returns a suitable RDSVL multiplier from a left shift.
945template <signed Low, signed High>
946bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
947 if (!isa<ConstantSDNode>(N))
948 return false;
949
950 int64_t MulImm = 1 << cast<ConstantSDNode>(N)->getSExtValue();
951 if (MulImm >= Low && MulImm <= High) {
952 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
953 return true;
954 }
955
956 return false;
957}
958
959/// SelectArithExtendedRegister - Select an "extended register" operand. This
960/// operand folds in an extend followed by an optional left shift.
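/// Editor's note (illustrative example, not in the original source): a DAG
/// such as (add w1, (shl (sext_inreg w2, i16), 2)) can fold here into
/// "add w0, w1, w2, sxth #2", with Reg set to the (narrowed) w2 and Shift
/// encoding SXTH plus a left shift of 2.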
961bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
962 SDValue &Shift) {
963 unsigned ShiftVal = 0;
964 AArch64_AM::ShiftExtendType Ext;
965
966 if (N.getOpcode() == ISD::SHL) {
967 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
968 if (!CSD)
969 return false;
970 ShiftVal = CSD->getZExtValue();
971 if (ShiftVal > 4)
972 return false;
973
974 Ext = getExtendTypeForNode(N.getOperand(0));
975 if (Ext == AArch64_AM::InvalidShiftExtend)
976 return false;
977
978 Reg = N.getOperand(0).getOperand(0);
979 } else {
980 Ext = getExtendTypeForNode(N);
981 if (Ext == AArch64_AM::InvalidShiftExtend)
982 return false;
983
984 Reg = N.getOperand(0);
985
986 // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
987 // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
988 auto isDef32 = [](SDValue N) {
989 unsigned Opc = N.getOpcode();
990 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
991 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
992 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
993 Opc != ISD::FREEZE;
994 };
995 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
996 isDef32(Reg))
997 return false;
998 }
999
1000 // AArch64 mandates that the RHS of the operation must use the smallest
1001 // register class that could contain the size being extended from. Thus,
1002 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1003 // there might not be an actual 32-bit value in the program. We can
1004 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1005 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1006 Reg = narrowIfNeeded(CurDAG, Reg);
1007 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1008 MVT::i32);
1009 return isWorthFoldingALU(N);
1010}
1011
1012/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1013/// operand is used by instructions that have an SP operand.
1014bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1015 SDValue &Shift) {
1016 unsigned ShiftVal = 0;
1017 AArch64_AM::ShiftExtendType Ext;
1018
1019 if (N.getOpcode() != ISD::SHL)
1020 return false;
1021
1022 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1023 if (!CSD)
1024 return false;
1025 ShiftVal = CSD->getZExtValue();
1026 if (ShiftVal > 4)
1027 return false;
1028
1029 Ext = AArch64_AM::UXTX;
1030 Reg = N.getOperand(0);
1031 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1032 MVT::i32);
1033 return isWorthFoldingALU(N);
1034}
1035
1036/// If there's a use of this ADDlow that's not itself a load/store then we'll
1037/// need to create a real ADD instruction from it anyway and there's no point in
1038/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1039/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1040/// leads to duplicated ADRP instructions.
1041 static bool isWorthFoldingADDlow(SDValue N) {
1042 for (auto *User : N->users()) {
1043 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1044 User->getOpcode() != ISD::ATOMIC_LOAD &&
1045 User->getOpcode() != ISD::ATOMIC_STORE)
1046 return false;
1047
1048 // ldar and stlr have much more restrictive addressing modes (just a
1049 // register).
1050 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1051 return false;
1052 }
1053
1054 return true;
1055}
1056
1057/// Check if the immediate offset is valid as a scaled immediate.
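/// Editor's note (illustrative sketch, not in the original source): the check
/// below is equivalent to the following standalone model, assuming Size is a
/// power of two and Range is the number of encodable units:
///   bool Valid = (Offset % Size) == 0 && Offset >= 0 &&
///                Offset < Range * static_cast<int64_t>(Size);
/// e.g. Size == 8, Range == 0x1000 accepts every multiple of 8 in [0, 32760].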
1058static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1059 unsigned Size) {
1060 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1061 Offset < (Range << Log2_32(Size)))
1062 return true;
1063 return false;
1064}
1065
1066/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1067/// immediate" address. The "Size" argument is the size in bytes of the memory
1068/// reference, which determines the scale.
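/// Editor's note (illustrative example, not in the original source): for the
/// signed 7-bit, Size == 8 case (LDP/STP of X registers) the accepted byte
/// offsets are the multiples of 8 in [-512, 504], and OffImm is the byte
/// offset divided by 8.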
1069bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1070 unsigned BW, unsigned Size,
1071 SDValue &Base,
1072 SDValue &OffImm) {
1073 SDLoc dl(N);
1074 const DataLayout &DL = CurDAG->getDataLayout();
1075 const TargetLowering *TLI = getTargetLowering();
1076 if (N.getOpcode() == ISD::FrameIndex) {
1077 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1078 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1079 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1080 return true;
1081 }
1082
1083 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit
1084 // signed modes selected here don't support labels/immediates, only base+offset.
1085 if (CurDAG->isBaseWithConstantOffset(N)) {
1086 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1087 if (IsSignedImm) {
1088 int64_t RHSC = RHS->getSExtValue();
1089 unsigned Scale = Log2_32(Size);
1090 int64_t Range = 0x1LL << (BW - 1);
1091
1092 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1093 RHSC < (Range << Scale)) {
1094 Base = N.getOperand(0);
1095 if (Base.getOpcode() == ISD::FrameIndex) {
1096 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1097 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1098 }
1099 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1100 return true;
1101 }
1102 } else {
1103 // unsigned Immediate
1104 uint64_t RHSC = RHS->getZExtValue();
1105 unsigned Scale = Log2_32(Size);
1106 uint64_t Range = 0x1ULL << BW;
1107
1108 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1109 Base = N.getOperand(0);
1110 if (Base.getOpcode() == ISD::FrameIndex) {
1111 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1112 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1113 }
1114 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1115 return true;
1116 }
1117 }
1118 }
1119 }
1120 // Base only. The address will be materialized into a register before
1121 // the memory is accessed.
1122 // add x0, Xbase, #offset
1123 // stp x1, x2, [x0]
1124 Base = N;
1125 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1126 return true;
1127}
1128
1129/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1130/// immediate" address. The "Size" argument is the size in bytes of the memory
1131/// reference, which determines the scale.
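/// Editor's note (illustrative example, not in the original source): with
/// Size == 8, an address of the form (add x0, #16) selects as Base = x0,
/// OffImm = #2, i.e. "ldr x1, [x0, #16]"; offsets that are misaligned or
/// outside the scaled unsigned 12-bit range fall back to the unscaled or
/// base-only forms below.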
1132bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1133 SDValue &Base, SDValue &OffImm) {
1134 SDLoc dl(N);
1135 const DataLayout &DL = CurDAG->getDataLayout();
1136 const TargetLowering *TLI = getTargetLowering();
1137 if (N.getOpcode() == ISD::FrameIndex) {
1138 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1139 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1140 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1141 return true;
1142 }
1143
1144 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1145 GlobalAddressSDNode *GAN =
1146 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1147 Base = N.getOperand(0);
1148 OffImm = N.getOperand(1);
1149 if (!GAN)
1150 return true;
1151
1152 if (GAN->getOffset() % Size == 0 &&
1153 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1154 return true;
1155 }
1156
1157 if (CurDAG->isBaseWithConstantOffset(N)) {
1158 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1159 int64_t RHSC = (int64_t)RHS->getZExtValue();
1160 unsigned Scale = Log2_32(Size);
1161 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1162 Base = N.getOperand(0);
1163 if (Base.getOpcode() == ISD::FrameIndex) {
1164 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1165 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1166 }
1167 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1168 return true;
1169 }
1170 }
1171 }
1172
1173 // Before falling back to our general case, check if the unscaled
1174 // instructions can handle this. If so, that's preferable.
1175 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1176 return false;
1177
1178 // Base only. The address will be materialized into a register before
1179 // the memory is accessed.
1180 // add x0, Xbase, #offset
1181 // ldr x0, [x0]
1182 Base = N;
1183 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1184 return true;
1185}
1186
1187/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1188/// immediate" address. This should only match when there is an offset that
1189/// is not valid for a scaled immediate addressing mode. The "Size" argument
1190/// is the size in bytes of the memory reference, which is needed here to know
1191/// what is valid for a scaled immediate.
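/// Editor's note (illustrative example, not in the original source): this is
/// the LDUR/STUR form, accepting any byte offset in [-256, 255]; e.g. an
/// offset of -8 with Size == 8 selects as "ldur x1, [x0, #-8]".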
1192bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1193 SDValue &Base,
1194 SDValue &OffImm) {
1195 if (!CurDAG->isBaseWithConstantOffset(N))
1196 return false;
1197 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1198 int64_t RHSC = RHS->getSExtValue();
1199 if (RHSC >= -256 && RHSC < 256) {
1200 Base = N.getOperand(0);
1201 if (Base.getOpcode() == ISD::FrameIndex) {
1202 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1203 const TargetLowering *TLI = getTargetLowering();
1204 Base = CurDAG->getTargetFrameIndex(
1205 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1206 }
1207 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1208 return true;
1209 }
1210 }
1211 return false;
1212}
1213
1214 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1215 SDLoc dl(N);
1216 SDValue ImpDef = SDValue(
1217 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1218 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1219 N);
1220}
1221
1222/// Check if the given SHL node (\p N) can be used to form an
1223/// extended register for an addressing mode.
1224bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1225 bool WantExtend, SDValue &Offset,
1226 SDValue &SignExtend) {
1227 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1228 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1229 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1230 return false;
1231
1232 SDLoc dl(N);
1233 if (WantExtend) {
1234 AArch64_AM::ShiftExtendType Ext =
1235 getExtendTypeForNode(N.getOperand(0), true);
1236 if (Ext == AArch64_AM::InvalidShiftExtend)
1237 return false;
1238
1239 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1240 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1241 MVT::i32);
1242 } else {
1243 Offset = N.getOperand(0);
1244 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1245 }
1246
1247 unsigned LegalShiftVal = Log2_32(Size);
1248 unsigned ShiftVal = CSD->getZExtValue();
1249
1250 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1251 return false;
1252
1253 return isWorthFoldingAddr(N, Size);
1254}
1255
1256bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1257 SDValue &Base, SDValue &Offset,
1258 SDValue &SignExtend,
1259 SDValue &DoShift) {
1260 if (N.getOpcode() != ISD::ADD)
1261 return false;
1262 SDValue LHS = N.getOperand(0);
1263 SDValue RHS = N.getOperand(1);
1264 SDLoc dl(N);
1265
1266 // We don't want to match immediate adds here, because they are better lowered
1267 // to the register-immediate addressing modes.
1268 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1269 return false;
1270
1271 // Check if this particular node is reused in any non-memory related
1272 // operation. If yes, do not try to fold this node into the address
1273 // computation, since the computation will be kept.
1274 const SDNode *Node = N.getNode();
1275 for (SDNode *UI : Node->users()) {
1276 if (!isMemOpOrPrefetch(UI))
1277 return false;
1278 }
1279
1280 // Remember if it is worth folding N when it produces extended register.
1281 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1282
1283 // Try to match a shifted extend on the RHS.
1284 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1285 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1286 Base = LHS;
1287 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1288 return true;
1289 }
1290
1291 // Try to match a shifted extend on the LHS.
1292 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1293 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1294 Base = RHS;
1295 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1296 return true;
1297 }
1298
1299 // There was no shift, whatever else we find.
1300 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1301
1303 // Try to match an unshifted extend on the LHS.
1304 if (IsExtendedRegisterWorthFolding &&
1305 (Ext = getExtendTypeForNode(LHS, true)) !=
1306 AArch64_AM::InvalidShiftExtend) {
1307 Base = RHS;
1308 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1309 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1310 MVT::i32);
1311 if (isWorthFoldingAddr(LHS, Size))
1312 return true;
1313 }
1314
1315 // Try to match an unshifted extend on the RHS.
1316 if (IsExtendedRegisterWorthFolding &&
1317 (Ext = getExtendTypeForNode(RHS, true)) !=
1318 AArch64_AM::InvalidShiftExtend) {
1319 Base = LHS;
1320 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1321 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1322 MVT::i32);
1323 if (isWorthFoldingAddr(RHS, Size))
1324 return true;
1325 }
1326
1327 return false;
1328}
1329
1330// Check if the given immediate is preferred by ADD. If an immediate can be
1331 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1332 // encoded by a single MOVZ, return true.
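// Editor's note (illustrative examples, not in the original source):
//   0x123000 -> true  (fits "add x0, x1, #0x123, lsl #12"; materializing it
//                      would otherwise need a MOVZ+MOVK pair)
//   0xab0000 -> false (a single "movz x0, #0xab, lsl #16" is cheaper)
//   0x000fff -> true  (plain 12-bit ADD immediate)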
1333static bool isPreferredADD(int64_t ImmOff) {
1334 // Constant in [0x0, 0xfff] can be encoded in ADD.
1335 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1336 return true;
1337 // Check if it can be encoded in an "ADD LSL #12".
1338 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1339 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1340 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1341 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1342 return false;
1343}
1344
1345bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1346 SDValue &Base, SDValue &Offset,
1347 SDValue &SignExtend,
1348 SDValue &DoShift) {
1349 if (N.getOpcode() != ISD::ADD)
1350 return false;
1351 SDValue LHS = N.getOperand(0);
1352 SDValue RHS = N.getOperand(1);
1353 SDLoc DL(N);
1354
1355 // Check if this particular node is reused in any non-memory related
1356 // operation. If yes, do not try to fold this node into the address
1357 // computation, since the computation will be kept.
1358 const SDNode *Node = N.getNode();
1359 for (SDNode *UI : Node->users()) {
1360 if (!isMemOpOrPrefetch(UI))
1361 return false;
1362 }
1363
1364 // Watch out if RHS is a wide immediate: it cannot be selected into the
1365 // [BaseReg+Imm] addressing mode, and it may not be encodable in an
1366 // ADD/SUB. Instead it will use the [BaseReg + 0] address mode and generate
1367 // instructions like:
1368 // MOV X0, WideImmediate
1369 // ADD X1, BaseReg, X0
1370 // LDR X2, [X1, 0]
1371 // For such situation, using [BaseReg, XReg] addressing mode can save one
1372 // ADD/SUB:
1373 // MOV X0, WideImmediate
1374 // LDR X2, [BaseReg, X0]
1375 if (isa<ConstantSDNode>(RHS)) {
1376 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1377 // Skip if the immediate can be selected by the load/store addressing mode.
1378 // Also skip if the immediate can be encoded by a single ADD (SUB is also
1379 // checked by using -ImmOff).
1380 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1381 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1382 return false;
1383
1384 SDValue Ops[] = { RHS };
1385 SDNode *MOVI =
1386 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1387 SDValue MOVIV = SDValue(MOVI, 0);
1388 // This ADD of two X registers will be selected into [Reg+Reg] mode.
1389 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1390 }
1391
1392 // Remember if it is worth folding N when it produces extended register.
1393 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1394
1395 // Try to match a shifted extend on the RHS.
1396 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1397 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1398 Base = LHS;
1399 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1400 return true;
1401 }
1402
1403 // Try to match a shifted extend on the LHS.
1404 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1405 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1406 Base = RHS;
1407 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1408 return true;
1409 }
1410
1411 // Match any non-shifted, non-extend, non-immediate add expression.
1412 Base = LHS;
1413 Offset = RHS;
1414 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1415 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1416 // Reg1 + Reg2 is free: no check needed.
1417 return true;
1418}
1419
1420SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1421 static const unsigned RegClassIDs[] = {
1422 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1423 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1424 AArch64::dsub2, AArch64::dsub3};
1425
1426 return createTuple(Regs, RegClassIDs, SubRegs);
1427}
1428
1429SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1430 static const unsigned RegClassIDs[] = {
1431 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1432 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1433 AArch64::qsub2, AArch64::qsub3};
1434
1435 return createTuple(Regs, RegClassIDs, SubRegs);
1436}
1437
1438SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1439 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1440 AArch64::ZPR3RegClassID,
1441 AArch64::ZPR4RegClassID};
1442 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1443 AArch64::zsub2, AArch64::zsub3};
1444
1445 return createTuple(Regs, RegClassIDs, SubRegs);
1446}
1447
1448SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1449 assert(Regs.size() == 2 || Regs.size() == 4);
1450
1451 // The createTuple interface requires 3 RegClassIDs for each possible
1452 // tuple type even though we only have them for ZPR2 and ZPR4.
1453 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1454 AArch64::ZPR4Mul4RegClassID};
1455 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1456 AArch64::zsub2, AArch64::zsub3};
1457 return createTuple(Regs, RegClassIDs, SubRegs);
1458}
1459
1460SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1461 const unsigned RegClassIDs[],
1462 const unsigned SubRegs[]) {
1463 // There's no special register-class for a vector-list of 1 element: it's just
1464 // a vector.
1465 if (Regs.size() == 1)
1466 return Regs[0];
1467
1468 assert(Regs.size() >= 2 && Regs.size() <= 4);
1469
1470 SDLoc DL(Regs[0]);
1471
1473
1474 // First operand of REG_SEQUENCE is the desired RegClass.
1475 Ops.push_back(
1476 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1477
1478 // Then we get pairs of source & subregister-position for the components.
1479 for (unsigned i = 0; i < Regs.size(); ++i) {
1480 Ops.push_back(Regs[i]);
1481 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1482 }
1483
1484 SDNode *N =
1485 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1486 return SDValue(N, 0);
1487}
1488
1489void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1490 bool isExt) {
1491 SDLoc dl(N);
1492 EVT VT = N->getValueType(0);
1493
1494 unsigned ExtOff = isExt;
1495
1496 // Form a REG_SEQUENCE to force register allocation.
1497 unsigned Vec0Off = ExtOff + 1;
1498 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1499 SDValue RegSeq = createQTuple(Regs);
1500
1502 if (isExt)
1503 Ops.push_back(N->getOperand(1));
1504 Ops.push_back(RegSeq);
1505 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1506 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1507}
1508
1509static std::tuple<SDValue, SDValue>
1510 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1511 SDLoc DL(Disc);
1512 SDValue AddrDisc;
1513 SDValue ConstDisc;
1514
1515 // If this is a blend, remember the constant and address discriminators.
1516 // Otherwise, it's either a constant discriminator, or a non-blended
1517 // address discriminator.
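 // Editor's note (illustrative example, not in the original source): a
 // discriminator of the form (ptrauth.blend addr, 1234) splits into
 // AddrDisc = addr and ConstDisc = 1234; a plain constant 1234 becomes
 // (XZR, 1234); anything that is neither yields a zero constant discriminator
 // plus the original value as the address discriminator.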
1518 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1519 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1520 AddrDisc = Disc->getOperand(1);
1521 ConstDisc = Disc->getOperand(2);
1522 } else {
1523 ConstDisc = Disc;
1524 }
1525
1526 // If the constant discriminator (either the blend RHS, or the entire
1527 // discriminator value) isn't a 16-bit constant, bail out, and let the
1528 // discriminator be computed separately.
1529 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1530 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1531 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1532
1533 // If there's no address discriminator, use XZR directly.
1534 if (!AddrDisc)
1535 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1536
1537 return std::make_tuple(
1538 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1539 AddrDisc);
1540}
1541
1542void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1543 SDLoc DL(N);
1544 // IntrinsicID is operand #0
1545 SDValue Val = N->getOperand(1);
1546 SDValue AUTKey = N->getOperand(2);
1547 SDValue AUTDisc = N->getOperand(3);
1548
1549 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1550 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1551
1552 SDValue AUTAddrDisc, AUTConstDisc;
1553 std::tie(AUTConstDisc, AUTAddrDisc) =
1554 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1555
1556 if (!Subtarget->isX16X17Safer()) {
1557 SDValue Ops[] = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1558
1559 SDNode *AUT =
1560 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1561 ReplaceNode(N, AUT);
1562 } else {
1563 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1564 AArch64::X16, Val, SDValue());
1565 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1566
1567 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1568 ReplaceNode(N, AUT);
1569 }
1570}
1571
1572void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1573 SDLoc DL(N);
1574 // IntrinsicID is operand #0
1575 SDValue Val = N->getOperand(1);
1576 SDValue AUTKey = N->getOperand(2);
1577 SDValue AUTDisc = N->getOperand(3);
1578 SDValue PACKey = N->getOperand(4);
1579 SDValue PACDisc = N->getOperand(5);
1580
1581 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1582 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1583
1584 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1585 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1586
1587 SDValue AUTAddrDisc, AUTConstDisc;
1588 std::tie(AUTConstDisc, AUTAddrDisc) =
1589 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1590
1591 SDValue PACAddrDisc, PACConstDisc;
1592 std::tie(PACConstDisc, PACAddrDisc) =
1593 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1594
1595 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1596 AArch64::X16, Val, SDValue());
1597
1598 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1599 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1600
1601 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1602 ReplaceNode(N, AUTPAC);
1603}
1604
1605bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1606 LoadSDNode *LD = cast<LoadSDNode>(N);
1607 if (LD->isUnindexed())
1608 return false;
1609 EVT VT = LD->getMemoryVT();
1610 EVT DstVT = N->getValueType(0);
1611 ISD::MemIndexedMode AM = LD->getAddressingMode();
1612 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1613 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1614 int OffsetVal = (int)OffsetOp->getZExtValue();
1615
1616 // We're not doing validity checking here. That was done when checking
1617 // if we should mark the load as indexed or not. We're just selecting
1618 // the right instruction.
1619 unsigned Opcode = 0;
1620
1621 ISD::LoadExtType ExtType = LD->getExtensionType();
1622 bool InsertTo64 = false;
1623 if (VT == MVT::i64)
1624 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1625 else if (VT == MVT::i32) {
1626 if (ExtType == ISD::NON_EXTLOAD)
1627 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1628 else if (ExtType == ISD::SEXTLOAD)
1629 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1630 else {
1631 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1632 InsertTo64 = true;
1633 // The result of the load is only i32. It's the subreg_to_reg that makes
1634 // it into an i64.
1635 DstVT = MVT::i32;
1636 }
1637 } else if (VT == MVT::i16) {
1638 if (ExtType == ISD::SEXTLOAD) {
1639 if (DstVT == MVT::i64)
1640 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1641 else
1642 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1643 } else {
1644 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1645 InsertTo64 = DstVT == MVT::i64;
1646 // The result of the load is only i32. It's the subreg_to_reg that makes
1647 // it into an i64.
1648 DstVT = MVT::i32;
1649 }
1650 } else if (VT == MVT::i8) {
1651 if (ExtType == ISD::SEXTLOAD) {
1652 if (DstVT == MVT::i64)
1653 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1654 else
1655 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1656 } else {
1657 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1658 InsertTo64 = DstVT == MVT::i64;
1659 // The result of the load is only i32. It's the subreg_to_reg that makes
1660 // it into an i64.
1661 DstVT = MVT::i32;
1662 }
1663 } else if (VT == MVT::f16) {
1664 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1665 } else if (VT == MVT::bf16) {
1666 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1667 } else if (VT == MVT::f32) {
1668 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1669 } else if (VT == MVT::f64 ||
1670 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1671 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1672 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1673 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1674 } else if (VT.is64BitVector()) {
1675 if (IsPre || OffsetVal != 8)
1676 return false;
1677 switch (VT.getScalarSizeInBits()) {
1678 case 8:
1679 Opcode = AArch64::LD1Onev8b_POST;
1680 break;
1681 case 16:
1682 Opcode = AArch64::LD1Onev4h_POST;
1683 break;
1684 case 32:
1685 Opcode = AArch64::LD1Onev2s_POST;
1686 break;
1687 case 64:
1688 Opcode = AArch64::LD1Onev1d_POST;
1689 break;
1690 default:
1691 llvm_unreachable("Expected vector element to be a power of 2");
1692 }
1693 } else if (VT.is128BitVector()) {
1694 if (IsPre || OffsetVal != 16)
1695 return false;
1696 switch (VT.getScalarSizeInBits()) {
1697 case 8:
1698 Opcode = AArch64::LD1Onev16b_POST;
1699 break;
1700 case 16:
1701 Opcode = AArch64::LD1Onev8h_POST;
1702 break;
1703 case 32:
1704 Opcode = AArch64::LD1Onev4s_POST;
1705 break;
1706 case 64:
1707 Opcode = AArch64::LD1Onev2d_POST;
1708 break;
1709 default:
1710 llvm_unreachable("Expected vector element to be a power of 2");
1711 }
1712 } else
1713 return false;
1714 SDValue Chain = LD->getChain();
1715 SDValue Base = LD->getBasePtr();
1716 SDLoc dl(N);
1717 // LD1 encodes an immediate offset by using XZR as the offset register.
1718 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1719 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1720 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1721 SDValue Ops[] = { Base, Offset, Chain };
1722 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1723 MVT::Other, Ops);
1724
1725 // Transfer memoperands.
1726 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1727 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1728
1729 // Either way, we're replacing the node, so tell the caller that.
1730 SDValue LoadedVal = SDValue(Res, 1);
1731 if (InsertTo64) {
1732 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1733 LoadedVal =
1734 SDValue(CurDAG->getMachineNode(
1735 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1736 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1737 SubReg),
1738 0);
1739 }
1740
1741 ReplaceUses(SDValue(N, 0), LoadedVal);
1742 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1743 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1744 CurDAG->RemoveDeadNode(N);
1745 return true;
1746}
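// Worked example (illustrative, not from the original source): a post-indexed
// non-extending i32 load with OffsetVal == 4 is matched to AArch64::LDRWpost,
// i.e. "ldr w0, [x1], #4"; the loaded value replaces result 0 of the original
// load and the updated base address replaces its write-back result.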
1747
1748void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1749 unsigned SubRegIdx) {
1750 SDLoc dl(N);
1751 EVT VT = N->getValueType(0);
1752 SDValue Chain = N->getOperand(0);
1753
1754 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1755 Chain};
1756
1757 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1758
1759 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1760 SDValue SuperReg = SDValue(Ld, 0);
1761 for (unsigned i = 0; i < NumVecs; ++i)
1762 ReplaceUses(SDValue(N, i),
1763 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1764
1765 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1766
1767 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1768 // because it's too simple to have needed special treatment during lowering.
1769 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1770 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1771 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1772 }
1773
1774 CurDAG->RemoveDeadNode(N);
1775}
1776
1777void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1778 unsigned Opc, unsigned SubRegIdx) {
1779 SDLoc dl(N);
1780 EVT VT = N->getValueType(0);
1781 SDValue Chain = N->getOperand(0);
1782
1783 SDValue Ops[] = {N->getOperand(1), // Mem operand
1784 N->getOperand(2), // Incremental
1785 Chain};
1786
1787 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1788 MVT::Untyped, MVT::Other};
1789
1790 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1791
1792 // Update uses of write back register
1793 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1794
1795 // Update uses of vector list
1796 SDValue SuperReg = SDValue(Ld, 1);
1797 if (NumVecs == 1)
1798 ReplaceUses(SDValue(N, 0), SuperReg);
1799 else
1800 for (unsigned i = 0; i < NumVecs; ++i)
1801 ReplaceUses(SDValue(N, i),
1802 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1803
1804 // Update the chain
1805 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1806 CurDAG->RemoveDeadNode(N);
1807}
1808
1809/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1810/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1811/// new Base and an SDValue representing the new offset.
1812std::tuple<unsigned, SDValue, SDValue>
1813AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1814 unsigned Opc_ri,
1815 const SDValue &OldBase,
1816 const SDValue &OldOffset,
1817 unsigned Scale) {
1818 SDValue NewBase = OldBase;
1819 SDValue NewOffset = OldOffset;
1820 // Detect a possible Reg+Imm addressing mode.
1821 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1822 N, OldBase, NewBase, NewOffset);
1823
1824 // Detect a possible reg+reg addressing mode, but only if we haven't already
1825 // detected a Reg+Imm one.
1826 const bool IsRegReg =
1827 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1828
1829 // Select the instruction.
1830 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1831}
1832
1833enum class SelectTypeKind {
1834 Int1 = 0,
1835 Int = 1,
1836 FP = 2,
1837 AnyType = 3,
1838};
1839
1840 /// This function selects an opcode from a list of opcodes, which is
1841 /// expected to contain the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
1842 /// element types, in that order.
1843template <SelectTypeKind Kind>
1844static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1845 // Only match scalable vector VTs
1846 if (!VT.isScalableVector())
1847 return 0;
1848
1849 EVT EltVT = VT.getVectorElementType();
1850 unsigned Key = VT.getVectorMinNumElements();
1851 switch (Kind) {
1852 case SelectTypeKind::AnyType:
1853 break;
1854 case SelectTypeKind::Int:
1855 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1856 EltVT != MVT::i64)
1857 return 0;
1858 break;
1859 case SelectTypeKind::Int1:
1860 if (EltVT != MVT::i1)
1861 return 0;
1862 break;
1863 case SelectTypeKind::FP:
1864 if (EltVT == MVT::bf16)
1865 Key = 16;
1866 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1867 EltVT != MVT::f64)
1868 return 0;
1869 break;
1870 }
1871
1872 unsigned Offset;
1873 switch (Key) {
1874 case 16: // 8-bit or bf16
1875 Offset = 0;
1876 break;
1877 case 8: // 16-bit
1878 Offset = 1;
1879 break;
1880 case 4: // 32-bit
1881 Offset = 2;
1882 break;
1883 case 2: // 64-bit
1884 Offset = 3;
1885 break;
1886 default:
1887 return 0;
1888 }
1889
1890 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1891}
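// Example of the mapping above (illustrative, not in the original source):
// nxv4i32 under SelectTypeKind::Int passes the element-type check, Key == 4,
// so Offset == 2 and Opcodes[2] (the 32-bit variant) is returned; a bf16
// vector under SelectTypeKind::FP forces Key to 16 and selects Opcodes[0].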
1892
1893// This function is almost identical to SelectWhilePair, but has an
1894// extra check on the range of the immediate operand.
1895// TODO: Merge these two functions together at some point?
1896void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1897 // Immediate can be either 0 or 1.
1898 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1899 if (Imm->getZExtValue() > 1)
1900 return;
1901
1902 SDLoc DL(N);
1903 EVT VT = N->getValueType(0);
1904 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1905 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1906 SDValue SuperReg = SDValue(WhilePair, 0);
1907
1908 for (unsigned I = 0; I < 2; ++I)
1909 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1910 AArch64::psub0 + I, DL, VT, SuperReg));
1911
1912 CurDAG->RemoveDeadNode(N);
1913}
1914
1915void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1916 SDLoc DL(N);
1917 EVT VT = N->getValueType(0);
1918
1919 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1920
1921 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1922 SDValue SuperReg = SDValue(WhilePair, 0);
1923
1924 for (unsigned I = 0; I < 2; ++I)
1925 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1926 AArch64::psub0 + I, DL, VT, SuperReg));
1927
1928 CurDAG->RemoveDeadNode(N);
1929}
1930
1931void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1932 unsigned Opcode) {
1933 EVT VT = N->getValueType(0);
1934 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1935 SDValue Ops = createZTuple(Regs);
1936 SDLoc DL(N);
1937 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1938 SDValue SuperReg = SDValue(Intrinsic, 0);
1939 for (unsigned i = 0; i < NumVecs; ++i)
1940 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1941 AArch64::zsub0 + i, DL, VT, SuperReg));
1942
1943 CurDAG->RemoveDeadNode(N);
1944}
1945
1946void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1947 unsigned Opcode) {
1948 SDLoc DL(N);
1949 EVT VT = N->getValueType(0);
1950 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1951 Ops.push_back(/*Chain*/ N->getOperand(0));
1952
1953 SDNode *Instruction =
1954 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1955 SDValue SuperReg = SDValue(Instruction, 0);
1956
1957 for (unsigned i = 0; i < NumVecs; ++i)
1958 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1959 AArch64::zsub0 + i, DL, VT, SuperReg));
1960
1961 // Copy chain
1962 unsigned ChainIdx = NumVecs;
1963 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1964 CurDAG->RemoveDeadNode(N);
1965}
1966
1967void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1968 unsigned NumVecs,
1969 bool IsZmMulti,
1970 unsigned Opcode,
1971 bool HasPred) {
1972 assert(Opcode != 0 && "Unexpected opcode");
1973
1974 SDLoc DL(N);
1975 EVT VT = N->getValueType(0);
1976 unsigned FirstVecIdx = HasPred ? 2 : 1;
1977
1978 auto GetMultiVecOperand = [=](unsigned StartIdx) {
1979 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
1980 return createZMulTuple(Regs);
1981 };
1982
1983 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1984
1985 SDValue Zm;
1986 if (IsZmMulti)
1987 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1988 else
1989 Zm = N->getOperand(NumVecs + FirstVecIdx);
1990
1991 SDNode *Intrinsic;
1992 if (HasPred)
1993 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1994 N->getOperand(1), Zdn, Zm);
1995 else
1996 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1997 SDValue SuperReg = SDValue(Intrinsic, 0);
1998 for (unsigned i = 0; i < NumVecs; ++i)
1999 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2000 AArch64::zsub0 + i, DL, VT, SuperReg));
2001
2002 CurDAG->RemoveDeadNode(N);
2003}
2004
2005void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2006 unsigned Scale, unsigned Opc_ri,
2007 unsigned Opc_rr, bool IsIntr) {
2008 assert(Scale < 5 && "Invalid scaling value.");
2009 SDLoc DL(N);
2010 EVT VT = N->getValueType(0);
2011 SDValue Chain = N->getOperand(0);
2012
2013 // Optimize addressing mode.
2014 SDValue Base, Offset;
2015 unsigned Opc;
2016 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2017 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2018 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2019
2020 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2021 Base, // Memory operand
2022 Offset, Chain};
2023
2024 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2025
2026 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2027 SDValue SuperReg = SDValue(Load, 0);
2028 for (unsigned i = 0; i < NumVecs; ++i)
2029 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2030 AArch64::zsub0 + i, DL, VT, SuperReg));
2031
2032 // Copy chain
2033 unsigned ChainIdx = NumVecs;
2034 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2035 CurDAG->RemoveDeadNode(N);
2036}
2037
2038void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2039 unsigned NumVecs,
2040 unsigned Scale,
2041 unsigned Opc_ri,
2042 unsigned Opc_rr) {
2043 assert(Scale < 4 && "Invalid scaling value.");
2044 SDLoc DL(N);
2045 EVT VT = N->getValueType(0);
2046 SDValue Chain = N->getOperand(0);
2047
2048 SDValue PNg = N->getOperand(2);
2049 SDValue Base = N->getOperand(3);
2050 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2051 unsigned Opc;
2052 std::tie(Opc, Base, Offset) =
2053 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2054
2055 SDValue Ops[] = {PNg, // Predicate-as-counter
2056 Base, // Memory operand
2057 Offset, Chain};
2058
2059 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2060
2061 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2062 SDValue SuperReg = SDValue(Load, 0);
2063 for (unsigned i = 0; i < NumVecs; ++i)
2064 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2065 AArch64::zsub0 + i, DL, VT, SuperReg));
2066
2067 // Copy chain
2068 unsigned ChainIdx = NumVecs;
2069 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2070 CurDAG->RemoveDeadNode(N);
2071}
2072
2073void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2074 unsigned Opcode) {
2075 if (N->getValueType(0) != MVT::nxv4f32)
2076 return;
2077 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2078}
2079
2080void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2081 unsigned NumOutVecs,
2082 unsigned Opc,
2083 uint32_t MaxImm) {
2084 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2085 if (Imm->getZExtValue() > MaxImm)
2086 return;
2087
2088 SDValue ZtValue;
2089 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2090 return;
2091
2092 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2093 SDLoc DL(Node);
2094 EVT VT = Node->getValueType(0);
2095
2096 SDNode *Instruction =
2097 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2098 SDValue SuperReg = SDValue(Instruction, 0);
2099
2100 for (unsigned I = 0; I < NumOutVecs; ++I)
2101 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2102 AArch64::zsub0 + I, DL, VT, SuperReg));
2103
2104 // Copy chain
2105 unsigned ChainIdx = NumOutVecs;
2106 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2107 CurDAG->RemoveDeadNode(Node);
2108}
2109
2110void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2111 unsigned NumOutVecs,
2112 unsigned Opc) {
2113
2114 SDValue ZtValue;
2115 SmallVector<SDValue, 4> Ops;
2116 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2117 return;
2118
2119 Ops.push_back(ZtValue);
2120 Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
2121 SDLoc DL(Node);
2122 EVT VT = Node->getValueType(0);
2123
2124 SDNode *Instruction =
2125 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2126 SDValue SuperReg = SDValue(Instruction, 0);
2127
2128 for (unsigned I = 0; I < NumOutVecs; ++I)
2129 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2130 AArch64::zsub0 + I, DL, VT, SuperReg));
2131
2132 // Copy chain
2133 unsigned ChainIdx = NumOutVecs;
2134 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2135 CurDAG->RemoveDeadNode(Node);
2136}
2137
2138void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2139 unsigned Op) {
2140 SDLoc DL(N);
2141 EVT VT = N->getValueType(0);
2142
2143 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2144 SDValue Zd = createZMulTuple(Regs);
2145 SDValue Zn = N->getOperand(1 + NumVecs);
2146 SDValue Zm = N->getOperand(2 + NumVecs);
2147
2148 SDValue Ops[] = {Zd, Zn, Zm};
2149
2150 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2151 SDValue SuperReg = SDValue(Intrinsic, 0);
2152 for (unsigned i = 0; i < NumVecs; ++i)
2153 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2154 AArch64::zsub0 + i, DL, VT, SuperReg));
2155
2156 CurDAG->RemoveDeadNode(N);
2157}
2158
2159bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2160 switch (BaseReg) {
2161 default:
2162 return false;
2163 case AArch64::ZA:
2164 case AArch64::ZAB0:
2165 if (TileNum == 0)
2166 break;
2167 return false;
2168 case AArch64::ZAH0:
2169 if (TileNum <= 1)
2170 break;
2171 return false;
2172 case AArch64::ZAS0:
2173 if (TileNum <= 3)
2174 break;
2175 return false;
2176 case AArch64::ZAD0:
2177 if (TileNum <= 7)
2178 break;
2179 return false;
2180 }
2181
2182 BaseReg += TileNum;
2183 return true;
2184}
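// Illustrative example (not part of the original source): BaseReg ==
// AArch64::ZAS0 with TileNum == 2 passes the "TileNum <= 3" check and yields
// AArch64::ZAS2, while AArch64::ZAH0 with TileNum == 2 fails, since only two
// 16-bit tiles exist.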
2185
2186template <unsigned MaxIdx, unsigned Scale>
2187void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2188 unsigned BaseReg, unsigned Op) {
2189 unsigned TileNum = 0;
2190 if (BaseReg != AArch64::ZA)
2191 TileNum = N->getConstantOperandVal(2);
2192
2193 if (!SelectSMETile(BaseReg, TileNum))
2194 return;
2195
2196 SDValue SliceBase, Base, Offset;
2197 if (BaseReg == AArch64::ZA)
2198 SliceBase = N->getOperand(2);
2199 else
2200 SliceBase = N->getOperand(3);
2201
2202 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2203 return;
2204
2205 SDLoc DL(N);
2206 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2207 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2208 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2209
2210 EVT VT = N->getValueType(0);
2211 for (unsigned I = 0; I < NumVecs; ++I)
2212 ReplaceUses(SDValue(N, I),
2213 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2214 SDValue(Mov, 0)));
2215 // Copy chain
2216 unsigned ChainIdx = NumVecs;
2217 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2218 CurDAG->RemoveDeadNode(N);
2219}
2220
2221void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2222 unsigned Op, unsigned MaxIdx,
2223 unsigned Scale, unsigned BaseReg) {
2224 // Slice can be in different positions
2225 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2226 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2227 SDValue SliceBase = N->getOperand(2);
2228 if (BaseReg != AArch64::ZA)
2229 SliceBase = N->getOperand(3);
2230
2231 SDValue Base, Offset;
2232 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2233 return;
2234 // The correct ZA tile number is computed when the machine instruction is
2235 // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
2236 // register with a ZReg.
2237 SDLoc DL(N);
2238 SmallVector<SDValue, 4> Ops;
2239 if (BaseReg != AArch64::ZA)
2240 Ops.push_back(N->getOperand(2));
2241 Ops.push_back(Base);
2242 Ops.push_back(Offset);
2243 Ops.push_back(N->getOperand(0)); // Chain
2244 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2245
2246 EVT VT = N->getValueType(0);
2247 for (unsigned I = 0; I < NumVecs; ++I)
2248 ReplaceUses(SDValue(N, I),
2249 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2250 SDValue(Mov, 0)));
2251
2252 // Copy chain
2253 unsigned ChainIdx = NumVecs;
2254 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2255 CurDAG->RemoveDeadNode(N);
2256}
2257
2258void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2259 unsigned NumOutVecs,
2260 bool IsTupleInput,
2261 unsigned Opc) {
2262 SDLoc DL(N);
2263 EVT VT = N->getValueType(0);
2264 unsigned NumInVecs = N->getNumOperands() - 1;
2265
2266 SmallVector<SDValue, 4> Ops;
2267 if (IsTupleInput) {
2268 assert((NumInVecs == 2 || NumInVecs == 4) &&
2269 "Don't know how to handle multi-register input!");
2270 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2271 Ops.push_back(createZMulTuple(Regs));
2272 } else {
2273 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2274 for (unsigned I = 0; I < NumInVecs; I++)
2275 Ops.push_back(N->getOperand(1 + I));
2276 }
2277
2278 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2279 SDValue SuperReg = SDValue(Res, 0);
2280
2281 for (unsigned I = 0; I < NumOutVecs; I++)
2282 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2283 AArch64::zsub0 + I, DL, VT, SuperReg));
2284 CurDAG->RemoveDeadNode(N);
2285}
2286
2287void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2288 unsigned Opc) {
2289 SDLoc dl(N);
2290 EVT VT = N->getOperand(2)->getValueType(0);
2291
2292 // Form a REG_SEQUENCE to force register allocation.
2293 bool Is128Bit = VT.getSizeInBits() == 128;
2294 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2295 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2296
2297 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2298 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2299
2300 // Transfer memoperands.
2301 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2302 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2303
2304 ReplaceNode(N, St);
2305}
2306
2307void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2308 unsigned Scale, unsigned Opc_rr,
2309 unsigned Opc_ri) {
2310 SDLoc dl(N);
2311
2312 // Form a REG_SEQUENCE to force register allocation.
2313 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2314 SDValue RegSeq = createZTuple(Regs);
2315
2316 // Optimize addressing mode.
2317 unsigned Opc;
2318 SDValue Base, Offset;
2319 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2320 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2321 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2322
2323 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2324 Base, // address
2325 Offset, // offset
2326 N->getOperand(0)}; // chain
2327 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2328
2329 ReplaceNode(N, St);
2330}
2331
2332bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2333 SDValue &OffImm) {
2334 SDLoc dl(N);
2335 const DataLayout &DL = CurDAG->getDataLayout();
2336 const TargetLowering *TLI = getTargetLowering();
2337
2338 // Try to match it for the frame address
2339 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2340 int FI = FINode->getIndex();
2341 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2342 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2343 return true;
2344 }
2345
2346 return false;
2347}
2348
2349void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2350 unsigned Opc) {
2351 SDLoc dl(N);
2352 EVT VT = N->getOperand(2)->getValueType(0);
2353 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2354 MVT::Other}; // Type for the Chain
2355
2356 // Form a REG_SEQUENCE to force register allocation.
2357 bool Is128Bit = VT.getSizeInBits() == 128;
2358 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2359 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2360
2361 SDValue Ops[] = {RegSeq,
2362 N->getOperand(NumVecs + 1), // base register
2363 N->getOperand(NumVecs + 2), // Incremental
2364 N->getOperand(0)}; // Chain
2365 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2366
2367 ReplaceNode(N, St);
2368}
2369
2370namespace {
2371/// WidenVector - Given a value in the V64 register class, produce the
2372/// equivalent value in the V128 register class.
2373class WidenVector {
2374 SelectionDAG &DAG;
2375
2376public:
2377 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2378
2379 SDValue operator()(SDValue V64Reg) {
2380 EVT VT = V64Reg.getValueType();
2381 unsigned NarrowSize = VT.getVectorNumElements();
2382 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2383 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2384 SDLoc DL(V64Reg);
2385
2386 SDValue Undef =
2387 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2388 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2389 }
2390};
2391} // namespace
2392
2393/// NarrowVector - Given a value in the V128 register class, produce the
2394/// equivalent value in the V64 register class.
2395 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2396 EVT VT = V128Reg.getValueType();
2397 unsigned WideSize = VT.getVectorNumElements();
2398 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2399 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2400
2401 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2402 V128Reg);
2403}
2404
2405void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2406 unsigned Opc) {
2407 SDLoc dl(N);
2408 EVT VT = N->getValueType(0);
2409 bool Narrow = VT.getSizeInBits() == 64;
2410
2411 // Form a REG_SEQUENCE to force register allocation.
2412 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2413
2414 if (Narrow)
2415 transform(Regs, Regs.begin(),
2416 WidenVector(*CurDAG));
2417
2418 SDValue RegSeq = createQTuple(Regs);
2419
2420 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2421
2422 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2423
2424 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2425 N->getOperand(NumVecs + 3), N->getOperand(0)};
2426 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2427 SDValue SuperReg = SDValue(Ld, 0);
2428
2429 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2430 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2431 AArch64::qsub2, AArch64::qsub3 };
2432 for (unsigned i = 0; i < NumVecs; ++i) {
2433 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2434 if (Narrow)
2435 NV = NarrowVector(NV, *CurDAG);
2436 ReplaceUses(SDValue(N, i), NV);
2437 }
2438
2439 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2440 CurDAG->RemoveDeadNode(N);
2441}
2442
2443void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2444 unsigned Opc) {
2445 SDLoc dl(N);
2446 EVT VT = N->getValueType(0);
2447 bool Narrow = VT.getSizeInBits() == 64;
2448
2449 // Form a REG_SEQUENCE to force register allocation.
2450 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2451
2452 if (Narrow)
2453 transform(Regs, Regs.begin(),
2454 WidenVector(*CurDAG));
2455
2456 SDValue RegSeq = createQTuple(Regs);
2457
2458 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2459 RegSeq->getValueType(0), MVT::Other};
2460
2461 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2462
2463 SDValue Ops[] = {RegSeq,
2464 CurDAG->getTargetConstant(LaneNo, dl,
2465 MVT::i64), // Lane Number
2466 N->getOperand(NumVecs + 2), // Base register
2467 N->getOperand(NumVecs + 3), // Incremental
2468 N->getOperand(0)};
2469 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2470
2471 // Update uses of the write back register
2472 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2473
2474 // Update uses of the vector list
2475 SDValue SuperReg = SDValue(Ld, 1);
2476 if (NumVecs == 1) {
2477 ReplaceUses(SDValue(N, 0),
2478 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2479 } else {
2480 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2481 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2482 AArch64::qsub2, AArch64::qsub3 };
2483 for (unsigned i = 0; i < NumVecs; ++i) {
2484 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2485 SuperReg);
2486 if (Narrow)
2487 NV = NarrowVector(NV, *CurDAG);
2488 ReplaceUses(SDValue(N, i), NV);
2489 }
2490 }
2491
2492 // Update the Chain
2493 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2494 CurDAG->RemoveDeadNode(N);
2495}
2496
2497void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2498 unsigned Opc) {
2499 SDLoc dl(N);
2500 EVT VT = N->getOperand(2)->getValueType(0);
2501 bool Narrow = VT.getSizeInBits() == 64;
2502
2503 // Form a REG_SEQUENCE to force register allocation.
2504 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2505
2506 if (Narrow)
2507 transform(Regs, Regs.begin(),
2508 WidenVector(*CurDAG));
2509
2510 SDValue RegSeq = createQTuple(Regs);
2511
2512 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2513
2514 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2515 N->getOperand(NumVecs + 3), N->getOperand(0)};
2516 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2517
2518 // Transfer memoperands.
2519 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2520 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2521
2522 ReplaceNode(N, St);
2523}
2524
2525void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2526 unsigned Opc) {
2527 SDLoc dl(N);
2528 EVT VT = N->getOperand(2)->getValueType(0);
2529 bool Narrow = VT.getSizeInBits() == 64;
2530
2531 // Form a REG_SEQUENCE to force register allocation.
2532 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2533
2534 if (Narrow)
2535 transform(Regs, Regs.begin(),
2536 WidenVector(*CurDAG));
2537
2538 SDValue RegSeq = createQTuple(Regs);
2539
2540 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2541 MVT::Other};
2542
2543 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2544
2545 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2546 N->getOperand(NumVecs + 2), // Base Register
2547 N->getOperand(NumVecs + 3), // Incremental
2548 N->getOperand(0)};
2549 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2550
2551 // Transfer memoperands.
2552 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2553 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2554
2555 ReplaceNode(N, St);
2556}
2557
2558 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2559 unsigned &Opc, SDValue &Opd0,
2560 unsigned &LSB, unsigned &MSB,
2561 unsigned NumberOfIgnoredLowBits,
2562 bool BiggerPattern) {
2563 assert(N->getOpcode() == ISD::AND &&
2564 "N must be a AND operation to call this function");
2565
2566 EVT VT = N->getValueType(0);
2567
2568 // Here we can test the type of VT and return false when the type does not
2569 // match, but since it is done prior to that call in the current context
2570 // we turned that into an assert to avoid redundant code.
2571 assert((VT == MVT::i32 || VT == MVT::i64) &&
2572 "Type checking must have been done before calling this function");
2573
2574 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2575 // changed the AND node to a 32-bit mask operation. We'll have to
2576 // undo that as part of the transform here if we want to catch all
2577 // the opportunities.
2578 // Currently the NumberOfIgnoredLowBits argument helps to recover
2579 // from these situations when matching bigger pattern (bitfield insert).
2580
2581 // For unsigned extracts, check for a shift right and mask
2582 uint64_t AndImm = 0;
2583 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2584 return false;
2585
2586 const SDNode *Op0 = N->getOperand(0).getNode();
2587
2588 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2589 // simplified. Try to undo that
2590 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2591
2592 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2593 if (AndImm & (AndImm + 1))
2594 return false;
2595
2596 bool ClampMSB = false;
2597 uint64_t SrlImm = 0;
2598 // Handle the SRL + ANY_EXTEND case.
2599 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2600 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2601 // Extend the incoming operand of the SRL to 64-bit.
2602 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2603 // Make sure to clamp the MSB so that we preserve the semantics of the
2604 // original operations.
2605 ClampMSB = true;
2606 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2607 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2608 SrlImm)) {
2609 // If the shift result was truncated, we can still combine them.
2610 Opd0 = Op0->getOperand(0).getOperand(0);
2611
2612 // Use the type of SRL node.
2613 VT = Opd0->getValueType(0);
2614 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2615 Opd0 = Op0->getOperand(0);
2616 ClampMSB = (VT == MVT::i32);
2617 } else if (BiggerPattern) {
2618 // Let's pretend a 0 shift right has been performed.
2619 // The resulting code will be at least as good as the original one
2620 // plus it may expose more opportunities for the bitfield insert pattern.
2621 // FIXME: Currently we limit this to the bigger pattern, because
2622 // some optimizations expect AND and not UBFM.
2623 Opd0 = N->getOperand(0);
2624 } else
2625 return false;
2626
2627 // Bail out on large immediates. This happens when no proper
2628 // combining/constant folding was performed.
2629 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2630 LLVM_DEBUG(
2631 (dbgs() << N
2632 << ": Found large shift immediate, this should not happen\n"));
2633 return false;
2634 }
2635
2636 LSB = SrlImm;
2637 MSB = SrlImm +
2638 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2639 : llvm::countr_one<uint64_t>(AndImm)) -
2640 1;
2641 if (ClampMSB)
2642 // Since we're moving the extend before the right shift operation, we need
2643 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2644 // the zeros which would get shifted in with the original right shift
2645 // operation.
2646 MSB = MSB > 31 ? 31 : MSB;
2647
2648 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2649 return true;
2650}
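// Worked example (illustrative, not from the original source): for the i32
// DAG "(and (srl x, 4), 0xff)", AndImm == 0xff passes the low-mask test and
// SrlImm == 4, so LSB == 4 and MSB == 4 + 8 - 1 == 11; the node is selected
// as UBFMWri x, 4, 11, i.e. "ubfx w0, w1, #4, #8".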
2651
2652 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2653 SDValue &Opd0, unsigned &Immr,
2654 unsigned &Imms) {
2655 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2656
2657 EVT VT = N->getValueType(0);
2658 unsigned BitWidth = VT.getSizeInBits();
2659 assert((VT == MVT::i32 || VT == MVT::i64) &&
2660 "Type checking must have been done before calling this function");
2661
2662 SDValue Op = N->getOperand(0);
2663 if (Op->getOpcode() == ISD::TRUNCATE) {
2664 Op = Op->getOperand(0);
2665 VT = Op->getValueType(0);
2666 BitWidth = VT.getSizeInBits();
2667 }
2668
2669 uint64_t ShiftImm;
2670 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2671 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2672 return false;
2673
2674 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2675 if (ShiftImm + Width > BitWidth)
2676 return false;
2677
2678 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2679 Opd0 = Op.getOperand(0);
2680 Immr = ShiftImm;
2681 Imms = ShiftImm + Width - 1;
2682 return true;
2683}
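// Worked example (illustrative, not from the original source): for
// "(sign_extend_inreg (srl x, 8), i8)" on i32, ShiftImm == 8 and Width == 8,
// so Immr == 8 and Imms == 15, selecting SBFMWri x, 8, 15, i.e.
// "sbfx w0, w1, #8, #8".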
2684
2685 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2686 SDValue &Opd0, unsigned &LSB,
2687 unsigned &MSB) {
2688 // We are looking for the following pattern which basically extracts several
2689 // contiguous bits from the source value and places them at the LSB of the
2690 // destination value; all other bits of the destination value are set to zero:
2691 //
2692 // Value2 = AND Value, MaskImm
2693 // SRL Value2, ShiftImm
2694 //
2695 // where MaskImm >> ShiftImm determines the width of the extracted bitfield.
2696 //
2697 // This gets selected into a single UBFM:
2698 //
2699 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2700 //
2701
2702 if (N->getOpcode() != ISD::SRL)
2703 return false;
2704
2705 uint64_t AndMask = 0;
2706 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2707 return false;
2708
2709 Opd0 = N->getOperand(0).getOperand(0);
2710
2711 uint64_t SrlImm = 0;
2712 if (!isIntImmediate(N->getOperand(1), SrlImm))
2713 return false;
2714
2715 // Check whether we really have several bits extract here.
2716 if (!isMask_64(AndMask >> SrlImm))
2717 return false;
2718
2719 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2720 LSB = SrlImm;
2721 MSB = llvm::Log2_64(AndMask);
2722 return true;
2723}
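// Worked example (illustrative, not from the original source): for
// "(srl (and x, 0xff0), 4)", AndMask >> SrlImm == 0xff is a mask, so
// LSB == 4 and MSB == Log2_64(0xff0) == 11, i.e. a single "ubfx #4, #8".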
2724
2725static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2726 unsigned &Immr, unsigned &Imms,
2727 bool BiggerPattern) {
2728 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2729 "N must be a SHR/SRA operation to call this function");
2730
2731 EVT VT = N->getValueType(0);
2732
2733 // Here we can test the type of VT and return false when the type does not
2734 // match, but since it is done prior to that call in the current context
2735 // we turned that into an assert to avoid redundant code.
2736 assert((VT == MVT::i32 || VT == MVT::i64) &&
2737 "Type checking must have been done before calling this function");
2738
2739 // Check for AND + SRL doing several bits extract.
2740 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2741 return true;
2742
2743 // We're looking for a shift of a shift.
2744 uint64_t ShlImm = 0;
2745 uint64_t TruncBits = 0;
2746 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2747 Opd0 = N->getOperand(0).getOperand(0);
2748 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2749 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2750 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2751 // be considered as setting high 32 bits as zero. Our strategy here is to
2752 // always generate 64bit UBFM. This consistency will help the CSE pass
2753 // later find more redundancy.
2754 Opd0 = N->getOperand(0).getOperand(0);
2755 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2756 VT = Opd0.getValueType();
2757 assert(VT == MVT::i64 && "the promoted type should be i64");
2758 } else if (BiggerPattern) {
2759 // Let's pretend a 0 shift left has been performed.
2760 // FIXME: Currently we limit this to the bigger pattern case,
2761 // because some optimizations expect AND and not UBFM
2762 Opd0 = N->getOperand(0);
2763 } else
2764 return false;
2765
2766 // Missing combines/constant folding may have left us with strange
2767 // constants.
2768 if (ShlImm >= VT.getSizeInBits()) {
2769 LLVM_DEBUG(
2770 (dbgs() << N
2771 << ": Found large shift immediate, this should not happen\n"));
2772 return false;
2773 }
2774
2775 uint64_t SrlImm = 0;
2776 if (!isIntImmediate(N->getOperand(1), SrlImm))
2777 return false;
2778
2779 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2780 "bad amount in shift node!");
2781 int immr = SrlImm - ShlImm;
2782 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2783 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2784 // SRA requires a signed extraction
2785 if (VT == MVT::i32)
2786 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2787 else
2788 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2789 return true;
2790}
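// Worked example (illustrative, not from the original source): the i32 DAG
// "(sra (shl x, 24), 24)" has ShlImm == 24 and SrlImm == 24, so Immr == 0 and
// Imms == 32 - 24 - 1 == 7; being an SRA it selects SBFMWri x, 0, 7, which is
// "sxtb w0, w1".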
2791
2792bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2793 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2794
2795 EVT VT = N->getValueType(0);
2796 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2797 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2798 return false;
2799
2800 uint64_t ShiftImm;
2801 SDValue Op = N->getOperand(0);
2802 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2803 return false;
2804
2805 SDLoc dl(N);
2806 // Extend the incoming operand of the shift to 64-bits.
2807 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2808 unsigned Immr = ShiftImm;
2809 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2810 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2811 CurDAG->getTargetConstant(Imms, dl, VT)};
2812 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2813 return true;
2814}
2815
2816static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2817 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2818 unsigned NumberOfIgnoredLowBits = 0,
2819 bool BiggerPattern = false) {
2820 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2821 return false;
2822
2823 switch (N->getOpcode()) {
2824 default:
2825 if (!N->isMachineOpcode())
2826 return false;
2827 break;
2828 case ISD::AND:
2829 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2830 NumberOfIgnoredLowBits, BiggerPattern);
2831 case ISD::SRL:
2832 case ISD::SRA:
2833 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2834
2835 case ISD::SIGN_EXTEND_INREG:
2836 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2837 }
2838
2839 unsigned NOpc = N->getMachineOpcode();
2840 switch (NOpc) {
2841 default:
2842 return false;
2843 case AArch64::SBFMWri:
2844 case AArch64::UBFMWri:
2845 case AArch64::SBFMXri:
2846 case AArch64::UBFMXri:
2847 Opc = NOpc;
2848 Opd0 = N->getOperand(0);
2849 Immr = N->getConstantOperandVal(1);
2850 Imms = N->getConstantOperandVal(2);
2851 return true;
2852 }
2853 // Unreachable
2854 return false;
2855}
2856
2857bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2858 unsigned Opc, Immr, Imms;
2859 SDValue Opd0;
2860 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2861 return false;
2862
2863 EVT VT = N->getValueType(0);
2864 SDLoc dl(N);
2865
2866 // If the bit extract operation is 64bit but the original type is 32bit, we
2867 // need to add one EXTRACT_SUBREG.
2868 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2869 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2870 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2871
2872 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2873 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2874 MVT::i32, SDValue(BFM, 0));
2875 ReplaceNode(N, Inner.getNode());
2876 return true;
2877 }
2878
2879 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2880 CurDAG->getTargetConstant(Imms, dl, VT)};
2881 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2882 return true;
2883}
2884
2885/// Does DstMask form a complementary pair with the mask provided by
2886/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2887/// this asks whether DstMask zeroes precisely those bits that will be set by
2888/// the other half.
2889static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2890 unsigned NumberOfIgnoredHighBits, EVT VT) {
2891 assert((VT == MVT::i32 || VT == MVT::i64) &&
2892 "i32 or i64 mask type expected!");
2893 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2894
2895 // Enable implicitTrunc as we're intentionally ignoring high bits.
2896 APInt SignificantDstMask =
2897 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2898 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2899
2900 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2901 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2902}
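// Example of a complementary pair (illustrative, not in the original source):
// for i32, DstMask == 0xffff00ff against inserted bits 0x0000ff00 is disjoint
// and the union is all-ones, so a BFI/BFXIL can merge the halves; DstMask ==
// 0xffff0000 against the same bits fails the isAllOnes() check.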
2903
2904// Look for bits that will be useful for later uses.
2905 // A bit is considered useless as soon as it is dropped and is never used
2906 // before it has been dropped.
2907 // E.g., looking for the useful bits of x:
2908 // 1. y = x & 0x7
2909 // 2. z = y >> 2
2910 // After #1, the useful bits of x are 0x7; these useful bits of x live
2911 // through y.
2912// After #2, the useful bits of x are 0x4.
2913// However, if x is used on an unpredictable instruction, then all its bits
2914// are useful.
2915// E.g.
2916// 1. y = x & 0x7
2917// 2. z = y >> 2
2918// 3. str x, [@x]
2919static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2920
2921 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2922 unsigned Depth) {
2923 uint64_t Imm =
2924 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2925 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2926 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2927 getUsefulBits(Op, UsefulBits, Depth + 1);
2928}
2929
2930 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2931 uint64_t Imm, uint64_t MSB,
2932 unsigned Depth) {
2933 // inherit the bitwidth value
2934 APInt OpUsefulBits(UsefulBits);
2935 OpUsefulBits = 1;
2936
2937 if (MSB >= Imm) {
2938 OpUsefulBits <<= MSB - Imm + 1;
2939 --OpUsefulBits;
2940 // The interesting part will be in the lower part of the result
2941 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2942 // The interesting part was starting at Imm in the argument
2943 OpUsefulBits <<= Imm;
2944 } else {
2945 OpUsefulBits <<= MSB + 1;
2946 --OpUsefulBits;
2947 // The interesting part will be shifted in the result
2948 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2949 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2950 // The interesting part was at zero in the argument
2951 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2952 }
2953
2954 UsefulBits &= OpUsefulBits;
2955}
2956
2957static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2958 unsigned Depth) {
2959 uint64_t Imm =
2960 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2961 uint64_t MSB =
2962 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2963
2964 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2965}
2966
2967 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2968 unsigned Depth) {
2969 uint64_t ShiftTypeAndValue =
2970 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2971 APInt Mask(UsefulBits);
2972 Mask.clearAllBits();
2973 Mask.flipAllBits();
2974
2975 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2976 // Shift Left
2977 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2978 Mask <<= ShiftAmt;
2979 getUsefulBits(Op, Mask, Depth + 1);
2980 Mask.lshrInPlace(ShiftAmt);
2981 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2982 // Shift Right
2983 // We do not handle AArch64_AM::ASR, because the sign will change the
2984 // number of useful bits
2985 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2986 Mask.lshrInPlace(ShiftAmt);
2987 getUsefulBits(Op, Mask, Depth + 1);
2988 Mask <<= ShiftAmt;
2989 } else
2990 return;
2991
2992 UsefulBits &= Mask;
2993}
2994
2995static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2996 unsigned Depth) {
2997 uint64_t Imm =
2998 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2999 uint64_t MSB =
3000 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3001
3002 APInt OpUsefulBits(UsefulBits);
3003 OpUsefulBits = 1;
3004
3005 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3006 ResultUsefulBits.flipAllBits();
3007 APInt Mask(UsefulBits.getBitWidth(), 0);
3008
3009 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3010
3011 if (MSB >= Imm) {
3012 // The instruction is a BFXIL.
3013 uint64_t Width = MSB - Imm + 1;
3014 uint64_t LSB = Imm;
3015
3016 OpUsefulBits <<= Width;
3017 --OpUsefulBits;
3018
3019 if (Op.getOperand(1) == Orig) {
3020 // Copy the low bits from the result to bits starting from LSB.
3021 Mask = ResultUsefulBits & OpUsefulBits;
3022 Mask <<= LSB;
3023 }
3024
3025 if (Op.getOperand(0) == Orig)
3026 // Bits starting from LSB in the input contribute to the result.
3027 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3028 } else {
3029 // The instruction is a BFI.
3030 uint64_t Width = MSB + 1;
3031 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3032
3033 OpUsefulBits <<= Width;
3034 --OpUsefulBits;
3035 OpUsefulBits <<= LSB;
3036
3037 if (Op.getOperand(1) == Orig) {
3038 // Copy the bits from the result to the zero bits.
3039 Mask = ResultUsefulBits & OpUsefulBits;
3040 Mask.lshrInPlace(LSB);
3041 }
3042
3043 if (Op.getOperand(0) == Orig)
3044 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3045 }
3046
3047 UsefulBits &= Mask;
3048}
3049
3050static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3051 SDValue Orig, unsigned Depth) {
3052
3053 // Users of this node should have already been instruction selected
3054 // FIXME: Can we turn that into an assert?
3055 if (!UserNode->isMachineOpcode())
3056 return;
3057
3058 switch (UserNode->getMachineOpcode()) {
3059 default:
3060 return;
3061 case AArch64::ANDSWri:
3062 case AArch64::ANDSXri:
3063 case AArch64::ANDWri:
3064 case AArch64::ANDXri:
3065 // We increment Depth only when we call the getUsefulBits
3066 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3067 Depth);
3068 case AArch64::UBFMWri:
3069 case AArch64::UBFMXri:
3070 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3071
3072 case AArch64::ORRWrs:
3073 case AArch64::ORRXrs:
3074 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3075 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3076 Depth);
3077 return;
3078 case AArch64::BFMWri:
3079 case AArch64::BFMXri:
3080 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3081
3082 case AArch64::STRBBui:
3083 case AArch64::STURBBi:
3084 if (UserNode->getOperand(0) != Orig)
3085 return;
3086 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3087 return;
3088
3089 case AArch64::STRHHui:
3090 case AArch64::STURHHi:
3091 if (UserNode->getOperand(0) != Orig)
3092 return;
3093 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3094 return;
3095 }
3096}
3097
3098static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3099 if (Depth >= SelectionDAG::MaxRecursionDepth)
3100 return;
3101 // Initialize UsefulBits
3102 if (!Depth) {
3103 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3104 // At the beginning, assume every produced bits is useful
3105 UsefulBits = APInt(Bitwidth, 0);
3106 UsefulBits.flipAllBits();
3107 }
3108 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3109
3110 for (SDNode *Node : Op.getNode()->users()) {
3111 // A use cannot produce useful bits
3112 APInt UsefulBitsForUse = APInt(UsefulBits);
3113 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3114 UsersUsefulBits |= UsefulBitsForUse;
3115 }
3116 // UsefulBits contains the produced bits that are meaningful for the
3117 // current definition, thus a user cannot make a bit meaningful at
3118 // this point
3119 UsefulBits &= UsersUsefulBits;
3120}
3121
3122/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3123/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3124/// 0, return Op unchanged.
3125static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3126 if (ShlAmount == 0)
3127 return Op;
3128
3129 EVT VT = Op.getValueType();
3130 SDLoc dl(Op);
3131 unsigned BitWidth = VT.getSizeInBits();
3132 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3133
3134 SDNode *ShiftNode;
3135 if (ShlAmount > 0) {
3136 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3137 ShiftNode = CurDAG->getMachineNode(
3138 UBFMOpc, dl, VT, Op,
3139 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3140 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3141 } else {
3142 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3143 assert(ShlAmount < 0 && "expected right shift");
3144 int ShrAmount = -ShlAmount;
3145 ShiftNode = CurDAG->getMachineNode(
3146 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3147 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3148 }
3149
3150 return SDValue(ShiftNode, 0);
3151}
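// Worked example (illustrative, not from the original source): a notional
// left shift by 3 of an i32 value becomes UBFMWri(Op, 32 - 3, 31 - 3), i.e.
// "lsl w0, w1, #3"; ShlAmount == -2 instead produces UBFMWri(Op, 2, 31),
// i.e. "lsr w0, w1, #2".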
3152
3153// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3154static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3155 bool BiggerPattern,
3156 const uint64_t NonZeroBits,
3157 SDValue &Src, int &DstLSB,
3158 int &Width);
3159
3160// For bit-field-positioning pattern "shl VAL, N)".
3161static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3162 bool BiggerPattern,
3163 const uint64_t NonZeroBits,
3164 SDValue &Src, int &DstLSB,
3165 int &Width);
3166
3167/// Does this tree qualify as an attempt to move a bitfield into position,
3168/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3169 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3170 bool BiggerPattern, SDValue &Src,
3171 int &DstLSB, int &Width) {
3172 EVT VT = Op.getValueType();
3173 unsigned BitWidth = VT.getSizeInBits();
3174 (void)BitWidth;
3175 assert(BitWidth == 32 || BitWidth == 64);
3176
3177 KnownBits Known = CurDAG->computeKnownBits(Op);
3178
3179 // Non-zero in the sense that they're not provably zero, which is the key
3180 // point if we want to use this value
3181 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3182 if (!isShiftedMask_64(NonZeroBits))
3183 return false;
3184
3185 switch (Op.getOpcode()) {
3186 default:
3187 break;
3188 case ISD::AND:
3189 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3190 NonZeroBits, Src, DstLSB, Width);
3191 case ISD::SHL:
3192 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3193 NonZeroBits, Src, DstLSB, Width);
3194 }
3195
3196 return false;
3197}
3198
3199 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3200 bool BiggerPattern,
3201 const uint64_t NonZeroBits,
3202 SDValue &Src, int &DstLSB,
3203 int &Width) {
3204 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3205
3206 EVT VT = Op.getValueType();
3207 assert((VT == MVT::i32 || VT == MVT::i64) &&
3208 "Caller guarantees VT is one of i32 or i64");
3209 (void)VT;
3210
3211 uint64_t AndImm;
3212 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3213 return false;
3214
3215 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3216 // 1) (AndImm & (1 << POS)) == 0
3217 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3218 //
3219 // 1) and 2) don't agree so something must be wrong (e.g., in
3220 // 'SelectionDAG::computeKnownBits')
3221 assert((~AndImm & NonZeroBits) == 0 &&
3222 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3223
3224 SDValue AndOp0 = Op.getOperand(0);
3225
3226 uint64_t ShlImm;
3227 SDValue ShlOp0;
3228 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3229 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3230 ShlOp0 = AndOp0.getOperand(0);
3231 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3232 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3233 ShlImm)) {
3234 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3235
3236 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3237 SDValue ShlVal = AndOp0.getOperand(0);
3238
3239 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3240 // expect VT to be MVT::i32.
3241 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3242
3243 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3244 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3245 } else
3246 return false;
3247
3248 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3249 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3250 // AndOp0+AND.
3251 if (!BiggerPattern && !AndOp0.hasOneUse())
3252 return false;
3253
3254 DstLSB = llvm::countr_zero(NonZeroBits);
3255 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3256
3257 // Bail out on large Width. This happens when no proper combining / constant
3258 // folding was performed.
3259 if (Width >= (int)VT.getSizeInBits()) {
3260 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
3261 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3262 // "val".
3263 // If VT is i32, Width >= 32 means:
3264 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3265 // demands at least 'Width' bits (after dag-combiner). This, together with
3266 // the `any_extend` Op (undefined higher bits), indicates a missed combination
3267 // when lowering the 'and' IR instruction to a machine IR instruction.
3268 LLVM_DEBUG(
3269 dbgs()
3270 << "Found large Width in bit-field-positioning -- this indicates no "
3271 "proper combining / constant folding was performed\n");
3272 return false;
3273 }
3274
3275 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3276 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3277 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3278 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3279 // which case it is not profitable to insert an extra shift.
3280 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3281 return false;
3282
3283 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3284 return true;
3285}
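// A hand-worked instance of the AND form above (value names are illustrative):
// for the i32 node (and (shl %x, 3), 0x78), the known non-zero bits are 0x78,
// a shifted mask with DstLSB = 3 and Width = 4. ShlImm (3) equals DstLSB, so
// Src is %x itself and the caller can form a UBFIZ of width 4 at bit 3. When
// ShlImm differs from DstLSB, the mismatch is only tolerated for BiggerPattern
// (BFI), where inserting an extra LSL/LSR around Src is still profitable.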
3286
3287 // For node (shl (and val, mask), N), returns true if the node is equivalent to
3288 // UBFIZ.
3289 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3290 SDValue &Src, int &DstLSB,
3291 int &Width) {
3292 // Caller should have verified that N is a left shift with constant shift
3293 // amount; asserts that.
3294 assert(Op.getOpcode() == ISD::SHL &&
3295 "Op.getNode() should be a SHL node to call this function");
3296 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3297 "Op.getNode() should shift ShlImm to call this function");
3298
3299 uint64_t AndImm = 0;
3300 SDValue Op0 = Op.getOperand(0);
3301 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3302 return false;
3303
3304 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3305 if (isMask_64(ShiftedAndImm)) {
3306 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3307 // should end with Mask, and could be prefixed with random bits if those
3308 // bits are shifted out.
3309 //
3310 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3311 // the AND result corresponding to those bits are shifted out, so it's fine
3312 // to not extract them.
3313 Width = llvm::countr_one(ShiftedAndImm);
3314 DstLSB = ShlImm;
3315 Src = Op0.getOperand(0);
3316 return true;
3317 }
3318 return false;
3319}
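// Illustrative example for the helper above: for (shl (and %x, 0xff), 8) on
// i64, ShiftedAndImm = ((0xff << 8) >> 8) = 0xff, which is a plain mask, so
// Width = 8, DstLSB = 8 and Src = %x; the node behaves like a UBFIZ of %x's
// low 8 bits into bits [15:8]. The same holds for an AND immediate such as
// 0xab000000000000ff, because the extra high bits are shifted out by the SHL
// and never reach the result.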
3320
3321 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3322 bool BiggerPattern,
3323 const uint64_t NonZeroBits,
3324 SDValue &Src, int &DstLSB,
3325 int &Width) {
3326 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3327
3328 EVT VT = Op.getValueType();
3329 assert((VT == MVT::i32 || VT == MVT::i64) &&
3330 "Caller guarantees that type is i32 or i64");
3331 (void)VT;
3332
3333 uint64_t ShlImm;
3334 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3335 return false;
3336
3337 if (!BiggerPattern && !Op.hasOneUse())
3338 return false;
3339
3340 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3341 return true;
3342
3343 DstLSB = llvm::countr_zero(NonZeroBits);
3344 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3345
3346 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3347 return false;
3348
3349 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3350 return true;
3351}
3352
3353static bool isShiftedMask(uint64_t Mask, EVT VT) {
3354 assert(VT == MVT::i32 || VT == MVT::i64);
3355 if (VT == MVT::i32)
3356 return isShiftedMask_32(Mask);
3357 return isShiftedMask_64(Mask);
3358}
3359
3360// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3361// inserted only sets known zero bits.
3362 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3363 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3364
3365 EVT VT = N->getValueType(0);
3366 if (VT != MVT::i32 && VT != MVT::i64)
3367 return false;
3368
3369 unsigned BitWidth = VT.getSizeInBits();
3370
3371 uint64_t OrImm;
3372 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3373 return false;
3374
3375 // Skip this transformation if the OR immediate can be encoded directly as an
3376 // ORR immediate. Otherwise, we'd trade an AND+ORR for ORR+BFI/BFXIL, which is
3377 // most likely performance neutral.
3378 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3379 return false;
3380
3381 uint64_t MaskImm;
3382 SDValue And = N->getOperand(0);
3383 // Must be a single use AND with an immediate operand.
3384 if (!And.hasOneUse() ||
3385 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3386 return false;
3387
3388 // Compute the Known Zero for the AND as this allows us to catch more general
3389 // cases than just looking for AND with imm.
3390 KnownBits Known = CurDAG->computeKnownBits(And);
3391
3392 // Non-zero in the sense that they're not provably zero, which is the key
3393 // point if we want to use this value.
3394 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3395
3396 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3397 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3398 return false;
3399
3400 // The bits being inserted must only set those bits that are known to be zero.
3401 if ((OrImm & NotKnownZero) != 0) {
3402 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3403 // currently handle this case.
3404 return false;
3405 }
3406
3407 // BFI/BFXIL dst, src, #lsb, #width.
3408 int LSB = llvm::countr_one(NotKnownZero);
3409 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3410
3411 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3412 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3413 unsigned ImmS = Width - 1;
3414
3415 // If we're creating a BFI instruction avoid cases where we need more
3416 // instructions to materialize the BFI constant as compared to the original
3417 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3418 // should be no worse in this case.
3419 bool IsBFI = LSB != 0;
3420 uint64_t BFIImm = OrImm >> LSB;
3421 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3422 // We have a BFI instruction and we know the constant can't be materialized
3423 // with an ORR-immediate against the zero register.
3424 unsigned OrChunks = 0, BFIChunks = 0;
3425 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3426 if (((OrImm >> Shift) & 0xFFFF) != 0)
3427 ++OrChunks;
3428 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3429 ++BFIChunks;
3430 }
3431 if (BFIChunks > OrChunks)
3432 return false;
3433 }
3434
3435 // Materialize the constant to be inserted.
3436 SDLoc DL(N);
3437 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3438 SDNode *MOVI = CurDAG->getMachineNode(
3439 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3440
3441 // Create the BFI/BFXIL instruction.
3442 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3443 CurDAG->getTargetConstant(ImmR, DL, VT),
3444 CurDAG->getTargetConstant(ImmS, DL, VT)};
3445 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3446 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3447 return true;
3448}
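// A hand-worked instance of the routine above (values chosen for
// illustration): for the i32 node (or (and %x, 0xffff00ff), 0x5500), Known.Zero
// of the AND covers 0xff00 (a shifted mask), NotKnownZero is 0xffff00ff, and
// the OR immediate only touches known-zero bits. LSB = 8 and Width = 8, so
// this is the BFI case: BFIImm = 0x5500 >> 8 = 0x55 is materialized with
// MOVi32imm and the node is selected to BFMWri with ImmR = (32 - 8) % 32 = 24
// and ImmS = 7, i.e. "mov w8, #0x55; bfi wX, w8, #8, #8". (0x5500 is not a
// valid ORR logical immediate, so the early bail-out above does not trigger.)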
3449
3450 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3451 SDValue &ShiftedOperand,
3452 uint64_t &EncodedShiftImm) {
3453 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3454 if (!Dst.hasOneUse())
3455 return false;
3456
3457 EVT VT = Dst.getValueType();
3458 assert((VT == MVT::i32 || VT == MVT::i64) &&
3459 "Caller should guarantee that VT is one of i32 or i64");
3460 const unsigned SizeInBits = VT.getSizeInBits();
3461
3462 SDLoc DL(Dst.getNode());
3463 uint64_t AndImm, ShlImm;
3464 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3465 isShiftedMask_64(AndImm)) {
3466 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3467 SDValue DstOp0 = Dst.getOperand(0);
3468 if (!DstOp0.hasOneUse())
3469 return false;
3470
3471 // An example to illustrate the transformation
3472 // From:
3473 // lsr x8, x1, #1
3474 // and x8, x8, #0x3f80
3475 // bfxil x8, x1, #0, #7
3476 // To:
3477 // and x8, x1, #0x7f
3478 // ubfx x9, x1, #8, #7
3479 // orr x8, x8, x9, lsl #7
3480 //
3481 // The number of instructions remains the same, but ORR is faster than BFXIL
3482 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3483 // the dependency chain is improved after the transformation.
3484 uint64_t SrlImm;
3485 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3486 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3487 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3488 unsigned MaskWidth =
3489 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3490 unsigned UBFMOpc =
3491 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3492 SDNode *UBFMNode = CurDAG->getMachineNode(
3493 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3494 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3495 VT),
3496 CurDAG->getTargetConstant(
3497 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3498 ShiftedOperand = SDValue(UBFMNode, 0);
3499 EncodedShiftImm = AArch64_AM::getShifterImm(
3500 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3501 return true;
3502 }
3503 }
3504 return false;
3505 }
3506
3507 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3508 ShiftedOperand = Dst.getOperand(0);
3509 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3510 return true;
3511 }
3512
3513 uint64_t SrlImm;
3514 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3515 ShiftedOperand = Dst.getOperand(0);
3516 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3517 return true;
3518 }
3519 return false;
3520}
3521
3522// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3523// the operands and select it to AArch64::ORR with shifted registers if
3524// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3525static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3526 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3527 const bool BiggerPattern) {
3528 EVT VT = N->getValueType(0);
3529 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3530 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3531 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3532 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3533 assert((VT == MVT::i32 || VT == MVT::i64) &&
3534 "Expect result type to be i32 or i64 since N is combinable to BFM");
3535 SDLoc DL(N);
3536
3537 // Bail out if BFM simplifies away one node in BFM Dst.
3538 if (OrOpd1 != Dst)
3539 return false;
3540
3541 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3542 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3543 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3544 if (BiggerPattern) {
3545 uint64_t SrcAndImm;
3546 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3547 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3548 // OrOpd0 = AND Src, #Mask
3549 // So BFM simplifies away one AND node from Src and doesn't simplify away
3550 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3551 // one node (from Rd), ORR is better since it has higher throughput and
3552 // smaller latency than BFM on many AArch64 processors (and for the rest
3553 // ORR is at least as good as BFM).
3554 SDValue ShiftedOperand;
3555 uint64_t EncodedShiftImm;
3556 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3557 EncodedShiftImm)) {
3558 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3559 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3560 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3561 return true;
3562 }
3563 }
3564 return false;
3565 }
3566
3567 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3568
3569 uint64_t ShlImm;
3570 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3571 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3572 SDValue Ops[] = {
3573 Dst, Src,
3574 CurDAG->getTargetConstant(
3575 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3576 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3577 return true;
3578 }
3579
3580 // Select the following pattern to left-shifted operand rather than BFI.
3581 // %val1 = op ..
3582 // %val2 = shl %val1, #imm
3583 // %res = or %val1, %val2
3584 //
3585 // If N is selected to be BFI, we know that
3586 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3587 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3588 //
3589 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3590 if (OrOpd0.getOperand(0) == OrOpd1) {
3591 SDValue Ops[] = {
3592 OrOpd1, OrOpd1,
3593 CurDAG->getTargetConstant(
3594 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3595 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3596 return true;
3597 }
3598 }
3599
3600 uint64_t SrlImm;
3601 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3602 // Select the following pattern to right-shifted operand rather than BFXIL.
3603 // %val1 = op ..
3604 // %val2 = lshr %val1, #imm
3605 // %res = or %val1, %val2
3606 //
3607 // If N is selected to be BFXIL, we know that
3608 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3609 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3610 //
3611 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3612 if (OrOpd0.getOperand(0) == OrOpd1) {
3613 SDValue Ops[] = {
3614 OrOpd1, OrOpd1,
3615 CurDAG->getTargetConstant(
3616 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3617 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3618 return true;
3619 }
3620 }
3621
3622 return false;
3623}
3624
3625static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3626 SelectionDAG *CurDAG) {
3627 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3628
3629 EVT VT = N->getValueType(0);
3630 if (VT != MVT::i32 && VT != MVT::i64)
3631 return false;
3632
3633 unsigned BitWidth = VT.getSizeInBits();
3634
3635 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3636 // have the expected shape. Try to undo that.
3637
3638 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3639 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3640
3641 // Given an OR operation, check if we have the following pattern
3642 // ubfm c, b, imm, imm2 (or something that does the same job, see
3643 // isBitfieldExtractOp)
3644 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3645 // countTrailingZeros(mask2) == imm2 - imm + 1
3646 // f = d | c
3647 // if yes, replace the OR instruction with:
3648 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3649
3650 // OR is commutative, check all combinations of operand order and values of
3651 // BiggerPattern, i.e.
3652 // Opd0, Opd1, BiggerPattern=false
3653 // Opd1, Opd0, BiggerPattern=false
3654 // Opd0, Opd1, BiggerPattern=true
3655 // Opd1, Opd0, BiggerPattern=true
3656 // Several of these combinations may match, so check with BiggerPattern=false
3657 // first since that will produce better results by matching more instructions
3658 // and/or inserting fewer extra instructions.
3659 for (int I = 0; I < 4; ++I) {
3660
3661 SDValue Dst, Src;
3662 unsigned ImmR, ImmS;
3663 bool BiggerPattern = I / 2;
3664 SDValue OrOpd0Val = N->getOperand(I % 2);
3665 SDNode *OrOpd0 = OrOpd0Val.getNode();
3666 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3667 SDNode *OrOpd1 = OrOpd1Val.getNode();
3668
3669 unsigned BFXOpc;
3670 int DstLSB, Width;
3671 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3672 NumberOfIgnoredLowBits, BiggerPattern)) {
3673 // Check that the returned opcode is compatible with the pattern,
3674 // i.e., same type and zero extended (U and not S)
3675 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3676 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3677 continue;
3678
3679 // Compute the width of the bitfield insertion
3680 DstLSB = 0;
3681 Width = ImmS - ImmR + 1;
3682 // FIXME: This constraint is to catch bitfield insertion; we may
3683 // want to widen the pattern if we want to grab the general bitfield
3684 // move case.
3685 if (Width <= 0)
3686 continue;
3687
3688 // If the mask on the insertee is correct, we have a BFXIL operation. We
3689 // can share the ImmR and ImmS values from the already-computed UBFM.
3690 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3691 BiggerPattern,
3692 Src, DstLSB, Width)) {
3693 ImmR = (BitWidth - DstLSB) % BitWidth;
3694 ImmS = Width - 1;
3695 } else
3696 continue;
3697
3698 // Check the second part of the pattern
3699 EVT VT = OrOpd1Val.getValueType();
3700 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3701
3702 // Compute the Known Zero for the candidate of the first operand.
3703 // This allows to catch more general case than just looking for
3704 // AND with imm. Indeed, simplify-demanded-bits may have removed
3705 // the AND instruction because it proves it was useless.
3706 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3707
3708 // Check if there is enough room for the second operand to appear
3709 // in the first one
3710 APInt BitsToBeInserted =
3711 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3712
3713 if ((BitsToBeInserted & ~Known.Zero) != 0)
3714 continue;
3715
3716 // Set the first operand
3717 uint64_t Imm;
3718 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3719 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3720 // In that case, we can eliminate the AND
3721 Dst = OrOpd1->getOperand(0);
3722 else
3723 // Maybe the AND has been removed by simplify-demanded-bits
3724 // or is useful because it discards more bits
3725 Dst = OrOpd1Val;
3726
3727 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3728 // with shifted operand is more efficient.
3729 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3730 BiggerPattern))
3731 return true;
3732
3733 // both parts match
3734 SDLoc DL(N);
3735 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3736 CurDAG->getTargetConstant(ImmS, DL, VT)};
3737 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3738 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3739 return true;
3740 }
3741
3742 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3743 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3744 // mask (e.g., 0x000ffff0).
3745 uint64_t Mask0Imm, Mask1Imm;
3746 SDValue And0 = N->getOperand(0);
3747 SDValue And1 = N->getOperand(1);
3748 if (And0.hasOneUse() && And1.hasOneUse() &&
3749 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3750 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3751 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3752 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3753
3754 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3755 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3756 // bits to be inserted.
3757 if (isShiftedMask(Mask0Imm, VT)) {
3758 std::swap(And0, And1);
3759 std::swap(Mask0Imm, Mask1Imm);
3760 }
3761
3762 SDValue Src = And1->getOperand(0);
3763 SDValue Dst = And0->getOperand(0);
3764 unsigned LSB = llvm::countr_zero(Mask1Imm);
3765 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3766
3767 // The BFXIL inserts the low-order bits from a source register, so right
3768 // shift the needed bits into place.
3769 SDLoc DL(N);
3770 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3771 uint64_t LsrImm = LSB;
3772 if (Src->hasOneUse() &&
3773 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3774 (LsrImm + LSB) < BitWidth) {
3775 Src = Src->getOperand(0);
3776 LsrImm += LSB;
3777 }
3778
3779 SDNode *LSR = CurDAG->getMachineNode(
3780 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3781 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3782
3783 // BFXIL is an alias of BFM, so translate to BFM operands.
3784 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3785 unsigned ImmS = Width - 1;
3786
3787 // Create the BFXIL instruction.
3788 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3789 CurDAG->getTargetConstant(ImmR, DL, VT),
3790 CurDAG->getTargetConstant(ImmS, DL, VT)};
3791 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3792 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3793 return true;
3794 }
3795
3796 return false;
3797}
3798
3799bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3800 if (N->getOpcode() != ISD::OR)
3801 return false;
3802
3803 APInt NUsefulBits;
3804 getUsefulBits(SDValue(N, 0), NUsefulBits);
3805
3806 // If none of the bits are useful, just return UNDEF.
3807 if (!NUsefulBits) {
3808 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3809 return true;
3810 }
3811
3812 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3813 return true;
3814
3815 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3816}
3817
3818 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3819/// equivalent of a left shift by a constant amount followed by an and masking
3820/// out a contiguous set of bits.
3821bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3822 if (N->getOpcode() != ISD::AND)
3823 return false;
3824
3825 EVT VT = N->getValueType(0);
3826 if (VT != MVT::i32 && VT != MVT::i64)
3827 return false;
3828
3829 SDValue Op0;
3830 int DstLSB, Width;
3831 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3832 Op0, DstLSB, Width))
3833 return false;
3834
3835 // ImmR is the rotate right amount.
3836 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3837 // ImmS is the most significant bit of the source to be moved.
3838 unsigned ImmS = Width - 1;
3839
3840 SDLoc DL(N);
3841 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3842 CurDAG->getTargetConstant(ImmS, DL, VT)};
3843 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3844 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3845 return true;
3846}
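// Illustrative example for the routine above: the i32 node
// (and (shl %x, 4), 0xf0) positions the low four bits of %x at bit 4 with all
// other result bits known zero, so DstLSB = 4 and Width = 4 and the node is
// selected to UBFMWri with ImmR = (32 - 4) % 32 = 28 and ImmS = 3, i.e. the
// UBFIZ alias "ubfiz w0, w1, #4, #4".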
3847
3848/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3849/// variable shift/rotate instructions.
3850bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3851 EVT VT = N->getValueType(0);
3852
3853 unsigned Opc;
3854 switch (N->getOpcode()) {
3855 case ISD::ROTR:
3856 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3857 break;
3858 case ISD::SHL:
3859 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3860 break;
3861 case ISD::SRL:
3862 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3863 break;
3864 case ISD::SRA:
3865 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3866 break;
3867 default:
3868 return false;
3869 }
3870
3871 uint64_t Size;
3872 uint64_t Bits;
3873 if (VT == MVT::i32) {
3874 Bits = 5;
3875 Size = 32;
3876 } else if (VT == MVT::i64) {
3877 Bits = 6;
3878 Size = 64;
3879 } else
3880 return false;
3881
3882 SDValue ShiftAmt = N->getOperand(1);
3883 SDLoc DL(N);
3884 SDValue NewShiftAmt;
3885
3886 // Skip over an extend of the shift amount.
3887 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3888 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3889 ShiftAmt = ShiftAmt->getOperand(0);
3890
3891 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3892 SDValue Add0 = ShiftAmt->getOperand(0);
3893 SDValue Add1 = ShiftAmt->getOperand(1);
3894 uint64_t Add0Imm;
3895 uint64_t Add1Imm;
3896 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3897 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3898 // to avoid the ADD/SUB.
3899 NewShiftAmt = Add0;
3900 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3901 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3902 (Add0Imm % Size == 0)) {
3903 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3904 // to generate a NEG instead of a SUB from a constant.
3905 unsigned NegOpc;
3906 unsigned ZeroReg;
3907 EVT SubVT = ShiftAmt->getValueType(0);
3908 if (SubVT == MVT::i32) {
3909 NegOpc = AArch64::SUBWrr;
3910 ZeroReg = AArch64::WZR;
3911 } else {
3912 assert(SubVT == MVT::i64);
3913 NegOpc = AArch64::SUBXrr;
3914 ZeroReg = AArch64::XZR;
3915 }
3916 SDValue Zero =
3917 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3918 MachineSDNode *Neg =
3919 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3920 NewShiftAmt = SDValue(Neg, 0);
3921 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3922 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3923 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3924 // to generate a NOT instead of a SUB from a constant.
3925 unsigned NotOpc;
3926 unsigned ZeroReg;
3927 EVT SubVT = ShiftAmt->getValueType(0);
3928 if (SubVT == MVT::i32) {
3929 NotOpc = AArch64::ORNWrr;
3930 ZeroReg = AArch64::WZR;
3931 } else {
3932 assert(SubVT == MVT::i64);
3933 NotOpc = AArch64::ORNXrr;
3934 ZeroReg = AArch64::XZR;
3935 }
3936 SDValue Zero =
3937 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3938 MachineSDNode *Not =
3939 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3940 NewShiftAmt = SDValue(Not, 0);
3941 } else
3942 return false;
3943 } else {
3944 // If the shift amount is masked with an AND, check that the mask covers the
3945 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3946 // the AND.
3947 uint64_t MaskImm;
3948 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3949 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3950 return false;
3951
3952 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3953 return false;
3954
3955 NewShiftAmt = ShiftAmt->getOperand(0);
3956 }
3957
3958 // Narrow/widen the shift amount to match the size of the shift operation.
3959 if (VT == MVT::i32)
3960 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3961 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3962 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3963 MachineSDNode *Ext = CurDAG->getMachineNode(
3964 AArch64::SUBREG_TO_REG, DL, VT,
3965 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3966 NewShiftAmt = SDValue(Ext, 0);
3967 }
3968
3969 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3970 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3971 return true;
3972}
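// Illustrative examples for the shift-amount folds above (value names are
// arbitrary): for an i64 shift, (srl %x, (add %y, 64)) selects to
// LSRVXr %x, %y, because the hardware only reads the low 6 bits of the amount
// and the +64 is redundant; and (shl %x, (sub 64, %y)) feeds the amount
// through a NEG (SUBXrr XZR, %y) instead of materializing the constant.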
3973
3974 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3975 SDValue &FixedPos,
3976 unsigned RegWidth,
3977 bool isReciprocal) {
3978 APFloat FVal(0.0);
3979 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3980 FVal = CN->getValueAPF();
3981 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3982 // Some otherwise illegal constants are allowed in this case.
3983 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3984 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3985 return false;
3986
3987 ConstantPoolSDNode *CN =
3988 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3989 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3990 } else
3991 return false;
3992
3993 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3994 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3995 // x-register.
3996 //
3997 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3998 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3999 // integers.
4000 bool IsExact;
4001
4002 if (isReciprocal)
4003 if (!FVal.getExactInverse(&FVal))
4004 return false;
4005
4006 // fbits is between 1 and 64 in the worst-case, which means the fmul
4007 // could have 2^64 as an actual operand. Need 65 bits of precision.
4008 APSInt IntVal(65, true);
4009 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4010
4011 // N.b. isPowerOf2 also checks for > 0.
4012 if (!IsExact || !IntVal.isPowerOf2())
4013 return false;
4014 unsigned FBits = IntVal.logBase2();
4015
4016 // Checks above should have guaranteed that we haven't lost information in
4017 // finding FBits, but it must still be in range.
4018 if (FBits == 0 || FBits > RegWidth) return false;
4019
4020 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4021 return true;
4022}
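// Illustrative use of the helper above: for (fp_to_sint (fmul %val, 256.0))
// with a 32-bit destination, FVal = 256.0 = 2^8, so FBits = 8 and FixedPos is
// set to 8; the fixed-point conversion patterns that use this predicate can
// then select an FCVTZS with #8 fractional bits instead of a separate
// multiply. The reciprocal variant accepts 0.00390625 (= 1/256) the same way.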
4023
4024bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4025 unsigned RegWidth) {
4026 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4027 false);
4028}
4029
4030bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4031 SDValue &FixedPos,
4032 unsigned RegWidth) {
4033 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4034 true);
4035}
4036
4037 // Inspects a register string of the form o0:op1:CRn:CRm:op2, gets the fields
4038 // of the string, obtains the integer values from them, and combines these
4039 // into a single value to be used in the MRS/MSR instruction.
4040 static int getIntOperandFromRegisterString(StringRef RegString) {
4041 SmallVector<StringRef, 5> Fields;
4042 RegString.split(Fields, ':');
4043
4044 if (Fields.size() == 1)
4045 return -1;
4046
4047 assert(Fields.size() == 5
4048 && "Invalid number of fields in read register string");
4049
4050 SmallVector<int, 5> Ops;
4051 bool AllIntFields = true;
4052
4053 for (StringRef Field : Fields) {
4054 unsigned IntField;
4055 AllIntFields &= !Field.getAsInteger(10, IntField);
4056 Ops.push_back(IntField);
4057 }
4058
4059 assert(AllIntFields &&
4060 "Unexpected non-integer value in special register string.");
4061 (void)AllIntFields;
4062
4063 // Need to combine the integer fields of the string into a single value
4064 // based on the bit encoding of MRS/MSR instruction.
4065 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4066 (Ops[3] << 3) | (Ops[4]);
4067}
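// Worked example for the packing above: the string "1:2:7:4:5" produces
// Ops = {1, 2, 7, 4, 5} and the function returns
// (1 << 14) | (2 << 11) | (7 << 7) | (4 << 3) | 5 = 0x53a5, matching the
// o0:op1:CRn:CRm:op2 field layout of the MRS/MSR system-register encoding.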
4068
4069// Lower the read_register intrinsic to an MRS instruction node if the special
4070// register string argument is either of the form detailed in the ALCE (the
4071 // form described in getIntOperandFromRegisterString) or is a named register
4072// known by the MRS SysReg mapper.
4073bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4074 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4075 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4076 SDLoc DL(N);
4077
4078 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4079
4080 unsigned Opcode64Bit = AArch64::MRS;
4081 int Imm = getIntOperandFromRegisterString(RegString->getString());
4082 if (Imm == -1) {
4083 // No match, Use the sysreg mapper to map the remaining possible strings to
4084 // the value for the register to be used for the instruction operand.
4085 const auto *TheReg =
4086 AArch64SysReg::lookupSysRegByName(RegString->getString());
4087 if (TheReg && TheReg->Readable &&
4088 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4089 Imm = TheReg->Encoding;
4090 else
4091 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4092
4093 if (Imm == -1) {
4094 // Still no match, see if this is "pc" or give up.
4095 if (!ReadIs128Bit && RegString->getString() == "pc") {
4096 Opcode64Bit = AArch64::ADR;
4097 Imm = 0;
4098 } else {
4099 return false;
4100 }
4101 }
4102 }
4103
4104 SDValue InChain = N->getOperand(0);
4105 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4106 if (!ReadIs128Bit) {
4107 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4108 {SysRegImm, InChain});
4109 } else {
4110 SDNode *MRRS = CurDAG->getMachineNode(
4111 AArch64::MRRS, DL,
4112 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4113 {SysRegImm, InChain});
4114
4115 // Sysregs are not endian. The even register always contains the low half
4116 // of the register.
4117 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4118 SDValue(MRRS, 0));
4119 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4120 SDValue(MRRS, 0));
4121 SDValue OutChain = SDValue(MRRS, 1);
4122
4123 ReplaceUses(SDValue(N, 0), Lo);
4124 ReplaceUses(SDValue(N, 1), Hi);
4125 ReplaceUses(SDValue(N, 2), OutChain);
4126 };
4127 return true;
4128}
4129
4130// Lower the write_register intrinsic to an MSR instruction node if the special
4131// register string argument is either of the form detailed in the ALCE (the
4132 // form described in getIntOperandFromRegisterString) or is a named register
4133// known by the MSR SysReg mapper.
4134bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4135 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4136 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4137 SDLoc DL(N);
4138
4139 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4140
4141 if (!WriteIs128Bit) {
4142 // Check if the register was one of those allowed as the pstatefield value
4143 // in the MSR (immediate) instruction. To accept the values allowed in the
4144 // pstatefield for the MSR (immediate) instruction, we also require that an
4145 // immediate value has been provided as an argument; we know that this is
4146 // the case as it has been ensured by semantic checking.
4147 auto trySelectPState = [&](auto PMapper, unsigned State) {
4148 if (PMapper) {
4149 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4150 "Expected a constant integer expression.");
4151 unsigned Reg = PMapper->Encoding;
4152 uint64_t Immed = N->getConstantOperandVal(2);
4153 CurDAG->SelectNodeTo(
4154 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4155 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4156 return true;
4157 }
4158 return false;
4159 };
4160
4161 if (trySelectPState(
4162 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4163 AArch64::MSRpstateImm4))
4164 return true;
4165 if (trySelectPState(
4166 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4167 AArch64::MSRpstateImm1))
4168 return true;
4169 }
4170
4171 int Imm = getIntOperandFromRegisterString(RegString->getString());
4172 if (Imm == -1) {
4173 // Use the sysreg mapper to attempt to map the remaining possible strings
4174 // to the value for the register to be used for the MSR (register)
4175 // instruction operand.
4176 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4177 if (TheReg && TheReg->Writeable &&
4178 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4179 Imm = TheReg->Encoding;
4180 else
4181 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4182
4183 if (Imm == -1)
4184 return false;
4185 }
4186
4187 SDValue InChain = N->getOperand(0);
4188 if (!WriteIs128Bit) {
4189 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4190 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4191 N->getOperand(2), InChain);
4192 } else {
4193 // No endian swap. The lower half always goes into the even subreg, and the
4194 // higher half always into the odd subreg.
4195 SDNode *Pair = CurDAG->getMachineNode(
4196 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4197 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4198 MVT::i32),
4199 N->getOperand(2),
4200 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4201 N->getOperand(3),
4202 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4203
4204 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4205 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4206 SDValue(Pair, 0), InChain);
4207 }
4208
4209 return true;
4210}
4211
4212/// We've got special pseudo-instructions for these
4213bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4214 unsigned Opcode;
4215 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4216
4217 // Leave IR for LSE if subtarget supports it.
4218 if (Subtarget->hasLSE()) return false;
4219
4220 if (MemTy == MVT::i8)
4221 Opcode = AArch64::CMP_SWAP_8;
4222 else if (MemTy == MVT::i16)
4223 Opcode = AArch64::CMP_SWAP_16;
4224 else if (MemTy == MVT::i32)
4225 Opcode = AArch64::CMP_SWAP_32;
4226 else if (MemTy == MVT::i64)
4227 Opcode = AArch64::CMP_SWAP_64;
4228 else
4229 llvm_unreachable("Unknown AtomicCmpSwap type");
4230
4231 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4232 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4233 N->getOperand(0)};
4234 SDNode *CmpSwap = CurDAG->getMachineNode(
4235 Opcode, SDLoc(N),
4236 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4237
4238 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4239 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4240
4241 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4242 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4243 CurDAG->RemoveDeadNode(N);
4244
4245 return true;
4246}
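// Illustrative note on the pseudo selection above: an i32 cmpxchg on a target
// without LSE selects to the CMP_SWAP_32 pseudo, which is expanded after
// instruction selection into an exclusive load/store (LDAXR/STLXR) retry loop;
// that is why the node's memory operand is transferred to the machine node
// here.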
4247
4248bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4249 SDValue &Shift, bool Negate) {
4250 if (!isa<ConstantSDNode>(N))
4251 return false;
4252
4253 SDLoc DL(N);
4254 APInt Val =
4255 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4256
4257 if (Negate)
4258 Val = -Val;
4259
4260 switch (VT.SimpleTy) {
4261 case MVT::i8:
4262 // All immediates are supported.
4263 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4264 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4265 return true;
4266 case MVT::i16:
4267 case MVT::i32:
4268 case MVT::i64:
4269 // Support 8bit unsigned immediates.
4270 if ((Val & ~0xff) == 0) {
4271 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4272 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4273 return true;
4274 }
4275 // Support 16bit unsigned immediates that are a multiple of 256.
4276 if ((Val & ~0xff00) == 0) {
4277 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4278 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4279 return true;
4280 }
4281 break;
4282 default:
4283 break;
4284 }
4285
4286 return false;
4287}
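// Illustrative example for the immediate splitting above: for an i32 element
// constant 0x1200, the low byte is zero, so the second form applies with
// Shift = 8 and Imm = 0x12 (i.e. #18, lsl #8). A value such as 0x1201 fits
// neither form and falls back to non-immediate selection.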
4288
4289bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4290 SDValue &Imm, SDValue &Shift,
4291 bool Negate) {
4292 if (!isa<ConstantSDNode>(N))
4293 return false;
4294
4295 SDLoc DL(N);
4296 int64_t Val = cast<ConstantSDNode>(N)
4297 ->getAPIntValue()
4298 .trunc(VT.getFixedSizeInBits())
4299 .getSExtValue();
4300
4301 if (Negate)
4302 Val = -Val;
4303
4304 // Signed saturating instructions treat their immediate operand as unsigned,
4305 // whereas the related intrinsics define their operands to be signed. This
4306 // means we can only use the immediate form when the operand is non-negative.
4307 if (Val < 0)
4308 return false;
4309
4310 switch (VT.SimpleTy) {
4311 case MVT::i8:
4312 // All positive immediates are supported.
4313 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4314 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4315 return true;
4316 case MVT::i16:
4317 case MVT::i32:
4318 case MVT::i64:
4319 // Support 8bit positive immediates.
4320 if (Val <= 255) {
4321 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4322 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4323 return true;
4324 }
4325 // Support 16bit positive immediates that are a multiple of 256.
4326 if (Val <= 65280 && Val % 256 == 0) {
4327 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4328 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4329 return true;
4330 }
4331 break;
4332 default:
4333 break;
4334 }
4335
4336 return false;
4337}
4338
4339bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4340 SDValue &Shift) {
4341 if (!isa<ConstantSDNode>(N))
4342 return false;
4343
4344 SDLoc DL(N);
4345 int64_t Val = cast<ConstantSDNode>(N)
4346 ->getAPIntValue()
4347 .trunc(VT.getFixedSizeInBits())
4348 .getSExtValue();
4349
4350 switch (VT.SimpleTy) {
4351 case MVT::i8:
4352 // All immediates are supported.
4353 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4354 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4355 return true;
4356 case MVT::i16:
4357 case MVT::i32:
4358 case MVT::i64:
4359 // Support 8bit signed immediates.
4360 if (Val >= -128 && Val <= 127) {
4361 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4362 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4363 return true;
4364 }
4365 // Support 16bit signed immediates that are a multiple of 256.
4366 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4367 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4368 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4369 return true;
4370 }
4371 break;
4372 default:
4373 break;
4374 }
4375
4376 return false;
4377}
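// Illustrative example for the signed form above: an i16 element constant of
// -3072 is a multiple of 256 inside [-32768, 32512], so Shift = 8 and
// Imm = (-3072 >> 8) & 0xff = 0xf4, i.e. the byte -12 placed with "lsl #8" in
// the CPY/DUP encoding.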
4378
4379bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4380 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4381 int64_t ImmVal = CNode->getSExtValue();
4382 SDLoc DL(N);
4383 if (ImmVal >= -128 && ImmVal < 128) {
4384 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4385 return true;
4386 }
4387 }
4388 return false;
4389}
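// Illustrative example for the signed arithmetic immediate above: -100 lies in
// [-128, 128) and is accepted as an 8-bit signed immediate, whereas 200 is out
// of range and falls back to register selection.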
4390
4391bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4392 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4393 uint64_t ImmVal = CNode->getZExtValue();
4394
4395 switch (VT.SimpleTy) {
4396 case MVT::i8:
4397 ImmVal &= 0xFF;
4398 break;
4399 case MVT::i16:
4400 ImmVal &= 0xFFFF;
4401 break;
4402 case MVT::i32:
4403 ImmVal &= 0xFFFFFFFF;
4404 break;
4405 case MVT::i64:
4406 break;
4407 default:
4408 llvm_unreachable("Unexpected type");
4409 }
4410
4411 if (ImmVal < 256) {
4412 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4413 return true;
4414 }
4415 }
4416 return false;
4417}
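// Illustrative example for the unsigned arithmetic immediate above: an i16
// element constant 200 (0x00c8) survives the per-element masking and is below
// 256, so it is usable directly; 0x1234 is rejected and the operand is
// materialized in a register instead.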
4418
4419bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4420 bool Invert) {
4421 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4422 uint64_t ImmVal = CNode->getZExtValue();
4423 SDLoc DL(N);
4424
4425 if (Invert)
4426 ImmVal = ~ImmVal;
4427
4428 // Shift mask depending on type size.
4429 switch (VT.SimpleTy) {
4430 case MVT::i8:
4431 ImmVal &= 0xFF;
4432 ImmVal |= ImmVal << 8;
4433 ImmVal |= ImmVal << 16;
4434 ImmVal |= ImmVal << 32;
4435 break;
4436 case MVT::i16:
4437 ImmVal &= 0xFFFF;
4438 ImmVal |= ImmVal << 16;
4439 ImmVal |= ImmVal << 32;
4440 break;
4441 case MVT::i32:
4442 ImmVal &= 0xFFFFFFFF;
4443 ImmVal |= ImmVal << 32;
4444 break;
4445 case MVT::i64:
4446 break;
4447 default:
4448 llvm_unreachable("Unexpected type");
4449 }
4450
4451 uint64_t encoding;
4452 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4453 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4454 return true;
4455 }
4456 }
4457 return false;
4458}
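// Illustrative example for the replication above: an i16 element constant
// 0x00ff is widened to 0x00ff00ff00ff00ff before the logical-immediate check;
// that pattern (eight ones per 16-bit element) is encodable, so Imm receives
// the encoded form and the immediate variant of the SVE logical operation can
// be used. With Invert set (e.g. for BIC-style patterns), the value is
// complemented before the same replication and check.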
4459
4460// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4461// Rather than attempt to normalise everything we can sometimes saturate the
4462// shift amount during selection. This function also allows for consistent
4463// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4464// required by the instructions.
4465bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4466 uint64_t High, bool AllowSaturation,
4467 SDValue &Imm) {
4468 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4469 uint64_t ImmVal = CN->getZExtValue();
4470
4471 // Reject shift amounts that are too small.
4472 if (ImmVal < Low)
4473 return false;
4474
4475 // Reject or saturate shift amounts that are too big.
4476 if (ImmVal > High) {
4477 if (!AllowSaturation)
4478 return false;
4479 ImmVal = High;
4480 }
4481
4482 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4483 return true;
4484 }
4485
4486 return false;
4487}
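// Illustrative example for the saturation above: the SVE right-shift
// intrinsics are typically instantiated with Low = 1 and High equal to the
// element size (e.g. 64 for .d lanes); with AllowSaturation set, a shift
// amount of 100 is clamped to 64, which is encodable and produces the same
// all-zero / all-sign-bit result, while an amount of 0 is rejected and left to
// other selection.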
4488
4489bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4490 // tagp(FrameIndex, IRGstack, tag_offset):
4491 // since the offset between FrameIndex and IRGstack is a compile-time
4492 // constant, this can be lowered to a single ADDG instruction.
4493 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4494 return false;
4495 }
4496
4497 SDValue IRG_SP = N->getOperand(2);
4498 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4499 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4500 return false;
4501 }
4502
4503 const TargetLowering *TLI = getTargetLowering();
4504 SDLoc DL(N);
4505 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4506 SDValue FiOp = CurDAG->getTargetFrameIndex(
4507 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4508 int TagOffset = N->getConstantOperandVal(3);
4509
4510 SDNode *Out = CurDAG->getMachineNode(
4511 AArch64::TAGPstack, DL, MVT::i64,
4512 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4513 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4514 ReplaceNode(N, Out);
4515 return true;
4516}
4517
4518void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4519 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4520 "llvm.aarch64.tagp third argument must be an immediate");
4521 if (trySelectStackSlotTagP(N))
4522 return;
4523 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4524 // compile-time constant, not just for stack allocations.
4525
4526 // General case for unrelated pointers in Op1 and Op2.
4527 SDLoc DL(N);
4528 int TagOffset = N->getConstantOperandVal(3);
4529 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4530 {N->getOperand(1), N->getOperand(2)});
4531 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4532 {SDValue(N1, 0), N->getOperand(2)});
4533 SDNode *N3 = CurDAG->getMachineNode(
4534 AArch64::ADDG, DL, MVT::i64,
4535 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4536 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4537 ReplaceNode(N, N3);
4538}
4539
4540bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4541 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4542
4543 // Bail when not a "cast" like insert_subvector.
4544 if (N->getConstantOperandVal(2) != 0)
4545 return false;
4546 if (!N->getOperand(0).isUndef())
4547 return false;
4548
4549 // Bail when normal isel should do the job.
4550 EVT VT = N->getValueType(0);
4551 EVT InVT = N->getOperand(1).getValueType();
4552 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4553 return false;
4554 if (InVT.getSizeInBits() <= 128)
4555 return false;
4556
4557 // NOTE: We can only get here when doing fixed length SVE code generation.
4558 // We do manual selection because the types involved are not linked to real
4559 // registers (despite being legal) and must be coerced into SVE registers.
4560
4562 "Expected to insert into a packed scalable vector!");
4563
4564 SDLoc DL(N);
4565 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4566 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4567 N->getOperand(1), RC));
4568 return true;
4569}
4570
4571bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4572 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4573
4574 // Bail when not a "cast" like extract_subvector.
4575 if (N->getConstantOperandVal(1) != 0)
4576 return false;
4577
4578 // Bail when normal isel can do the job.
4579 EVT VT = N->getValueType(0);
4580 EVT InVT = N->getOperand(0).getValueType();
4581 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4582 return false;
4583 if (VT.getSizeInBits() <= 128)
4584 return false;
4585
4586 // NOTE: We can only get here when doing fixed length SVE code generation.
4587 // We do manual selection because the types involved are not linked to real
4588 // registers (despite being legal) and must be coerced into SVE registers.
4589
4591 "Expected to extract from a packed scalable vector!");
4592
4593 SDLoc DL(N);
4594 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4595 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4596 N->getOperand(0), RC));
4597 return true;
4598}
4599
4600bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4601 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4602
4603 SDValue N0 = N->getOperand(0);
4604 SDValue N1 = N->getOperand(1);
4605
4606 EVT VT = N->getValueType(0);
4607 SDLoc DL(N);
4608
4609 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4610 // Rotate by a constant is a funnel shift in IR, which is expanded to
4611 // an OR with shifted operands.
4612 // We do the following transform:
4613 // OR N0, N1 -> xar (x, y, imm)
4614 // Where:
4615 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4616 // N0 = SHL_PRED true, V, splat(bits-imm)
4617 // V = (xor x, y)
4618 if (VT.isScalableVector() &&
4619 (Subtarget->hasSVE2() ||
4620 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4621 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4622 N1.getOpcode() != AArch64ISD::SRL_PRED)
4623 std::swap(N0, N1);
4624 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4625 N1.getOpcode() != AArch64ISD::SRL_PRED)
4626 return false;
4627
4628 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4629 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4630 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4631 return false;
4632
4633 if (N0.getOperand(1) != N1.getOperand(1))
4634 return false;
4635
4636 SDValue R1, R2;
4637 bool IsXOROperand = true;
4638 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4639 IsXOROperand = false;
4640 } else {
4641 R1 = N0.getOperand(1).getOperand(0);
4642 R2 = N1.getOperand(1).getOperand(1);
4643 }
4644
4645 APInt ShlAmt, ShrAmt;
4646 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4647 !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4648 return false;
4649
4650 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4651 return false;
4652
4653 if (!IsXOROperand) {
4654 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4655 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4656 SDValue MOVIV = SDValue(MOV, 0);
4657
4658 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4659 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4660 VT, Zero, MOVIV, ZSub);
4661
4662 R1 = N1->getOperand(1);
4663 R2 = SDValue(SubRegToReg, 0);
4664 }
4665
4666 SDValue Imm =
4667 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4668
4669 SDValue Ops[] = {R1, R2, Imm};
4670 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4671 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4672 AArch64::XAR_ZZZI_D})) {
4673 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4674 return true;
4675 }
4676 return false;
4677 }
4678
4679 // We have Neon SHA3 XAR operation for v2i64 but for types
4680 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4681 // is available.
4682 EVT SVT;
4683 switch (VT.getSimpleVT().SimpleTy) {
4684 case MVT::v4i32:
4685 case MVT::v2i32:
4686 SVT = MVT::nxv4i32;
4687 break;
4688 case MVT::v8i16:
4689 case MVT::v4i16:
4690 SVT = MVT::nxv8i16;
4691 break;
4692 case MVT::v16i8:
4693 case MVT::v8i8:
4694 SVT = MVT::nxv16i8;
4695 break;
4696 case MVT::v2i64:
4697 case MVT::v1i64:
4698 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4699 break;
4700 default:
4701 return false;
4702 }
4703
4704 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4705 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4706 return false;
4707
4708 if (N0->getOpcode() != AArch64ISD::VSHL ||
4709 N1->getOpcode() != AArch64ISD::VLSHR)
4710 return false;
4711
4712 if (N0->getOperand(0) != N1->getOperand(0))
4713 return false;
4714
4715 SDValue R1, R2;
4716 bool IsXOROperand = true;
4717 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4718 IsXOROperand = false;
4719 } else {
4720 SDValue XOR = N0.getOperand(0);
4721 R1 = XOR.getOperand(0);
4722 R2 = XOR.getOperand(1);
4723 }
4724
4725 unsigned HsAmt = N0.getConstantOperandVal(1);
4726 unsigned ShAmt = N1.getConstantOperandVal(1);
4727
4728 SDValue Imm = CurDAG->getTargetConstant(
4729 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4730
4731 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4732 if (ShAmt + HsAmt != VTSizeInBits)
4733 return false;
4734
4735 if (!IsXOROperand) {
4736 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4737 SDNode *MOV =
4738 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4739 SDValue MOVIV = SDValue(MOV, 0);
4740
4741 R1 = N1->getOperand(0);
4742 R2 = MOVIV;
4743 }
4744
4745 if (SVT != VT) {
4746 SDValue Undef =
4747 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4748
4749 if (SVT.isScalableVector() && VT.is64BitVector()) {
4750 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4751
4752 SDValue UndefQ = SDValue(
4753 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4754 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4755
4756 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4757 UndefQ, R1, DSub),
4758 0);
4759 if (R2.getValueType() == VT)
4760 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4761 UndefQ, R2, DSub),
4762 0);
4763 }
4764
4765 SDValue SubReg = CurDAG->getTargetConstant(
4766 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4767
4768 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4769 R1, SubReg),
4770 0);
4771
4772 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4773 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4774 Undef, R2, SubReg),
4775 0);
4776 }
4777
4778 SDValue Ops[] = {R1, R2, Imm};
4779 SDNode *XAR = nullptr;
4780
4781 if (SVT.isScalableVector()) {
4782 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4783 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4784 AArch64::XAR_ZZZI_D}))
4785 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4786 } else {
4787 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4788 }
4789
4790 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4791
4792 if (SVT != VT) {
4793 if (VT.is64BitVector() && SVT.isScalableVector()) {
4794 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4795
4796 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4797 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4798 SDValue(XAR, 0), ZSub);
4799
4800 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4801 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4802 SDValue(Q, 0), DSub);
4803 } else {
4804 SDValue SubReg = CurDAG->getTargetConstant(
4805 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4806 MVT::i32);
4807 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4808 SDValue(XAR, 0), SubReg);
4809 }
4810 }
4811 ReplaceNode(N, XAR);
4812 return true;
4813}
4814
4815void AArch64DAGToDAGISel::Select(SDNode *Node) {
4816 // If we have a custom node, we already have selected!
4817 if (Node->isMachineOpcode()) {
4818 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4819 Node->setNodeId(-1);
4820 return;
4821 }
4822
4823 // A few cases need custom selection.
4824 EVT VT = Node->getValueType(0);
4825
4826 switch (Node->getOpcode()) {
4827 default:
4828 break;
4829
4830 case ISD::ATOMIC_CMP_SWAP:
4831 if (SelectCMP_SWAP(Node))
4832 return;
4833 break;
4834
4835 case ISD::READ_REGISTER:
4836 case AArch64ISD::MRRS:
4837 if (tryReadRegister(Node))
4838 return;
4839 break;
4840
4841 case ISD::WRITE_REGISTER:
4842 case AArch64ISD::MSRR:
4843 if (tryWriteRegister(Node))
4844 return;
4845 break;
4846
4847 case ISD::LOAD: {
4848 // Try to select as an indexed load. Fall through to normal processing
4849 // if we can't.
4850 if (tryIndexedLoad(Node))
4851 return;
4852 break;
4853 }
4854
4855 case ISD::SRL:
4856 case ISD::AND:
4857 case ISD::SRA:
4858 case ISD::SIGN_EXTEND_INREG:
4859 if (tryBitfieldExtractOp(Node))
4860 return;
4861 if (tryBitfieldInsertInZeroOp(Node))
4862 return;
4863 [[fallthrough]];
4864 case ISD::ROTR:
4865 case ISD::SHL:
4866 if (tryShiftAmountMod(Node))
4867 return;
4868 break;
4869
4870 case ISD::SIGN_EXTEND:
4871 if (tryBitfieldExtractOpFromSExt(Node))
4872 return;
4873 break;
4874
4875 case ISD::OR:
4876 if (tryBitfieldInsertOp(Node))
4877 return;
4878 if (trySelectXAR(Node))
4879 return;
4880 break;
4881
4882 case ISD::EXTRACT_SUBVECTOR: {
4883 if (trySelectCastScalableToFixedLengthVector(Node))
4884 return;
4885 break;
4886 }
4887
4888 case ISD::INSERT_SUBVECTOR: {
4889 if (trySelectCastFixedLengthToScalableVector(Node))
4890 return;
4891 break;
4892 }
4893
4894 case ISD::Constant: {
4895 // Materialize zero constants as copies from WZR/XZR. This allows
4896 // the coalescer to propagate these into other instructions.
4897 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4898 if (ConstNode->isZero()) {
4899 if (VT == MVT::i32) {
4900 SDValue New = CurDAG->getCopyFromReg(
4901 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4902 ReplaceNode(Node, New.getNode());
4903 return;
4904 } else if (VT == MVT::i64) {
4905 SDValue New = CurDAG->getCopyFromReg(
4906 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4907 ReplaceNode(Node, New.getNode());
4908 return;
4909 }
4910 }
4911 break;
4912 }
4913
4914 case ISD::FrameIndex: {
4915 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4916 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4917 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4918 const TargetLowering *TLI = getTargetLowering();
4919 SDValue TFI = CurDAG->getTargetFrameIndex(
4920 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4921 SDLoc DL(Node);
4922 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4923 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4924 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4925 return;
4926 }
4927 case ISD::INTRINSIC_W_CHAIN: {
4928 unsigned IntNo = Node->getConstantOperandVal(1);
4929 switch (IntNo) {
4930 default:
4931 break;
4932 case Intrinsic::aarch64_gcsss: {
4933 SDLoc DL(Node);
4934 SDValue Chain = Node->getOperand(0);
4935 SDValue Val = Node->getOperand(2);
4936 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4937 SDNode *SS1 =
4938 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4939 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4940 MVT::Other, Zero, SDValue(SS1, 0));
4941 ReplaceNode(Node, SS2);
4942 return;
4943 }
4944 case Intrinsic::aarch64_ldaxp:
4945 case Intrinsic::aarch64_ldxp: {
4946 unsigned Op =
4947 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4948 SDValue MemAddr = Node->getOperand(2);
4949 SDLoc DL(Node);
4950 SDValue Chain = Node->getOperand(0);
4951
4952 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4953 MVT::Other, MemAddr, Chain);
4954
4955 // Transfer memoperands.
4956 MachineMemOperand *MemOp =
4957 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4958 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4959 ReplaceNode(Node, Ld);
4960 return;
4961 }
4962 case Intrinsic::aarch64_stlxp:
4963 case Intrinsic::aarch64_stxp: {
4964 unsigned Op =
4965 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4966 SDLoc DL(Node);
4967 SDValue Chain = Node->getOperand(0);
4968 SDValue ValLo = Node->getOperand(2);
4969 SDValue ValHi = Node->getOperand(3);
4970 SDValue MemAddr = Node->getOperand(4);
4971
4972 // Place arguments in the right order.
4973 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4974
4975 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4976 // Transfer memoperands.
4977 MachineMemOperand *MemOp =
4978 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4979 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4980
4981 ReplaceNode(Node, St);
4982 return;
4983 }
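// NEON structured loads: each aarch64_neon_ldN* intrinsic maps onto an LDN
// instruction chosen by element type and vector width, and SelectLoad splits
// the register-tuple result back out through the dsub0/qsub0 subregisters.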
4984 case Intrinsic::aarch64_neon_ld1x2:
4985 if (VT == MVT::v8i8) {
4986 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4987 return;
4988 } else if (VT == MVT::v16i8) {
4989 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4990 return;
4991 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4992 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4993 return;
4994 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4995 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4996 return;
4997 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4998 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4999 return;
5000 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5001 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5002 return;
5003 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5004 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5005 return;
5006 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5007 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5008 return;
5009 }
5010 break;
5011 case Intrinsic::aarch64_neon_ld1x3:
5012 if (VT == MVT::v8i8) {
5013 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5014 return;
5015 } else if (VT == MVT::v16i8) {
5016 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5017 return;
5018 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5019 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5020 return;
5021 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5022 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5023 return;
5024 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5025 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5026 return;
5027 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5028 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5029 return;
5030 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5031 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5032 return;
5033 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5034 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5035 return;
5036 }
5037 break;
5038 case Intrinsic::aarch64_neon_ld1x4:
5039 if (VT == MVT::v8i8) {
5040 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5041 return;
5042 } else if (VT == MVT::v16i8) {
5043 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5044 return;
5045 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5046 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5047 return;
5048 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5049 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5050 return;
5051 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5052 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5053 return;
5054 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5055 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5056 return;
5057 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5058 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5059 return;
5060 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5061 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5062 return;
5063 }
5064 break;
5065 case Intrinsic::aarch64_neon_ld2:
5066 if (VT == MVT::v8i8) {
5067 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5068 return;
5069 } else if (VT == MVT::v16i8) {
5070 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5071 return;
5072 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5073 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5074 return;
5075 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5076 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5077 return;
5078 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5079 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5080 return;
5081 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5082 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5083 return;
5084 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5085 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5088 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5089 return;
5090 }
5091 break;
5092 case Intrinsic::aarch64_neon_ld3:
5093 if (VT == MVT::v8i8) {
5094 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5095 return;
5096 } else if (VT == MVT::v16i8) {
5097 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5098 return;
5099 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5100 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5101 return;
5102 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5103 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5104 return;
5105 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5106 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5107 return;
5108 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5109 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5110 return;
5111 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5112 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5113 return;
5114 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5115 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5116 return;
5117 }
5118 break;
5119 case Intrinsic::aarch64_neon_ld4:
5120 if (VT == MVT::v8i8) {
5121 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5122 return;
5123 } else if (VT == MVT::v16i8) {
5124 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5125 return;
5126 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5127 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5128 return;
5129 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5130 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5131 return;
5132 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5133 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5134 return;
5135 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5136 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5137 return;
5138 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5139 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5140 return;
5141 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5142 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5143 return;
5144 }
5145 break;
5146 case Intrinsic::aarch64_neon_ld2r:
5147 if (VT == MVT::v8i8) {
5148 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5149 return;
5150 } else if (VT == MVT::v16i8) {
5151 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5152 return;
5153 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5154 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5155 return;
5156 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5157 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5158 return;
5159 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5160 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5161 return;
5162 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5163 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5164 return;
5165 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5166 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5167 return;
5168 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5169 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5170 return;
5171 }
5172 break;
5173 case Intrinsic::aarch64_neon_ld3r:
5174 if (VT == MVT::v8i8) {
5175 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v16i8) {
5178 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5179 return;
5180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5181 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5182 return;
5183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5184 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5185 return;
5186 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5187 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5188 return;
5189 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5190 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5191 return;
5192 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5193 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5194 return;
5195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5196 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5197 return;
5198 }
5199 break;
5200 case Intrinsic::aarch64_neon_ld4r:
5201 if (VT == MVT::v8i8) {
5202 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5203 return;
5204 } else if (VT == MVT::v16i8) {
5205 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5206 return;
5207 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5208 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5209 return;
5210 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5211 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5212 return;
5213 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5214 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5215 return;
5216 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5217 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5218 return;
5219 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5220 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5221 return;
5222 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5223 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5224 return;
5225 }
5226 break;
5227 case Intrinsic::aarch64_neon_ld2lane:
5228 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5229 SelectLoadLane(Node, 2, AArch64::LD2i8);
5230 return;
5231 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5232 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5233 SelectLoadLane(Node, 2, AArch64::LD2i16);
5234 return;
5235 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5236 VT == MVT::v2f32) {
5237 SelectLoadLane(Node, 2, AArch64::LD2i32);
5238 return;
5239 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5240 VT == MVT::v1f64) {
5241 SelectLoadLane(Node, 2, AArch64::LD2i64);
5242 return;
5243 }
5244 break;
5245 case Intrinsic::aarch64_neon_ld3lane:
5246 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5247 SelectLoadLane(Node, 3, AArch64::LD3i8);
5248 return;
5249 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5250 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5251 SelectLoadLane(Node, 3, AArch64::LD3i16);
5252 return;
5253 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5254 VT == MVT::v2f32) {
5255 SelectLoadLane(Node, 3, AArch64::LD3i32);
5256 return;
5257 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5258 VT == MVT::v1f64) {
5259 SelectLoadLane(Node, 3, AArch64::LD3i64);
5260 return;
5261 }
5262 break;
5263 case Intrinsic::aarch64_neon_ld4lane:
5264 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5265 SelectLoadLane(Node, 4, AArch64::LD4i8);
5266 return;
5267 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5268 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5269 SelectLoadLane(Node, 4, AArch64::LD4i16);
5270 return;
5271 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5272 VT == MVT::v2f32) {
5273 SelectLoadLane(Node, 4, AArch64::LD4i32);
5274 return;
5275 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5276 VT == MVT::v1f64) {
5277 SelectLoadLane(Node, 4, AArch64::LD4i64);
5278 return;
5279 }
5280 break;
5281 case Intrinsic::aarch64_ld64b:
5282 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5283 return;
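// SVE structured loads: SelectPredicatedLoad takes the number of vectors, a
// log2 element-size scale used when folding the address into the reg+imm
// form (e.g. 4 = 128-bit quadwords for the LDnQ variants below), and the
// reg+imm / reg+reg opcode pair.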
5284 case Intrinsic::aarch64_sve_ld2q_sret: {
5285 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5286 return;
5287 }
5288 case Intrinsic::aarch64_sve_ld3q_sret: {
5289 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5290 return;
5291 }
5292 case Intrinsic::aarch64_sve_ld4q_sret: {
5293 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5294 return;
5295 }
5296 case Intrinsic::aarch64_sve_ld2_sret: {
5297 if (VT == MVT::nxv16i8) {
5298 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5299 true);
5300 return;
5301 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5302 VT == MVT::nxv8bf16) {
5303 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5304 true);
5305 return;
5306 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5307 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5308 true);
5309 return;
5310 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5311 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5312 true);
5313 return;
5314 }
5315 break;
5316 }
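// Multi-vector SME2/SVE2p1 loads: in streaming mode the SME2 *_PSEUDO forms
// are selected, otherwise the plain SVE2p1 instructions are used; if neither
// feature is available the intrinsic falls through to default handling.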
5317 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5318 if (VT == MVT::nxv16i8) {
5319 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5320 SelectContiguousMultiVectorLoad(
5321 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5322 else if (Subtarget->hasSVE2p1())
5323 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5324 AArch64::LD1B_2Z);
5325 else
5326 break;
5327 return;
5328 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5329 VT == MVT::nxv8bf16) {
5330 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5331 SelectContiguousMultiVectorLoad(
5332 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5333 else if (Subtarget->hasSVE2p1())
5334 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5335 AArch64::LD1H_2Z);
5336 else
5337 break;
5338 return;
5339 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5340 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5341 SelectContiguousMultiVectorLoad(
5342 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5343 else if (Subtarget->hasSVE2p1())
5344 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5345 AArch64::LD1W_2Z);
5346 else
5347 break;
5348 return;
5349 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5350 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5351 SelectContiguousMultiVectorLoad(
5352 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5353 else if (Subtarget->hasSVE2p1())
5354 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5355 AArch64::LD1D_2Z);
5356 else
5357 break;
5358 return;
5359 }
5360 break;
5361 }
5362 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5363 if (VT == MVT::nxv16i8) {
5364 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5365 SelectContiguousMultiVectorLoad(
5366 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5367 else if (Subtarget->hasSVE2p1())
5368 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5369 AArch64::LD1B_4Z);
5370 else
5371 break;
5372 return;
5373 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5374 VT == MVT::nxv8bf16) {
5375 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5376 SelectContiguousMultiVectorLoad(
5377 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5378 else if (Subtarget->hasSVE2p1())
5379 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5380 AArch64::LD1H_4Z);
5381 else
5382 break;
5383 return;
5384 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5385 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5386 SelectContiguousMultiVectorLoad(
5387 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5388 else if (Subtarget->hasSVE2p1())
5389 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5390 AArch64::LD1W_4Z);
5391 else
5392 break;
5393 return;
5394 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5395 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5396 SelectContiguousMultiVectorLoad(
5397 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5398 else if (Subtarget->hasSVE2p1())
5399 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5400 AArch64::LD1D_4Z);
5401 else
5402 break;
5403 return;
5404 }
5405 break;
5406 }
5407 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5408 if (VT == MVT::nxv16i8) {
5409 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5410 SelectContiguousMultiVectorLoad(Node, 2, 0,
5411 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5412 AArch64::LDNT1B_2Z_PSEUDO);
5413 else if (Subtarget->hasSVE2p1())
5414 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5415 AArch64::LDNT1B_2Z);
5416 else
5417 break;
5418 return;
5419 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5420 VT == MVT::nxv8bf16) {
5421 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5422 SelectContiguousMultiVectorLoad(Node, 2, 1,
5423 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5424 AArch64::LDNT1H_2Z_PSEUDO);
5425 else if (Subtarget->hasSVE2p1())
5426 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5427 AArch64::LDNT1H_2Z);
5428 else
5429 break;
5430 return;
5431 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5432 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5433 SelectContiguousMultiVectorLoad(Node, 2, 2,
5434 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5435 AArch64::LDNT1W_2Z_PSEUDO);
5436 else if (Subtarget->hasSVE2p1())
5437 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5438 AArch64::LDNT1W_2Z);
5439 else
5440 break;
5441 return;
5442 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5443 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5444 SelectContiguousMultiVectorLoad(Node, 2, 3,
5445 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5446 AArch64::LDNT1D_2Z_PSEUDO);
5447 else if (Subtarget->hasSVE2p1())
5448 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5449 AArch64::LDNT1D_2Z);
5450 else
5451 break;
5452 return;
5453 }
5454 break;
5455 }
5456 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5457 if (VT == MVT::nxv16i8) {
5458 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5459 SelectContiguousMultiVectorLoad(Node, 4, 0,
5460 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5461 AArch64::LDNT1B_4Z_PSEUDO);
5462 else if (Subtarget->hasSVE2p1())
5463 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5464 AArch64::LDNT1B_4Z);
5465 else
5466 break;
5467 return;
5468 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5469 VT == MVT::nxv8bf16) {
5470 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5471 SelectContiguousMultiVectorLoad(Node, 4, 1,
5472 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5473 AArch64::LDNT1H_4Z_PSEUDO);
5474 else if (Subtarget->hasSVE2p1())
5475 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5476 AArch64::LDNT1H_4Z);
5477 else
5478 break;
5479 return;
5480 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5481 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5482 SelectContiguousMultiVectorLoad(Node, 4, 2,
5483 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5484 AArch64::LDNT1W_4Z_PSEUDO);
5485 else if (Subtarget->hasSVE2p1())
5486 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5487 AArch64::LDNT1W_4Z);
5488 else
5489 break;
5490 return;
5491 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5492 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5493 SelectContiguousMultiVectorLoad(Node, 4, 3,
5494 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5495 AArch64::LDNT1D_4Z_PSEUDO);
5496 else if (Subtarget->hasSVE2p1())
5497 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5498 AArch64::LDNT1D_4Z);
5499 else
5500 break;
5501 return;
5502 }
5503 break;
5504 }
5505 case Intrinsic::aarch64_sve_ld3_sret: {
5506 if (VT == MVT::nxv16i8) {
5507 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5508 true);
5509 return;
5510 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5511 VT == MVT::nxv8bf16) {
5512 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5513 true);
5514 return;
5515 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5516 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5517 true);
5518 return;
5519 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5520 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5521 true);
5522 return;
5523 }
5524 break;
5525 }
5526 case Intrinsic::aarch64_sve_ld4_sret: {
5527 if (VT == MVT::nxv16i8) {
5528 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5529 true);
5530 return;
5531 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5532 VT == MVT::nxv8bf16) {
5533 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5534 true);
5535 return;
5536 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5537 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5538 true);
5539 return;
5540 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5541 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5542 true);
5543 return;
5544 }
5545 break;
5546 }
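// SME ZA reads: SelectMultiVectorMove<MaxIdx, Scale> copies consecutive tile
// slices (or ZA array vectors) into a Z multi-vector; the template arguments
// give the largest slice-offset immediate and the multiple it must be, as
// seen in the per-element-size instantiations below.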
5547 case Intrinsic::aarch64_sme_read_hor_vg2: {
5548 if (VT == MVT::nxv16i8) {
5549 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5550 AArch64::MOVA_2ZMXI_H_B);
5551 return;
5552 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5553 VT == MVT::nxv8bf16) {
5554 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5555 AArch64::MOVA_2ZMXI_H_H);
5556 return;
5557 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5558 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5559 AArch64::MOVA_2ZMXI_H_S);
5560 return;
5561 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5562 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5563 AArch64::MOVA_2ZMXI_H_D);
5564 return;
5565 }
5566 break;
5567 }
5568 case Intrinsic::aarch64_sme_read_ver_vg2: {
5569 if (VT == MVT::nxv16i8) {
5570 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5571 AArch64::MOVA_2ZMXI_V_B);
5572 return;
5573 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5574 VT == MVT::nxv8bf16) {
5575 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5576 AArch64::MOVA_2ZMXI_V_H);
5577 return;
5578 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5579 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5580 AArch64::MOVA_2ZMXI_V_S);
5581 return;
5582 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5583 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5584 AArch64::MOVA_2ZMXI_V_D);
5585 return;
5586 }
5587 break;
5588 }
5589 case Intrinsic::aarch64_sme_read_hor_vg4: {
5590 if (VT == MVT::nxv16i8) {
5591 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5592 AArch64::MOVA_4ZMXI_H_B);
5593 return;
5594 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5595 VT == MVT::nxv8bf16) {
5596 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5597 AArch64::MOVA_4ZMXI_H_H);
5598 return;
5599 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5600 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5601 AArch64::MOVA_4ZMXI_H_S);
5602 return;
5603 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5604 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5605 AArch64::MOVA_4ZMXI_H_D);
5606 return;
5607 }
5608 break;
5609 }
5610 case Intrinsic::aarch64_sme_read_ver_vg4: {
5611 if (VT == MVT::nxv16i8) {
5612 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5613 AArch64::MOVA_4ZMXI_V_B);
5614 return;
5615 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5616 VT == MVT::nxv8bf16) {
5617 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5618 AArch64::MOVA_4ZMXI_V_H);
5619 return;
5620 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5621 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5622 AArch64::MOVA_4ZMXI_V_S);
5623 return;
5624 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5625 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5626 AArch64::MOVA_4ZMXI_V_D);
5627 return;
5628 }
5629 break;
5630 }
5631 case Intrinsic::aarch64_sme_read_vg1x2: {
5632 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5633 AArch64::MOVA_VG2_2ZMXI);
5634 return;
5635 }
5636 case Intrinsic::aarch64_sme_read_vg1x4: {
5637 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5638 AArch64::MOVA_VG4_4ZMXI);
5639 return;
5640 }
5641 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5642 if (VT == MVT::nxv16i8) {
5643 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5644 return;
5645 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5646 VT == MVT::nxv8bf16) {
5647 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5648 return;
5649 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5650 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5651 return;
5652 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5653 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5654 return;
5655 }
5656 break;
5657 }
5658 case Intrinsic::aarch64_sme_readz_vert_x2: {
5659 if (VT == MVT::nxv16i8) {
5660 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5661 return;
5662 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5663 VT == MVT::nxv8bf16) {
5664 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5665 return;
5666 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5667 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5668 return;
5669 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5670 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5671 return;
5672 }
5673 break;
5674 }
5675 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5676 if (VT == MVT::nxv16i8) {
5677 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5678 return;
5679 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5680 VT == MVT::nxv8bf16) {
5681 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5682 return;
5683 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5684 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5685 return;
5686 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5687 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5688 return;
5689 }
5690 break;
5691 }
5692 case Intrinsic::aarch64_sme_readz_vert_x4: {
5693 if (VT == MVT::nxv16i8) {
5694 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5695 return;
5696 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5697 VT == MVT::nxv8bf16) {
5698 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5699 return;
5700 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5701 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5702 return;
5703 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5704 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5705 return;
5706 }
5707 break;
5708 }
5709 case Intrinsic::aarch64_sme_readz_x2: {
5710 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5711 AArch64::ZA);
5712 return;
5713 }
5714 case Intrinsic::aarch64_sme_readz_x4: {
5715 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5716 AArch64::ZA);
5717 return;
5718 }
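// The Swift async context is stored at a fixed offset of -8 from the frame
// pointer, so this is selected directly as FP - 8 and the function is marked
// as using the frame address and carrying a Swift async context.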
5719 case Intrinsic::swift_async_context_addr: {
5720 SDLoc DL(Node);
5721 SDValue Chain = Node->getOperand(0);
5722 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5723 SDValue Res = SDValue(
5724 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5725 CurDAG->getTargetConstant(8, DL, MVT::i32),
5726 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5727 0);
5728 ReplaceUses(SDValue(Node, 0), Res);
5729 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5730 CurDAG->RemoveDeadNode(Node);
5731
5732 auto &MF = CurDAG->getMachineFunction();
5733 MF.getFrameInfo().setFrameAddressIsTaken(true);
5734 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5735 return;
5736 }
5737 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5738 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5739 Node->getValueType(0),
5740 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5741 AArch64::LUTI2_4ZTZI_S}))
5742 // Second Immediate must be <= 3:
5743 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5744 return;
5745 }
5746 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5747 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5748 Node->getValueType(0),
5749 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5750 // Second Immediate must be <= 1:
5751 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5752 return;
5753 }
5754 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5755 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5756 Node->getValueType(0),
5757 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5758 AArch64::LUTI2_2ZTZI_S}))
5759 // Second Immediate must be <= 7:
5760 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5761 return;
5762 }
5763 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5764 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5765 Node->getValueType(0),
5766 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5767 AArch64::LUTI4_2ZTZI_S}))
5768 // Second Immediate must be <= 3:
5769 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5770 return;
5771 }
5772 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5773 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5774 return;
5775 }
5776 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5777 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5778 Node->getValueType(0),
5779 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5780 SelectCVTIntrinsicFP8(Node, 2, Opc);
5781 return;
5782 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5783 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5784 Node->getValueType(0),
5785 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5786 SelectCVTIntrinsicFP8(Node, 2, Opc);
5787 return;
5788 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5789 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5790 Node->getValueType(0),
5791 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5792 SelectCVTIntrinsicFP8(Node, 2, Opc);
5793 return;
5794 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5795 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5796 Node->getValueType(0),
5797 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5798 SelectCVTIntrinsicFP8(Node, 2, Opc);
5799 return;
5800 }
5801 } break;
5802 case ISD::INTRINSIC_WO_CHAIN: {
5803 unsigned IntNo = Node->getConstantOperandVal(0);
5804 switch (IntNo) {
5805 default:
5806 break;
5807 case Intrinsic::aarch64_tagp:
5808 SelectTagP(Node);
5809 return;
5810
5811 case Intrinsic::ptrauth_auth:
5812 SelectPtrauthAuth(Node);
5813 return;
5814
5815 case Intrinsic::ptrauth_resign:
5816 SelectPtrauthResign(Node);
5817 return;
5818
5819 case Intrinsic::aarch64_neon_tbl2:
5820 SelectTable(Node, 2,
5821 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5822 false);
5823 return;
5824 case Intrinsic::aarch64_neon_tbl3:
5825 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5826 : AArch64::TBLv16i8Three,
5827 false);
5828 return;
5829 case Intrinsic::aarch64_neon_tbl4:
5830 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5831 : AArch64::TBLv16i8Four,
5832 false);
5833 return;
5834 case Intrinsic::aarch64_neon_tbx2:
5835 SelectTable(Node, 2,
5836 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5837 true);
5838 return;
5839 case Intrinsic::aarch64_neon_tbx3:
5840 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5841 : AArch64::TBXv16i8Three,
5842 true);
5843 return;
5844 case Intrinsic::aarch64_neon_tbx4:
5845 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5846 : AArch64::TBXv16i8Four,
5847 true);
5848 return;
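// The SVE/SME cases below share one pattern: SelectOpcodeFromVT<Kind> picks
// an opcode out of a {B, H, S, D} table based on the scalable element type
// of the result (a 0 entry means no instruction exists for that element
// size), and the chosen opcode is handed to a shared selection helper. If
// nothing matches, no node is selected here.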
5849 case Intrinsic::aarch64_sve_srshl_single_x2:
5850 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5851 Node->getValueType(0),
5852 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5853 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5854 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5855 return;
5856 case Intrinsic::aarch64_sve_srshl_single_x4:
5857 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5858 Node->getValueType(0),
5859 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5860 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5861 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5862 return;
5863 case Intrinsic::aarch64_sve_urshl_single_x2:
5864 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5865 Node->getValueType(0),
5866 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5867 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5868 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5869 return;
5870 case Intrinsic::aarch64_sve_urshl_single_x4:
5871 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5872 Node->getValueType(0),
5873 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5874 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5875 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5876 return;
5877 case Intrinsic::aarch64_sve_srshl_x2:
5878 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5879 Node->getValueType(0),
5880 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5881 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5882 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5883 return;
5884 case Intrinsic::aarch64_sve_srshl_x4:
5885 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5886 Node->getValueType(0),
5887 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5888 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5889 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5890 return;
5891 case Intrinsic::aarch64_sve_urshl_x2:
5892 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5893 Node->getValueType(0),
5894 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5895 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5896 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5897 return;
5898 case Intrinsic::aarch64_sve_urshl_x4:
5899 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5900 Node->getValueType(0),
5901 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5902 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5903 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5904 return;
5905 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5906 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5907 Node->getValueType(0),
5908 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5909 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5910 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5911 return;
5912 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5913 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5914 Node->getValueType(0),
5915 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5916 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5917 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5918 return;
5919 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5920 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5921 Node->getValueType(0),
5922 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5923 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5924 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5925 return;
5926 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5927 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5928 Node->getValueType(0),
5929 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5930 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5931 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5932 return;
5933 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5934 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5935 Node->getValueType(0),
5936 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5937 AArch64::FSCALE_2ZZ_D}))
5938 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5939 return;
5940 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5941 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5942 Node->getValueType(0),
5943 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5944 AArch64::FSCALE_4ZZ_D}))
5945 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5946 return;
5947 case Intrinsic::aarch64_sme_fp8_scale_x2:
5948 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5949 Node->getValueType(0),
5950 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5951 AArch64::FSCALE_2Z2Z_D}))
5952 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5953 return;
5954 case Intrinsic::aarch64_sme_fp8_scale_x4:
5955 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5956 Node->getValueType(0),
5957 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5958 AArch64::FSCALE_4Z4Z_D}))
5959 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5960 return;
5961 case Intrinsic::aarch64_sve_whilege_x2:
5962 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5963 Node->getValueType(0),
5964 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5965 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5966 SelectWhilePair(Node, Op);
5967 return;
5968 case Intrinsic::aarch64_sve_whilegt_x2:
5969 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5970 Node->getValueType(0),
5971 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5972 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5973 SelectWhilePair(Node, Op);
5974 return;
5975 case Intrinsic::aarch64_sve_whilehi_x2:
5976 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5977 Node->getValueType(0),
5978 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5979 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5980 SelectWhilePair(Node, Op);
5981 return;
5982 case Intrinsic::aarch64_sve_whilehs_x2:
5983 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5984 Node->getValueType(0),
5985 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5986 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5987 SelectWhilePair(Node, Op);
5988 return;
5989 case Intrinsic::aarch64_sve_whilele_x2:
5990 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5991 Node->getValueType(0),
5992 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5993 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5994 SelectWhilePair(Node, Op);
5995 return;
5996 case Intrinsic::aarch64_sve_whilelo_x2:
5997 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5998 Node->getValueType(0),
5999 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6000 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6001 SelectWhilePair(Node, Op);
6002 return;
6003 case Intrinsic::aarch64_sve_whilels_x2:
6004 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6005 Node->getValueType(0),
6006 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6007 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6008 SelectWhilePair(Node, Op);
6009 return;
6010 case Intrinsic::aarch64_sve_whilelt_x2:
6011 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6012 Node->getValueType(0),
6013 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6014 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6015 SelectWhilePair(Node, Op);
6016 return;
6017 case Intrinsic::aarch64_sve_smax_single_x2:
6018 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6019 Node->getValueType(0),
6020 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6021 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6022 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6023 return;
6024 case Intrinsic::aarch64_sve_umax_single_x2:
6025 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6026 Node->getValueType(0),
6027 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6028 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6029 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6030 return;
6031 case Intrinsic::aarch64_sve_fmax_single_x2:
6032 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6033 Node->getValueType(0),
6034 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6035 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6036 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6037 return;
6038 case Intrinsic::aarch64_sve_smax_single_x4:
6039 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6040 Node->getValueType(0),
6041 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6042 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6043 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6044 return;
6045 case Intrinsic::aarch64_sve_umax_single_x4:
6046 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6047 Node->getValueType(0),
6048 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6049 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6050 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6051 return;
6052 case Intrinsic::aarch64_sve_fmax_single_x4:
6053 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6054 Node->getValueType(0),
6055 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6056 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6057 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6058 return;
6059 case Intrinsic::aarch64_sve_smin_single_x2:
6060 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6061 Node->getValueType(0),
6062 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6063 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6064 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6065 return;
6066 case Intrinsic::aarch64_sve_umin_single_x2:
6067 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6068 Node->getValueType(0),
6069 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6070 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6071 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6072 return;
6073 case Intrinsic::aarch64_sve_fmin_single_x2:
6074 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6075 Node->getValueType(0),
6076 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6077 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6078 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6079 return;
6080 case Intrinsic::aarch64_sve_smin_single_x4:
6081 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6082 Node->getValueType(0),
6083 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6084 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6085 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6086 return;
6087 case Intrinsic::aarch64_sve_umin_single_x4:
6088 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6089 Node->getValueType(0),
6090 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6091 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6092 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6093 return;
6094 case Intrinsic::aarch64_sve_fmin_single_x4:
6095 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6096 Node->getValueType(0),
6097 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6098 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6099 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6100 return;
6101 case Intrinsic::aarch64_sve_smax_x2:
6102 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6103 Node->getValueType(0),
6104 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6105 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6106 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6107 return;
6108 case Intrinsic::aarch64_sve_umax_x2:
6109 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6110 Node->getValueType(0),
6111 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6112 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6113 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6114 return;
6115 case Intrinsic::aarch64_sve_fmax_x2:
6116 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6117 Node->getValueType(0),
6118 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6119 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6120 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6121 return;
6122 case Intrinsic::aarch64_sve_smax_x4:
6123 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6124 Node->getValueType(0),
6125 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6126 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6127 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6128 return;
6129 case Intrinsic::aarch64_sve_umax_x4:
6130 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6131 Node->getValueType(0),
6132 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6133 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6134 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6135 return;
6136 case Intrinsic::aarch64_sve_fmax_x4:
6137 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6138 Node->getValueType(0),
6139 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6140 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6141 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6142 return;
6143 case Intrinsic::aarch64_sme_famax_x2:
6144 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6145 Node->getValueType(0),
6146 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6147 AArch64::FAMAX_2Z2Z_D}))
6148 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6149 return;
6150 case Intrinsic::aarch64_sme_famax_x4:
6151 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6152 Node->getValueType(0),
6153 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6154 AArch64::FAMAX_4Z4Z_D}))
6155 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6156 return;
6157 case Intrinsic::aarch64_sme_famin_x2:
6158 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6159 Node->getValueType(0),
6160 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6161 AArch64::FAMIN_2Z2Z_D}))
6162 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6163 return;
6164 case Intrinsic::aarch64_sme_famin_x4:
6165 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6166 Node->getValueType(0),
6167 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6168 AArch64::FAMIN_4Z4Z_D}))
6169 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6170 return;
6171 case Intrinsic::aarch64_sve_smin_x2:
6172 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6173 Node->getValueType(0),
6174 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6175 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6176 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6177 return;
6178 case Intrinsic::aarch64_sve_umin_x2:
6179 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6180 Node->getValueType(0),
6181 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6182 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6183 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6184 return;
6185 case Intrinsic::aarch64_sve_fmin_x2:
6186 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6187 Node->getValueType(0),
6188 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6189 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6190 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6191 return;
6192 case Intrinsic::aarch64_sve_smin_x4:
6193 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6194 Node->getValueType(0),
6195 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6196 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6197 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6198 return;
6199 case Intrinsic::aarch64_sve_umin_x4:
6200 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6201 Node->getValueType(0),
6202 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6203 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6204 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6205 return;
6206 case Intrinsic::aarch64_sve_fmin_x4:
6207 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6208 Node->getValueType(0),
6209 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6210 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6211 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6212 return;
6213 case Intrinsic::aarch64_sve_fmaxnm_single_x2:
6214 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6215 Node->getValueType(0),
6216 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6217 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6218 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6219 return;
6220 case Intrinsic::aarch64_sve_fmaxnm_single_x4:
6221 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6222 Node->getValueType(0),
6223 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6224 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6225 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6226 return;
6227 case Intrinsic::aarch64_sve_fminnm_single_x2:
6228 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6229 Node->getValueType(0),
6230 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6231 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6232 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6233 return;
6234 case Intrinsic::aarch64_sve_fminnm_single_x4:
6235 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6236 Node->getValueType(0),
6237 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6238 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6239 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6240 return;
6241 case Intrinsic::aarch64_sve_fmaxnm_x2:
6242 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6243 Node->getValueType(0),
6244 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6245 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6246 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6247 return;
6248 case Intrinsic::aarch64_sve_fmaxnm_x4:
6249 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6250 Node->getValueType(0),
6251 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6252 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6253 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6254 return;
6255 case Intrinsic::aarch64_sve_fminnm_x2:
6256 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6257 Node->getValueType(0),
6258 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6259 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6260 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6261 return;
6262 case Intrinsic::aarch64_sve_fminnm_x4:
6263 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6264 Node->getValueType(0),
6265 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6266 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6267 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6268 return;
6269 case Intrinsic::aarch64_sve_fcvtzs_x2:
6270 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6271 return;
6272 case Intrinsic::aarch64_sve_scvtf_x2:
6273 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6274 return;
6275 case Intrinsic::aarch64_sve_fcvtzu_x2:
6276 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6277 return;
6278 case Intrinsic::aarch64_sve_ucvtf_x2:
6279 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6280 return;
6281 case Intrinsic::aarch64_sve_fcvtzs_x4:
6282 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6283 return;
6284 case Intrinsic::aarch64_sve_scvtf_x4:
6285 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6286 return;
6287 case Intrinsic::aarch64_sve_fcvtzu_x4:
6288 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6289 return;
6290 case Intrinsic::aarch64_sve_ucvtf_x4:
6291 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6292 return;
6293 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6294 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6295 return;
6296 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6297 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6298 return;
6299 case Intrinsic::aarch64_sve_sclamp_single_x2:
6300 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6301 Node->getValueType(0),
6302 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6303 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6304 SelectClamp(Node, 2, Op);
6305 return;
6306 case Intrinsic::aarch64_sve_uclamp_single_x2:
6307 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6308 Node->getValueType(0),
6309 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6310 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6311 SelectClamp(Node, 2, Op);
6312 return;
6313 case Intrinsic::aarch64_sve_fclamp_single_x2:
6314 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6315 Node->getValueType(0),
6316 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6317 AArch64::FCLAMP_VG2_2Z2Z_D}))
6318 SelectClamp(Node, 2, Op);
6319 return;
6320 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6321 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6322 return;
6323 case Intrinsic::aarch64_sve_sclamp_single_x4:
6324 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6325 Node->getValueType(0),
6326 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6327 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6328 SelectClamp(Node, 4, Op);
6329 return;
6330 case Intrinsic::aarch64_sve_uclamp_single_x4:
6331 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6332 Node->getValueType(0),
6333 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6334 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6335 SelectClamp(Node, 4, Op);
6336 return;
6337 case Intrinsic::aarch64_sve_fclamp_single_x4:
6338 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6339 Node->getValueType(0),
6340 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6341 AArch64::FCLAMP_VG4_4Z4Z_D}))
6342 SelectClamp(Node, 4, Op);
6343 return;
6344 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6345 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6346 return;
6347 case Intrinsic::aarch64_sve_add_single_x2:
6348 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6349 Node->getValueType(0),
6350 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6351 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6352 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6353 return;
6354 case Intrinsic::aarch64_sve_add_single_x4:
6355 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6356 Node->getValueType(0),
6357 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6358 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6359 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6360 return;
6361 case Intrinsic::aarch64_sve_zip_x2:
6362 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6363 Node->getValueType(0),
6364 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6365 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6366 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6367 return;
6368 case Intrinsic::aarch64_sve_zipq_x2:
6369 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6370 AArch64::ZIP_VG2_2ZZZ_Q);
6371 return;
6372 case Intrinsic::aarch64_sve_zip_x4:
6373 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6374 Node->getValueType(0),
6375 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6376 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6377 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6378 return;
6379 case Intrinsic::aarch64_sve_zipq_x4:
6380 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6381 AArch64::ZIP_VG4_4Z4Z_Q);
6382 return;
6383 case Intrinsic::aarch64_sve_uzp_x2:
6384 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6385 Node->getValueType(0),
6386 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6387 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6388 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6389 return;
6390 case Intrinsic::aarch64_sve_uzpq_x2:
6391 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6392 AArch64::UZP_VG2_2ZZZ_Q);
6393 return;
6394 case Intrinsic::aarch64_sve_uzp_x4:
6395 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6396 Node->getValueType(0),
6397 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6398 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6399 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6400 return;
6401 case Intrinsic::aarch64_sve_uzpq_x4:
6402 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6403 AArch64::UZP_VG4_4Z4Z_Q);
6404 return;
6405 case Intrinsic::aarch64_sve_sel_x2:
6406 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6407 Node->getValueType(0),
6408 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6409 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6410 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6411 return;
6412 case Intrinsic::aarch64_sve_sel_x4:
6413 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6414 Node->getValueType(0),
6415 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6416 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6417 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6418 return;
6419 case Intrinsic::aarch64_sve_frinta_x2:
6420 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6421 return;
6422 case Intrinsic::aarch64_sve_frinta_x4:
6423 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6424 return;
6425 case Intrinsic::aarch64_sve_frintm_x2:
6426 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6427 return;
6428 case Intrinsic::aarch64_sve_frintm_x4:
6429 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6430 return;
6431 case Intrinsic::aarch64_sve_frintn_x2:
6432 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6433 return;
6434 case Intrinsic::aarch64_sve_frintn_x4:
6435 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6436 return;
6437 case Intrinsic::aarch64_sve_frintp_x2:
6438 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6439 return;
6440 case Intrinsic::aarch64_sve_frintp_x4:
6441 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6442 return;
6443 case Intrinsic::aarch64_sve_sunpk_x2:
6444 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6445 Node->getValueType(0),
6446 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6447 AArch64::SUNPK_VG2_2ZZ_D}))
6448 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6449 return;
6450 case Intrinsic::aarch64_sve_uunpk_x2:
6451 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6452 Node->getValueType(0),
6453 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6454 AArch64::UUNPK_VG2_2ZZ_D}))
6455 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6456 return;
6457 case Intrinsic::aarch64_sve_sunpk_x4:
6458 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6459 Node->getValueType(0),
6460 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6461 AArch64::SUNPK_VG4_4Z2Z_D}))
6462 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6463 return;
6464 case Intrinsic::aarch64_sve_uunpk_x4:
6465 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6466 Node->getValueType(0),
6467 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6468 AArch64::UUNPK_VG4_4Z2Z_D}))
6469 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6470 return;
6471 case Intrinsic::aarch64_sve_pext_x2: {
6473 Node->getValueType(0),
6474 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6475 AArch64::PEXT_2PCI_D}))
6476 SelectPExtPair(Node, Op);
6477 return;
6478 }
6479 }
6480 break;
6481 }
6482 case ISD::INTRINSIC_VOID: {
6483 unsigned IntNo = Node->getConstantOperandVal(1);
6484 if (Node->getNumOperands() >= 3)
6485 VT = Node->getOperand(2)->getValueType(0);
6486 switch (IntNo) {
6487 default:
6488 break;
6489 case Intrinsic::aarch64_neon_st1x2: {
6490 if (VT == MVT::v8i8) {
6491 SelectStore(Node, 2, AArch64::ST1Twov8b);
6492 return;
6493 } else if (VT == MVT::v16i8) {
6494 SelectStore(Node, 2, AArch64::ST1Twov16b);
6495 return;
6496 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6497 VT == MVT::v4bf16) {
6498 SelectStore(Node, 2, AArch64::ST1Twov4h);
6499 return;
6500 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6501 VT == MVT::v8bf16) {
6502 SelectStore(Node, 2, AArch64::ST1Twov8h);
6503 return;
6504 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6505 SelectStore(Node, 2, AArch64::ST1Twov2s);
6506 return;
6507 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6508 SelectStore(Node, 2, AArch64::ST1Twov4s);
6509 return;
6510 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6511 SelectStore(Node, 2, AArch64::ST1Twov2d);
6512 return;
6513 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6514 SelectStore(Node, 2, AArch64::ST1Twov1d);
6515 return;
6516 }
6517 break;
6518 }
6519 case Intrinsic::aarch64_neon_st1x3: {
6520 if (VT == MVT::v8i8) {
6521 SelectStore(Node, 3, AArch64::ST1Threev8b);
6522 return;
6523 } else if (VT == MVT::v16i8) {
6524 SelectStore(Node, 3, AArch64::ST1Threev16b);
6525 return;
6526 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6527 VT == MVT::v4bf16) {
6528 SelectStore(Node, 3, AArch64::ST1Threev4h);
6529 return;
6530 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6531 VT == MVT::v8bf16) {
6532 SelectStore(Node, 3, AArch64::ST1Threev8h);
6533 return;
6534 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6535 SelectStore(Node, 3, AArch64::ST1Threev2s);
6536 return;
6537 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6538 SelectStore(Node, 3, AArch64::ST1Threev4s);
6539 return;
6540 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6541 SelectStore(Node, 3, AArch64::ST1Threev2d);
6542 return;
6543 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6544 SelectStore(Node, 3, AArch64::ST1Threev1d);
6545 return;
6546 }
6547 break;
6548 }
6549 case Intrinsic::aarch64_neon_st1x4: {
6550 if (VT == MVT::v8i8) {
6551 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6552 return;
6553 } else if (VT == MVT::v16i8) {
6554 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6555 return;
6556 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6557 VT == MVT::v4bf16) {
6558 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6559 return;
6560 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6561 VT == MVT::v8bf16) {
6562 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6563 return;
6564 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6565 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6566 return;
6567 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6568 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6569 return;
6570 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6571 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6572 return;
6573 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6574 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6575 return;
6576 }
6577 break;
6578 }
6579 case Intrinsic::aarch64_neon_st2: {
6580 if (VT == MVT::v8i8) {
6581 SelectStore(Node, 2, AArch64::ST2Twov8b);
6582 return;
6583 } else if (VT == MVT::v16i8) {
6584 SelectStore(Node, 2, AArch64::ST2Twov16b);
6585 return;
6586 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6587 VT == MVT::v4bf16) {
6588 SelectStore(Node, 2, AArch64::ST2Twov4h);
6589 return;
6590 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6591 VT == MVT::v8bf16) {
6592 SelectStore(Node, 2, AArch64::ST2Twov8h);
6593 return;
6594 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6595 SelectStore(Node, 2, AArch64::ST2Twov2s);
6596 return;
6597 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6598 SelectStore(Node, 2, AArch64::ST2Twov4s);
6599 return;
6600 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6601 SelectStore(Node, 2, AArch64::ST2Twov2d);
6602 return;
6603 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6604 SelectStore(Node, 2, AArch64::ST1Twov1d);
6605 return;
6606 }
6607 break;
6608 }
6609 case Intrinsic::aarch64_neon_st3: {
6610 if (VT == MVT::v8i8) {
6611 SelectStore(Node, 3, AArch64::ST3Threev8b);
6612 return;
6613 } else if (VT == MVT::v16i8) {
6614 SelectStore(Node, 3, AArch64::ST3Threev16b);
6615 return;
6616 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6617 VT == MVT::v4bf16) {
6618 SelectStore(Node, 3, AArch64::ST3Threev4h);
6619 return;
6620 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6621 VT == MVT::v8bf16) {
6622 SelectStore(Node, 3, AArch64::ST3Threev8h);
6623 return;
6624 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6625 SelectStore(Node, 3, AArch64::ST3Threev2s);
6626 return;
6627 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6628 SelectStore(Node, 3, AArch64::ST3Threev4s);
6629 return;
6630 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6631 SelectStore(Node, 3, AArch64::ST3Threev2d);
6632 return;
6633 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6634 SelectStore(Node, 3, AArch64::ST1Threev1d);
6635 return;
6636 }
6637 break;
6638 }
6639 case Intrinsic::aarch64_neon_st4: {
6640 if (VT == MVT::v8i8) {
6641 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6642 return;
6643 } else if (VT == MVT::v16i8) {
6644 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6645 return;
6646 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6647 VT == MVT::v4bf16) {
6648 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6649 return;
6650 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6651 VT == MVT::v8bf16) {
6652 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6653 return;
6654 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6655 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6656 return;
6657 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6658 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6659 return;
6660 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6661 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6662 return;
6663 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6664 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6665 return;
6666 }
6667 break;
6668 }
6669 case Intrinsic::aarch64_neon_st2lane: {
6670 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6671 SelectStoreLane(Node, 2, AArch64::ST2i8);
6672 return;
6673 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6674 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6675 SelectStoreLane(Node, 2, AArch64::ST2i16);
6676 return;
6677 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6678 VT == MVT::v2f32) {
6679 SelectStoreLane(Node, 2, AArch64::ST2i32);
6680 return;
6681 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6682 VT == MVT::v1f64) {
6683 SelectStoreLane(Node, 2, AArch64::ST2i64);
6684 return;
6685 }
6686 break;
6687 }
6688 case Intrinsic::aarch64_neon_st3lane: {
6689 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6690 SelectStoreLane(Node, 3, AArch64::ST3i8);
6691 return;
6692 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6693 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6694 SelectStoreLane(Node, 3, AArch64::ST3i16);
6695 return;
6696 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6697 VT == MVT::v2f32) {
6698 SelectStoreLane(Node, 3, AArch64::ST3i32);
6699 return;
6700 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6701 VT == MVT::v1f64) {
6702 SelectStoreLane(Node, 3, AArch64::ST3i64);
6703 return;
6704 }
6705 break;
6706 }
6707 case Intrinsic::aarch64_neon_st4lane: {
6708 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6709 SelectStoreLane(Node, 4, AArch64::ST4i8);
6710 return;
6711 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6712 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6713 SelectStoreLane(Node, 4, AArch64::ST4i16);
6714 return;
6715 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6716 VT == MVT::v2f32) {
6717 SelectStoreLane(Node, 4, AArch64::ST4i32);
6718 return;
6719 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6720 VT == MVT::v1f64) {
6721 SelectStoreLane(Node, 4, AArch64::ST4i64);
6722 return;
6723 }
6724 break;
6725 }
6726 case Intrinsic::aarch64_sve_st2q: {
6727 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6728 return;
6729 }
6730 case Intrinsic::aarch64_sve_st3q: {
6731 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6732 return;
6733 }
6734 case Intrinsic::aarch64_sve_st4q: {
6735 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6736 return;
6737 }
6738 case Intrinsic::aarch64_sve_st2: {
6739 if (VT == MVT::nxv16i8) {
6740 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6741 return;
6742 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6743 VT == MVT::nxv8bf16) {
6744 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6745 return;
6746 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6747 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6748 return;
6749 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6750 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6751 return;
6752 }
6753 break;
6754 }
6755 case Intrinsic::aarch64_sve_st3: {
6756 if (VT == MVT::nxv16i8) {
6757 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6758 return;
6759 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6760 VT == MVT::nxv8bf16) {
6761 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6762 return;
6763 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6764 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6765 return;
6766 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6767 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6768 return;
6769 }
6770 break;
6771 }
6772 case Intrinsic::aarch64_sve_st4: {
6773 if (VT == MVT::nxv16i8) {
6774 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6775 return;
6776 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6777 VT == MVT::nxv8bf16) {
6778 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6779 return;
6780 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6781 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6782 return;
6783 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6784 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6785 return;
6786 }
6787 break;
6788 }
6789 }
6790 break;
6791 }
6792 case AArch64ISD::LD2post: {
6793 if (VT == MVT::v8i8) {
6794 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6795 return;
6796 } else if (VT == MVT::v16i8) {
6797 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6798 return;
6799 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6800 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6801 return;
6802 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6803 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6804 return;
6805 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6806 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6807 return;
6808 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6809 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6810 return;
6811 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6812 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6813 return;
6814 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6815 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6816 return;
6817 }
6818 break;
6819 }
6820 case AArch64ISD::LD3post: {
6821 if (VT == MVT::v8i8) {
6822 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6823 return;
6824 } else if (VT == MVT::v16i8) {
6825 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6826 return;
6827 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6828 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6829 return;
6830 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6831 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6832 return;
6833 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6834 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6835 return;
6836 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6837 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6838 return;
6839 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6840 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6841 return;
6842 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6843 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6844 return;
6845 }
6846 break;
6847 }
6848 case AArch64ISD::LD4post: {
6849 if (VT == MVT::v8i8) {
6850 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6851 return;
6852 } else if (VT == MVT::v16i8) {
6853 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6854 return;
6855 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6856 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6857 return;
6858 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6859 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6860 return;
6861 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6862 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6863 return;
6864 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6865 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6866 return;
6867 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6868 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6869 return;
6870 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6871 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6872 return;
6873 }
6874 break;
6875 }
6876 case AArch64ISD::LD1x2post: {
6877 if (VT == MVT::v8i8) {
6878 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6879 return;
6880 } else if (VT == MVT::v16i8) {
6881 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6882 return;
6883 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6884 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6885 return;
6886 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6887 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6888 return;
6889 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6890 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6891 return;
6892 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6893 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6894 return;
6895 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6896 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6897 return;
6898 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6899 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6900 return;
6901 }
6902 break;
6903 }
6904 case AArch64ISD::LD1x3post: {
6905 if (VT == MVT::v8i8) {
6906 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6907 return;
6908 } else if (VT == MVT::v16i8) {
6909 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6910 return;
6911 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6912 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6913 return;
6914 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6915 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6916 return;
6917 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6918 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6919 return;
6920 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6921 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6922 return;
6923 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6924 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6925 return;
6926 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6927 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6928 return;
6929 }
6930 break;
6931 }
6932 case AArch64ISD::LD1x4post: {
6933 if (VT == MVT::v8i8) {
6934 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6935 return;
6936 } else if (VT == MVT::v16i8) {
6937 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6938 return;
6939 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6940 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6941 return;
6942 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6943 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6944 return;
6945 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6946 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6947 return;
6948 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6949 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6950 return;
6951 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6952 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6953 return;
6954 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6955 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6956 return;
6957 }
6958 break;
6959 }
6960 case AArch64ISD::LD1DUPpost: {
6961 if (VT == MVT::v8i8) {
6962 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6963 return;
6964 } else if (VT == MVT::v16i8) {
6965 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6966 return;
6967 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6968 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6969 return;
6970 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6971 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6972 return;
6973 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6974 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6975 return;
6976 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6977 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6978 return;
6979 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6980 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6981 return;
6982 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6983 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6984 return;
6985 }
6986 break;
6987 }
6988 case AArch64ISD::LD2DUPpost: {
6989 if (VT == MVT::v8i8) {
6990 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6991 return;
6992 } else if (VT == MVT::v16i8) {
6993 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6994 return;
6995 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6996 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6997 return;
6998 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6999 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7000 return;
7001 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7002 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7003 return;
7004 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7005 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7006 return;
7007 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7008 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7009 return;
7010 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7011 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7012 return;
7013 }
7014 break;
7015 }
7016 case AArch64ISD::LD3DUPpost: {
7017 if (VT == MVT::v8i8) {
7018 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7019 return;
7020 } else if (VT == MVT::v16i8) {
7021 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7022 return;
7023 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7024 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7025 return;
7026 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7027 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7028 return;
7029 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7030 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7031 return;
7032 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7033 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7034 return;
7035 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7036 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7037 return;
7038 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7039 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7040 return;
7041 }
7042 break;
7043 }
7044 case AArch64ISD::LD4DUPpost: {
7045 if (VT == MVT::v8i8) {
7046 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7047 return;
7048 } else if (VT == MVT::v16i8) {
7049 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7050 return;
7051 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7052 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7053 return;
7054 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7055 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7056 return;
7057 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7058 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7059 return;
7060 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7061 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7062 return;
7063 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7064 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7065 return;
7066 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7067 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7068 return;
7069 }
7070 break;
7071 }
7072 case AArch64ISD::LD1LANEpost: {
7073 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7074 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7075 return;
7076 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7077 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7078 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7079 return;
7080 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7081 VT == MVT::v2f32) {
7082 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7083 return;
7084 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7085 VT == MVT::v1f64) {
7086 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7087 return;
7088 }
7089 break;
7090 }
7091 case AArch64ISD::LD2LANEpost: {
7092 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7093 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7094 return;
7095 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7096 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7097 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7098 return;
7099 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7100 VT == MVT::v2f32) {
7101 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7102 return;
7103 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7104 VT == MVT::v1f64) {
7105 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7106 return;
7107 }
7108 break;
7109 }
7110 case AArch64ISD::LD3LANEpost: {
7111 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7112 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7113 return;
7114 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7115 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7116 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7117 return;
7118 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7119 VT == MVT::v2f32) {
7120 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7121 return;
7122 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7123 VT == MVT::v1f64) {
7124 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7125 return;
7126 }
7127 break;
7128 }
7129 case AArch64ISD::LD4LANEpost: {
7130 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7131 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7132 return;
7133 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7134 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7135 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7136 return;
7137 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7138 VT == MVT::v2f32) {
7139 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7140 return;
7141 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7142 VT == MVT::v1f64) {
7143 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7144 return;
7145 }
7146 break;
7147 }
7148 case AArch64ISD::ST2post: {
7149 VT = Node->getOperand(1).getValueType();
7150 if (VT == MVT::v8i8) {
7151 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7152 return;
7153 } else if (VT == MVT::v16i8) {
7154 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7155 return;
7156 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7157 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7158 return;
7159 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7160 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7161 return;
7162 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7163 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7164 return;
7165 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7166 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7167 return;
7168 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7169 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7170 return;
7171 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7172 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7173 return;
7174 }
7175 break;
7176 }
7177 case AArch64ISD::ST3post: {
7178 VT = Node->getOperand(1).getValueType();
7179 if (VT == MVT::v8i8) {
7180 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7181 return;
7182 } else if (VT == MVT::v16i8) {
7183 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7184 return;
7185 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7186 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7187 return;
7188 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7189 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7190 return;
7191 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7192 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7193 return;
7194 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7195 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7196 return;
7197 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7198 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7199 return;
7200 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7201 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7202 return;
7203 }
7204 break;
7205 }
7206 case AArch64ISD::ST4post: {
7207 VT = Node->getOperand(1).getValueType();
7208 if (VT == MVT::v8i8) {
7209 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7210 return;
7211 } else if (VT == MVT::v16i8) {
7212 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7213 return;
7214 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7215 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7216 return;
7217 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7218 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7219 return;
7220 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7221 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7222 return;
7223 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7224 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7225 return;
7226 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7227 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7228 return;
7229 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7230 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7231 return;
7232 }
7233 break;
7234 }
7235 case AArch64ISD::ST1x2post: {
7236 VT = Node->getOperand(1).getValueType();
7237 if (VT == MVT::v8i8) {
7238 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7239 return;
7240 } else if (VT == MVT::v16i8) {
7241 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7242 return;
7243 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7244 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7245 return;
7246 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7247 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7248 return;
7249 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7250 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7251 return;
7252 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7253 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7254 return;
7255 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7256 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7257 return;
7258 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7259 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7260 return;
7261 }
7262 break;
7263 }
7264 case AArch64ISD::ST1x3post: {
7265 VT = Node->getOperand(1).getValueType();
7266 if (VT == MVT::v8i8) {
7267 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7268 return;
7269 } else if (VT == MVT::v16i8) {
7270 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7271 return;
7272 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7273 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7274 return;
7275 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7276 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7277 return;
7278 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7279 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7280 return;
7281 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7282 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7283 return;
7284 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7285 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7286 return;
7287 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7288 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7289 return;
7290 }
7291 break;
7292 }
7293 case AArch64ISD::ST1x4post: {
7294 VT = Node->getOperand(1).getValueType();
7295 if (VT == MVT::v8i8) {
7296 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7297 return;
7298 } else if (VT == MVT::v16i8) {
7299 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7300 return;
7301 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7302 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7303 return;
7304 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7305 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7306 return;
7307 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7308 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7309 return;
7310 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7311 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7312 return;
7313 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7314 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7315 return;
7316 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7317 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7318 return;
7319 }
7320 break;
7321 }
7322 case AArch64ISD::ST2LANEpost: {
7323 VT = Node->getOperand(1).getValueType();
7324 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7325 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7326 return;
7327 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7328 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7329 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7330 return;
7331 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7332 VT == MVT::v2f32) {
7333 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7334 return;
7335 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7336 VT == MVT::v1f64) {
7337 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::ST3LANEpost: {
7343 VT = Node->getOperand(1).getValueType();
7344 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7345 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7346 return;
7347 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7348 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7349 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7350 return;
7351 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7352 VT == MVT::v2f32) {
7353 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7354 return;
7355 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7356 VT == MVT::v1f64) {
7357 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7358 return;
7359 }
7360 break;
7361 }
7362 case AArch64ISD::ST4LANEpost: {
7363 VT = Node->getOperand(1).getValueType();
7364 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7365 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7366 return;
7367 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7368 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7369 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7370 return;
7371 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7372 VT == MVT::v2f32) {
7373 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7374 return;
7375 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7376 VT == MVT::v1f64) {
7377 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7378 return;
7379 }
7380 break;
7381 }
7382 }
7383
7384 // Select the default instruction
7385 SelectCode(Node);
7386}
7387
7388/// createAArch64ISelDag - This pass converts a legalized DAG into an
7389/// AArch64-specific DAG, ready for instruction scheduling.
7390FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7391 CodeGenOptLevel OptLevel) {
7392 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7393}
7394
7395/// When \p PredVT is a scalable vector predicate in the form
7396/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7397/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7398/// structured vectors (NumVec > 1), the output data type is
7399/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7400/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7401/// EVT.
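/// For example (illustrative values, derived from the rule above): PredVT =
/// nxv4i1 with NumVec = 1 yields nxv4i32 (4 x 32 = 128), and PredVT = nxv2i1
/// with NumVec = 2 yields nxv4i64 (2 x 64 = 128 per vector).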
7402static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7403 unsigned NumVec) {
7404 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7405 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7406 return EVT();
7407
7408 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7409 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7410 return EVT();
7411
7412 ElementCount EC = PredVT.getVectorElementCount();
7413 EVT ScalarVT =
7414 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7415 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7416
7417 return MemVT;
7418}
7419
7420/// Return the EVT of the data associated with a memory operation in \p
7421/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
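/// For example (illustrative): for an extending masked load whose result is
/// nxv4i32 but whose memory type is nxv4i8, the returned EVT is nxv4i8, i.e.
/// the result type with its element type narrowed to the memory element type.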
7422static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7423 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7424 return MemIntr->getMemoryVT();
7425
7426 if (isa<MemSDNode>(Root)) {
7427 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7428
7429 EVT DataVT;
7430 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7431 DataVT = Load->getValueType(0);
7432 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7433 DataVT = Load->getValueType(0);
7434 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7435 DataVT = Store->getValue().getValueType();
7436 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7437 DataVT = Store->getValue().getValueType();
7438 else
7439 llvm_unreachable("Unexpected MemSDNode!");
7440
7441 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7442 }
7443
7444 const unsigned Opcode = Root->getOpcode();
7445 // For custom ISD nodes, we have to look at them individually to extract the
7446 // type of the data moved to/from memory.
7447 switch (Opcode) {
7448 case AArch64ISD::LD1_MERGE_ZERO:
7449 case AArch64ISD::LD1S_MERGE_ZERO:
7450 case AArch64ISD::LDNF1_MERGE_ZERO:
7451 case AArch64ISD::LDNF1S_MERGE_ZERO:
7452 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7453 case AArch64ISD::ST1_PRED:
7454 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7455 default:
7456 break;
7457 }
7458
7459 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7460 return EVT();
7461
7462 switch (Root->getConstantOperandVal(1)) {
7463 default:
7464 return EVT();
7465 case Intrinsic::aarch64_sme_ldr:
7466 case Intrinsic::aarch64_sme_str:
7467 return MVT::nxv16i8;
7468 case Intrinsic::aarch64_sve_prf:
7469 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7470 // width of the predicate.
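// (Illustrative: an nxv4i1 governing predicate implies 32-bit elements,
// i.e. the inferred memory type is nxv4i32.)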
7471 return getPackedVectorTypeFromPredicateType(
7472 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7473 case Intrinsic::aarch64_sve_ld2_sret:
7474 case Intrinsic::aarch64_sve_ld2q_sret:
7475 return getPackedVectorTypeFromPredicateType(
7476 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7477 case Intrinsic::aarch64_sve_st2q:
7478 return getPackedVectorTypeFromPredicateType(
7479 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7480 case Intrinsic::aarch64_sve_ld3_sret:
7481 case Intrinsic::aarch64_sve_ld3q_sret:
7482 return getPackedVectorTypeFromPredicateType(
7483 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7484 case Intrinsic::aarch64_sve_st3q:
7485 return getPackedVectorTypeFromPredicateType(
7486 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7487 case Intrinsic::aarch64_sve_ld4_sret:
7488 case Intrinsic::aarch64_sve_ld4q_sret:
7489 return getPackedVectorTypeFromPredicateType(
7490 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7491 case Intrinsic::aarch64_sve_st4q:
7492 return getPackedVectorTypeFromPredicateType(
7493 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7494 case Intrinsic::aarch64_sve_ld1udq:
7495 case Intrinsic::aarch64_sve_st1dq:
7496 return EVT(MVT::nxv1i64);
7497 case Intrinsic::aarch64_sve_ld1uwq:
7498 case Intrinsic::aarch64_sve_st1wq:
7499 return EVT(MVT::nxv1i32);
7500 }
7501}
7502
7503/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7504/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7505/// where Root is the memory access using N for its address.
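/// For example (illustrative): with MemVT = nxv4i32, an address of the form
/// (add x0, (vscale 32)) folds to Base = x0 and OffImm = 2, i.e. the
/// "[x0, #2, mul vl]" form of the SVE immediate-offset addressing mode.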
7506template <int64_t Min, int64_t Max>
7507bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7508 SDValue &Base,
7509 SDValue &OffImm) {
7510 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7511 const DataLayout &DL = CurDAG->getDataLayout();
7512 const MachineFrameInfo &MFI = MF->getFrameInfo();
7513
7514 if (N.getOpcode() == ISD::FrameIndex) {
7515 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7516 // We can only encode VL scaled offsets, so only fold in frame indexes
7517 // referencing SVE objects.
7518 if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7519 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7520 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7521 return true;
7522 }
7523
7524 return false;
7525 }
7526
7527 if (MemVT == EVT())
7528 return false;
7529
7530 if (N.getOpcode() != ISD::ADD)
7531 return false;
7532
7533 SDValue VScale = N.getOperand(1);
7534 int64_t MulImm = std::numeric_limits<int64_t>::max();
7535 if (VScale.getOpcode() == ISD::VSCALE) {
7536 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7537 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7538 int64_t ByteOffset = C->getSExtValue();
7539 const auto KnownVScale =
7540 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7541
7542 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7543 return false;
7544
7545 MulImm = ByteOffset / KnownVScale;
7546 } else
7547 return false;
7548
7549 TypeSize TS = MemVT.getSizeInBits();
7550 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7551
7552 if ((MulImm % MemWidthBytes) != 0)
7553 return false;
7554
7555 int64_t Offset = MulImm / MemWidthBytes;
7556 if ((Offset < Min) || (Offset > Max))
7557 return false;
7558
7559 Base = N.getOperand(0);
7560 if (Base.getOpcode() == ISD::FrameIndex) {
7561 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7562 // We can only encode VL scaled offsets, so only fold in frame indexes
7563 // referencing SVE objects.
7564 if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7565 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7566 }
7567
7568 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7569 return true;
7570}
7571
7572/// Select register plus register addressing mode for SVE, with scaled
7573/// offset.
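/// For example (illustrative): with Scale = 3, an address of the form
/// (add x0, (shl x1, 3)) folds to Base = x0 and Offset = x1, matching the
/// "[x0, x1, lsl #3]" doubleword reg+reg form.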
7574bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7575 SDValue &Base,
7576 SDValue &Offset) {
7577 if (N.getOpcode() != ISD::ADD)
7578 return false;
7579
7580 // Process an ADD node.
7581 const SDValue LHS = N.getOperand(0);
7582 const SDValue RHS = N.getOperand(1);
7583
7584 // 8-bit data is not scaled, so there is no SHL node to look through; it
7585 // is treated separately.
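// (Illustrative: byte accesses use an unscaled register offset such as
// "[x0, x1]", so the two ADD operands can be used directly.)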
7586 if (Scale == 0) {
7587 Base = LHS;
7588 Offset = RHS;
7589 return true;
7590 }
7591
7592 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7593 int64_t ImmOff = C->getSExtValue();
7594 unsigned Size = 1 << Scale;
7595
7596 // To use the reg+reg addressing mode, the immediate must be a multiple of
7597 // the vector element's byte size.
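// Illustrative example: with Scale == 2 (word elements), an immediate
// offset of 12 bytes becomes an index of 3, materialised below via
// MOVi64imm and used as the register offset.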
7598 if (ImmOff % Size)
7599 return false;
7600
7601 SDLoc DL(N);
7602 Base = LHS;
7603 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7604 SDValue Ops[] = {Offset};
7605 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7606 Offset = SDValue(MI, 0);
7607 return true;
7608 }
7609
7610 // Check if the RHS is a shift node with a constant.
7611 if (RHS.getOpcode() != ISD::SHL)
7612 return false;
7613
7614 const SDValue ShiftRHS = RHS.getOperand(1);
7615 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7616 if (C->getZExtValue() == Scale) {
7617 Base = LHS;
7618 Offset = RHS.getOperand(0);
7619 return true;
7620 }
7621
7622 return false;
7623}
7624
7625bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7626 const AArch64TargetLowering *TLI =
7627 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7628
7629 return TLI->isAllActivePredicate(*CurDAG, N);
7630}
7631
7632bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7633 EVT VT = N.getValueType();
7634 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7635}
7636
7637bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7638 SDValue &Base, SDValue &Offset,
7639 unsigned Scale) {
7640 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7641 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7642 int64_t ImmOff = C->getSExtValue();
7643 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7644 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7645 }
7646 return SDValue();
7647 };
7648
7649 if (SDValue C = MatchConstantOffset(N)) {
7650 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7651 Offset = C;
7652 return true;
7653 }
7654
7655 // Try to untangle an ADD node into a 'reg + offset'.
7656 if (CurDAG->isBaseWithConstantOffset(N)) {
7657 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7658 Base = N.getOperand(0);
7659 Offset = C;
7660 return true;
7661 }
7662 }
7663
7664 // By default, just match reg + 0.
7665 Base = N;
7666 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7667 return true;
7668}
7669
7670bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7671 SDValue &Imm) {
7672 AArch64CC::CondCode CC =
7673 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7674 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7675 // Check conservatively if the immediate fits the valid range [0, 64).
7676 // Immediate variants for GE and HS definitely need to be decremented
7677 // when lowering the pseudos later, so an immediate of 1 would become 0.
7678 // For the inverse conditions LT and LO we don't know for sure if they
7679 // will need a decrement but should the decision be made to reverse the
7680 // branch condition, we again end up with the need to decrement.
7681 // The same argument holds for LE, LS, GT and HI and possibly
7682 // incremented immediates. This can lead to slightly less optimal
7683 // codegen, e.g. we never codegen the legal case
7684 // cblt w0, #63, A
7685 // because we could end up with the illegal case
7686 // cbge w0, #64, B
7687 // should the decision to reverse the branch direction be made. For the
7688 // lower bound cases this is no problem since we can express comparisons
7689 // against 0 with either tbz/tbnz or using wzr/xzr.
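// Illustrative consequence of the bounds chosen below: for GE/HS/LT/LO an
// immediate of 0 is rejected (a decrement would need -1), and for
// LE/LS/GT/HI an immediate of 63 is rejected (an increment would need 64).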
7690 uint64_t LowerBound = 0, UpperBound = 64;
7691 switch (CC) {
7692 case AArch64CC::GE:
7693 case AArch64CC::HS:
7694 case AArch64CC::LT:
7695 case AArch64CC::LO:
7696 LowerBound = 1;
7697 break;
7698 case AArch64CC::LE:
7699 case AArch64CC::LS:
7700 case AArch64CC::GT:
7701 case AArch64CC::HI:
7702 UpperBound = 63;
7703 break;
7704 default:
7705 break;
7706 }
7707
7708 if (CN->getAPIntValue().uge(LowerBound) &&
7709 CN->getAPIntValue().ult(UpperBound)) {
7710 SDLoc DL(N);
7711 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7712 return true;
7713 }
7714 }
7715
7716 return false;
7717}
unsigned SubReg
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5999
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1670
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1598
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1452
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:851
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
iterator begin() const
Definition ArrayRef.h:135
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
uint8_t getStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
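As a hedged illustration of the SDNode/SDValue accessors listed above (the helper is hypothetical and not part of this file), a typical query made during selection looks like:

  // True when V is a 64-bit integer ISD::ADD whose result has exactly one use.
  static bool isSingleUseAdd64(SDValue V) {
    return V.getOpcode() == ISD::ADD && V.hasOneUse() &&
           V.getValueType() == MVT::i64;
  }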
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
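A hedged sketch of how the SelectionDAG construction helpers above are typically combined when a node is finally selected; the helper name, the choice of AArch64::ADDXri, and the i64 result type are illustrative assumptions, not code from this file:

  // Mutate N in place into ADDXri(Rn, imm12, lsl #0).
  static void selectAddImmediate(SelectionDAG &DAG, SDNode *N, const SDLoc &DL,
                                 uint64_t Imm12) {
    SDValue Imm = DAG.getTargetConstant(Imm12, DL, MVT::i32);
    SDValue Shift = DAG.getTargetConstant(0, DL, MVT::i32); // LSL #0
    SDValue Ops[] = {N->getOperand(0), Imm, Shift};
    DAG.SelectNodeTo(N, AArch64::ADDXri, MVT::i64, Ops);
  }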
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
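A minimal sketch of the AArch64_AM logical-immediate helpers listed above; the wrapper name is hypothetical:

  // Returns true and fills Encoding (the N:immr:imms form used by the
  // AND/ORR/EOR immediate instructions) when Mask is encodable for a
  // 64-bit register.
  static bool tryEncodeLogicalImm64(uint64_t Mask, uint64_t &Encoding) {
    if (!AArch64_AM::isLogicalImmediate(Mask, /*regSize=*/64))
      return false;
    return AArch64_AM::processLogicalImmediate(Mask, /*RegSize=*/64, Encoding);
  }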
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
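To make the opcode list above concrete, here is a hedged sketch (hypothetical helper, not this file's code) of peeling one integer extension off an operand, something the extended-register addressing-mode selection has to do:

  // Look through one layer of sign/zero/any extension.
  static SDValue stripIntExtension(SDValue V) {
    switch (V.getOpcode()) {
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      return V.getOperand(0);
    default:
      return V;
    }
  }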
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:260
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:276
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition MathExtras.h:348
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition bit.h:157
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1950
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, or -1 if the value is zero.
Definition MathExtras.h:342
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into an AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
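The free-standing bit utilities above answer the same questions as the APInt members, but on plain integers. A hedged sketch (hypothetical helper) of how they recover a bitfield's position and width, as UBFX/BFI matching requires:

  // Splits a contiguous mask into (Lsb, Width); e.g. 0x0000ffff00000000
  // yields Lsb = 32 and Width = 16, and
  // maskTrailingOnes<uint64_t>(Width) << Lsb rebuilds the original mask.
  static bool splitContiguousMask(uint64_t Mask, unsigned &Lsb,
                                  unsigned &Width) {
    if (!isShiftedMask_64(Mask))
      return false;
    Lsb = llvm::countr_zero(Mask);
    Width = llvm::countr_one(Mask >> Lsb);
    return true;
  }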
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
static constexpr roundingMode rmTowardZero
Definition APFloat.h:308
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector of NumElements elements, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
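A hedged example of the EVT queries listed above, using a scalable SVE-style type; the function and the chosen type are illustrative only:

  // Builds nxv4i32 and reads back its key properties.
  static void inspectScalableVT(LLVMContext &Ctx) {
    EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true);
    if (VT.isScalableVector()) {
      unsigned MinElts = VT.getVectorMinNumElements(); // 4
      uint64_t EltBits = VT.getScalarSizeInBits();     // 32
      (void)MinElts;
      (void)EltBits;
    }
  }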
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.