1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(2));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
89
90RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
91 const RISCVSubtarget &STI)
92 : TargetLowering(TM), Subtarget(STI) {
93
94 RISCVABI::ABI ABI = Subtarget.getTargetABI();
95 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
96
97 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
98 !Subtarget.hasStdExtF()) {
99 errs() << "Hard-float 'f' ABI can't be used for a target that "
100 "doesn't support the F instruction set extension (ignoring "
101 "target-abi)\n";
102 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
104 !Subtarget.hasStdExtD()) {
105 errs() << "Hard-float 'd' ABI can't be used for a target that "
106 "doesn't support the D instruction set extension (ignoring "
107 "target-abi)\n";
108 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
109 }
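// For illustration: with this fallback, requesting e.g. -target-abi=lp64d on
// a subtarget that lacks the D extension prints the warning above and then
// quietly continues with the plain lp64 ABI (and likewise ilp32d -> ilp32).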
110
111 switch (ABI) {
112 default:
113 reportFatalUsageError("Don't know how to lower this ABI");
122 break;
123 }
124
125 MVT XLenVT = Subtarget.getXLenVT();
126
127 // Set up the register classes.
128 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
129
130 if (Subtarget.hasStdExtZfhmin())
131 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
132 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
133 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
134 if (Subtarget.hasStdExtF())
135 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
136 if (Subtarget.hasStdExtD())
137 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
138 if (Subtarget.hasStdExtZhinxmin())
139 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
140 if (Subtarget.hasStdExtZfinx())
141 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
142 if (Subtarget.hasStdExtZdinx()) {
143 if (Subtarget.is64Bit())
144 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
145 else
146 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
147 }
148
149 static const MVT::SimpleValueType BoolVecVTs[] = {
150 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
151 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
152 static const MVT::SimpleValueType IntVecVTs[] = {
153 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
154 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
155 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
156 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
157 MVT::nxv4i64, MVT::nxv8i64};
158 static const MVT::SimpleValueType F16VecVTs[] = {
159 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
160 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
161 static const MVT::SimpleValueType BF16VecVTs[] = {
162 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
163 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
164 static const MVT::SimpleValueType F32VecVTs[] = {
165 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
166 static const MVT::SimpleValueType F64VecVTs[] = {
167 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
168 static const MVT::SimpleValueType VecTupleVTs[] = {
169 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
170 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
171 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
172 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
173 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
174 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
175 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
176 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
177 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
178 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
179 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
180
181 if (Subtarget.hasVInstructions()) {
182 auto addRegClassForRVV = [this](MVT VT) {
183 // Disable the smallest fractional LMUL types if ELEN is less than
184 // RVVBitsPerBlock.
185 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
186 if (VT.getVectorMinNumElements() < MinElts)
187 return;
188
189 unsigned Size = VT.getSizeInBits().getKnownMinValue();
190 const TargetRegisterClass *RC;
192 RC = &RISCV::VRRegClass;
193 else if (Size == 2 * RISCV::RVVBitsPerBlock)
194 RC = &RISCV::VRM2RegClass;
195 else if (Size == 4 * RISCV::RVVBitsPerBlock)
196 RC = &RISCV::VRM4RegClass;
197 else if (Size == 8 * RISCV::RVVBitsPerBlock)
198 RC = &RISCV::VRM8RegClass;
199 else
200 llvm_unreachable("Unexpected size");
201
202 addRegisterClass(VT, RC);
203 };
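// Worked example for the lambda above (illustrative, derived from the code):
// with Zve32x, ELEN == 32, so MinElts = RVVBitsPerBlock / 32 = 2 and the
// smallest fractional type nxv1i8 is skipped. For the size-to-class mapping,
// nxv8i32 has a known-min size of 256 bits = 4 * RVVBitsPerBlock and is
// therefore placed in VRM4, i.e. an LMUL=4 register group.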
204
205 for (MVT VT : BoolVecVTs)
206 addRegClassForRVV(VT);
207 for (MVT VT : IntVecVTs) {
208 if (VT.getVectorElementType() == MVT::i64 &&
209 !Subtarget.hasVInstructionsI64())
210 continue;
211 addRegClassForRVV(VT);
212 }
213
214 if (Subtarget.hasVInstructionsF16Minimal() ||
215 Subtarget.hasVendorXAndesVPackFPH())
216 for (MVT VT : F16VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.hasVInstructionsBF16Minimal() ||
220 Subtarget.hasVendorXAndesVBFHCvt())
221 for (MVT VT : BF16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsF32())
225 for (MVT VT : F32VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsF64())
229 for (MVT VT : F64VecVTs)
230 addRegClassForRVV(VT);
231
232 if (Subtarget.useRVVForFixedLengthVectors()) {
233 auto addRegClassForFixedVectors = [this](MVT VT) {
234 MVT ContainerVT = getContainerForFixedLengthVector(VT);
235 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
236 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
237 addRegisterClass(VT, TRI.getRegClass(RCID));
238 };
240 if (useRVVForFixedLengthVectorVT(VT))
241 addRegClassForFixedVectors(VT);
242
244 if (useRVVForFixedLengthVectorVT(VT))
245 addRegClassForFixedVectors(VT);
246 }
247
248 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
278 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
279 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
280 }
281
282 // Compute derived properties from the register classes.
284 computeRegisterProperties(STI.getRegisterInfo());
286
288 MVT::i1, Promote);
289 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
291 MVT::i1, Promote);
292
293 // TODO: add all necessary setOperationAction calls.
294 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
295
296 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
297 setOperationAction(ISD::BR_CC, XLenVT, Expand);
298 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
300
305 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
308 }
309
310 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
311
312 setOperationAction(ISD::VASTART, MVT::Other, Custom);
313 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
314
315 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
316 !Subtarget.hasVendorXAndesPerf())
318
320
321 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
322 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
323 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
324 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
325
326 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
327 setOperationAction(ISD::LOAD, MVT::i64, Custom);
328 setOperationAction(ISD::STORE, MVT::i64, Custom);
329 }
330
331 if (Subtarget.is64Bit()) {
333
334 setOperationAction(ISD::LOAD, MVT::i32, Custom);
336 MVT::i32, Custom);
338 if (!Subtarget.hasStdExtZbb())
341 Custom);
343 }
344 if (!Subtarget.hasStdExtZmmul()) {
346 } else if (Subtarget.is64Bit()) {
349 } else {
351 }
352
353 if (!Subtarget.hasStdExtM()) {
355 Expand);
356 } else if (Subtarget.is64Bit()) {
358 {MVT::i8, MVT::i16, MVT::i32}, Custom);
359 }
360
363 Expand);
364
366 Custom);
367
368 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
369 if (Subtarget.is64Bit())
371 } else if (Subtarget.hasVendorXTHeadBb()) {
372 if (Subtarget.is64Bit())
375 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
377 } else {
379 }
380
382 Subtarget.hasREV8Like() ? Legal : Expand);
383
384 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
385 !Subtarget.is64Bit()) {
387 } else {
388 // Zbkb can use rev8+brev8 to implement bitreverse.
390 Subtarget.hasStdExtZbkb() ? Custom : Expand);
391 if (Subtarget.hasStdExtZbkb())
393 }
394
395 if (Subtarget.hasStdExtZbb() ||
396 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
398 Legal);
399 }
400
401 if (Subtarget.hasCTZLike()) {
402 if (Subtarget.is64Bit())
404 } else {
406 }
407
408 if (!Subtarget.hasCPOPLike()) {
409 // TODO: These should be set to LibCall, but this currently breaks
410 // the Linux kernel build. See #101786. Lacks i128 tests, too.
411 if (Subtarget.is64Bit())
413 else
416 }
417
418 if (Subtarget.hasCLZLike()) {
419 // We need the custom lowering to make sure that the resulting sequence
420 // for the 32bit case is efficient on 64bit targets.
421 // Use default promotion for i32 without Zbb.
422 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
424 } else {
426 }
427
428 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
430 } else if (Subtarget.hasShortForwardBranchOpt()) {
431 // We can use PseudoCCSUB to implement ABS.
433 } else if (Subtarget.is64Bit()) {
435 }
436
437 if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
438 !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics())
440
441 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
448 }
449
450 static const unsigned FPLegalNodeTypes[] = {
451 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
452 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
453 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
458
459 static const ISD::CondCode FPCCToExpand[] = {
463
464 static const unsigned FPOpToExpand[] = {
465 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
466 ISD::FREM};
467
468 static const unsigned FPRndMode[] = {
469 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
470 ISD::FROUNDEVEN};
471
472 static const unsigned ZfhminZfbfminPromoteOps[] = {
473 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
474 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
479 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
480 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
481 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
482
483 if (Subtarget.hasStdExtZfbfmin()) {
484 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
488 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
489 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
491 setOperationAction(ISD::FABS, MVT::bf16, Custom);
492 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
496 }
497
498 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
499 if (Subtarget.hasStdExtZfhOrZhinx()) {
500 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
501 setOperationAction(FPRndMode, MVT::f16,
502 Subtarget.hasStdExtZfa() ? Legal : Custom);
504 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
505 Subtarget.hasStdExtZfa() ? Legal : Custom);
506 if (Subtarget.hasStdExtZfa())
508 } else {
509 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
510 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
511 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
514 setOperationAction(Op, MVT::f16, Custom);
515 setOperationAction(ISD::FABS, MVT::f16, Custom);
516 setOperationAction(ISD::FNEG, MVT::f16, Custom);
520 }
521
522 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
523
526 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
529 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
530
532 ISD::FNEARBYINT, MVT::f16,
533 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
534 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
535 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
536 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
537 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP},
538 MVT::f16, Promote);
539
540 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
541 // complete support for all operations in LegalizeDAG.
546 MVT::f16, Promote);
547
548 // We need to custom promote this.
549 if (Subtarget.is64Bit())
550 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
551 }
552
553 if (Subtarget.hasStdExtFOrZfinx()) {
554 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
555 setOperationAction(FPRndMode, MVT::f32,
556 Subtarget.hasStdExtZfa() ? Legal : Custom);
557 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
560 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
561 setOperationAction(FPOpToExpand, MVT::f32, Expand);
562 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
563 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
564 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
565 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
567 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
568 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
569 Subtarget.isSoftFPABI() ? LibCall : Custom);
570 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
571 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
572 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
573 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
574
575 if (Subtarget.hasStdExtZfa()) {
577 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
578 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
579 } else {
580 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
581 }
582 }
583
584 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
585 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
586
587 if (Subtarget.hasStdExtDOrZdinx()) {
588 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
589
590 if (!Subtarget.is64Bit())
591 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
592
593 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
594 !Subtarget.is64Bit()) {
595 setOperationAction(ISD::LOAD, MVT::f64, Custom);
596 setOperationAction(ISD::STORE, MVT::f64, Custom);
597 }
598
599 if (Subtarget.hasStdExtZfa()) {
601 setOperationAction(FPRndMode, MVT::f64, Legal);
602 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
603 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
604 } else {
605 if (Subtarget.is64Bit())
606 setOperationAction(FPRndMode, MVT::f64, Custom);
607
608 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
609 }
610
613 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
616 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
617 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
618 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
619 setOperationAction(FPOpToExpand, MVT::f64, Expand);
620 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
621 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
622 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
623 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
625 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
626 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
627 Subtarget.isSoftFPABI() ? LibCall : Custom);
628 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
629 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
630 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
631 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
632 }
633
634 if (Subtarget.is64Bit()) {
637 MVT::i32, Custom);
638 setOperationAction(ISD::LROUND, MVT::i32, Custom);
639 }
640
641 if (Subtarget.hasStdExtFOrZfinx()) {
643 Custom);
644
645 // f16/bf16 require custom handling.
647 Custom);
649 Custom);
650
652 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
653 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
654 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
655 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
656 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
657 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
658 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
659 }
660
663 XLenVT, Custom);
664
666
667 if (Subtarget.is64Bit())
669
670 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
671 // Unfortunately this can't be determined just from the ISA naming string.
672 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
673 Subtarget.is64Bit() ? Legal : Custom);
674 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
675 Subtarget.is64Bit() ? Legal : Custom);
676
677 if (Subtarget.is64Bit()) {
678 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
679 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
680 }
681
682 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
684 if (Subtarget.is64Bit())
686
687 if (Subtarget.hasVendorXMIPSCBOP())
688 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
689 else if (Subtarget.hasStdExtZicbop())
690 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
691
692 if (Subtarget.hasStdExtA()) {
693 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
694 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
696 else
698 } else if (Subtarget.hasForcedAtomics()) {
699 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
700 } else {
702 }
703
704 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
705
707
708 if (getTargetMachine().getTargetTriple().isOSLinux()) {
709 // Custom lowering of llvm.clear_cache.
711 }
712
713 if (Subtarget.hasVInstructions()) {
715
716 setOperationAction(ISD::VSCALE, XLenVT, Custom);
717
718 // RVV intrinsics may have illegal operands.
719 // We also need to custom legalize vmv.x.s.
722 {MVT::i8, MVT::i16}, Custom);
723 if (Subtarget.is64Bit())
725 MVT::i32, Custom);
726 else
728 MVT::i64, Custom);
729
731 MVT::Other, Custom);
732
733 static const unsigned IntegerVPOps[] = {
734 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
735 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
736 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
737 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
738 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
739 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
740 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
741 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
742 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
743 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
744 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
745 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
746 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
747 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
748 ISD::EXPERIMENTAL_VP_SPLAT};
749
750 static const unsigned FloatingPointVPOps[] = {
751 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
752 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
753 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
754 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
755 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
756 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
757 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
758 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
759 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
760 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
761 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
762 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
763 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
764
765 static const unsigned IntegerVecReduceOps[] = {
766 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
767 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
768 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
769
770 static const unsigned FloatingPointVecReduceOps[] = {
771 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
772 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
773
774 static const unsigned FloatingPointLibCallOps[] = {
775 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
776 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
777
778 if (!Subtarget.is64Bit()) {
779 // We must custom-lower certain vXi64 operations on RV32 due to the vector
780 // element type being illegal.
782 MVT::i64, Custom);
783
784 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
785
786 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
787 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
788 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
789 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
790 MVT::i64, Custom);
791 }
792
793 for (MVT VT : BoolVecVTs) {
794 if (!isTypeLegal(VT))
795 continue;
796
798
799 // Mask VTs are custom-expanded into a series of standard nodes
803 VT, Custom);
804
806 Custom);
807
809 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
810 Expand);
811 setOperationAction(ISD::VP_MERGE, VT, Custom);
812
813 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
814 Custom);
815
816 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
817
819 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
820 Custom);
821
823 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
824 Custom);
825
826 // RVV has native int->float & float->int conversions where the
827 // element type sizes are within one power-of-two of each other. Any
828 // wider distances between type sizes have to be lowered as sequences
829 // which progressively narrow the gap in stages.
834 VT, Custom);
836 Custom);
837
838 // Expand all extending loads to types larger than this, and truncating
839 // stores from types larger than this.
841 setTruncStoreAction(VT, OtherVT, Expand);
843 OtherVT, Expand);
844 }
845
846 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
847 ISD::VP_TRUNCATE, ISD::VP_SETCC},
848 VT, Custom);
849
852
854
855 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
856 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
857 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
858
861 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
862 }
863
864 for (MVT VT : IntVecVTs) {
865 if (!isTypeLegal(VT))
866 continue;
867
870
871 // Vectors implement MULHS/MULHU.
873
874 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
875 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
877
879 Legal);
880
882
883 // Custom-lower extensions and truncations from/to mask types.
885 VT, Custom);
886
887 // RVV has native int->float & float->int conversions where the
888 // element type sizes are within one power-of-two of each other. Any
889 // wider distances between type sizes have to be lowered as sequences
890 // which progressively narrow the gap in stages.
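// For illustration: an nxv2i8 -> nxv2f32 conversion is more than one
// power of two apart in element width, so it cannot be a single widening
// convert; it is instead staged, e.g. a sign/zero-extend to i16 followed by
// an i16 -> f32 convert (the exact staging is chosen by the custom lowering).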
895 VT, Custom);
897 Custom);
901 VT, Legal);
902
903 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
904 // nodes which truncate by one power of two at a time.
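// For example, truncating nxv2i64 to nxv2i8 becomes three
// TRUNCATE_VECTOR_VL steps (i64 -> i32 -> i16 -> i8), each of which
// typically selects to a narrowing shift (vnsrl.wi) on RVV.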
907 Custom);
908
909 // Custom-lower insert/extract operations to simplify patterns.
911 Custom);
912
913 // Custom-lower reduction operations to set up the corresponding custom
914 // nodes' operands.
915 setOperationAction(IntegerVecReduceOps, VT, Custom);
916
917 setOperationAction(IntegerVPOps, VT, Custom);
918
919 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
920
921 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
922 VT, Custom);
923
925 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
926 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
927 VT, Custom);
928 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
929
932 VT, Custom);
933
936
938
940 setTruncStoreAction(VT, OtherVT, Expand);
942 OtherVT, Expand);
943 }
944
947
948 // Splice
950
951 if (Subtarget.hasStdExtZvkb()) {
953 setOperationAction(ISD::VP_BSWAP, VT, Custom);
954 } else {
955 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
957 }
958
959 if (Subtarget.hasStdExtZvbb()) {
961 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
962 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
963 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
964 VT, Custom);
965 } else {
966 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
968 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
969 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
970 VT, Expand);
971
972 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
973 // in the range of f32.
974 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
975 if (isTypeLegal(FloatVT)) {
977 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
978 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
979 VT, Custom);
980 }
981 }
982
984 }
985
986 for (MVT VT : VecTupleVTs) {
987 if (!isTypeLegal(VT))
988 continue;
989
990 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
991 }
992
993 // Expand various CCs to best match the RVV ISA, which natively supports UNE
994 // but no other unordered comparisons, and supports all ordered comparisons
995 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
996 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
997 // and we pattern-match those back to the "original", swapping operands once
998 // more. This way we catch both operations and both "vf" and "fv" forms with
999 // fewer patterns.
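// For example, a SETOGT is expanded to a SETOLT with the operands swapped,
// and the instruction patterns then match that back to the original operand
// order, so a single vmflt-based pattern covers GT/OGT as well as LT/OLT in
// both the vector-scalar and scalar-vector forms.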
1000 static const ISD::CondCode VFPCCToExpand[] = {
1004 };
1005
1006 // TODO: support more ops.
1007 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1008 ISD::FMINNUM,
1009 ISD::FMAXNUM,
1010 ISD::FMINIMUMNUM,
1011 ISD::FMAXIMUMNUM,
1012 ISD::FADD,
1013 ISD::FSUB,
1014 ISD::FMUL,
1015 ISD::FMA,
1016 ISD::FDIV,
1017 ISD::FSQRT,
1018 ISD::FCEIL,
1019 ISD::FTRUNC,
1020 ISD::FFLOOR,
1021 ISD::FROUND,
1022 ISD::FROUNDEVEN,
1023 ISD::FRINT,
1024 ISD::FNEARBYINT,
1026 ISD::SETCC,
1027 ISD::FMAXIMUM,
1028 ISD::FMINIMUM,
1035 ISD::VECREDUCE_FMIN,
1036 ISD::VECREDUCE_FMAX,
1037 ISD::VECREDUCE_FMINIMUM,
1038 ISD::VECREDUCE_FMAXIMUM};
1039
1040 // TODO: support more vp ops.
1041 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1042 ISD::VP_FADD,
1043 ISD::VP_FSUB,
1044 ISD::VP_FMUL,
1045 ISD::VP_FDIV,
1046 ISD::VP_FMA,
1047 ISD::VP_REDUCE_FMIN,
1048 ISD::VP_REDUCE_FMAX,
1049 ISD::VP_SQRT,
1050 ISD::VP_FMINNUM,
1051 ISD::VP_FMAXNUM,
1052 ISD::VP_FCEIL,
1053 ISD::VP_FFLOOR,
1054 ISD::VP_FROUND,
1055 ISD::VP_FROUNDEVEN,
1056 ISD::VP_FROUNDTOZERO,
1057 ISD::VP_FRINT,
1058 ISD::VP_FNEARBYINT,
1059 ISD::VP_SETCC,
1060 ISD::VP_FMINIMUM,
1061 ISD::VP_FMAXIMUM,
1062 ISD::VP_REDUCE_FMINIMUM,
1063 ISD::VP_REDUCE_FMAXIMUM};
1064
1065 // Sets common operation actions on RVV floating-point vector types.
1066 const auto SetCommonVFPActions = [&](MVT VT) {
1068 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1069 // sizes are within one power-of-two of each other. Therefore conversions
1070 // between vXf16 and vXf64 must be lowered as sequences which convert via
1071 // vXf32.
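// For example, extending nxv2f16 to nxv2f64 (or rounding the other way) is
// more than one power of two apart in element size, so it is lowered as two
// conversion steps through nxv2f32 (f16 <-> f32 <-> f64).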
1072 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1073 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1074 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1075 // Custom-lower insert/extract operations to simplify patterns.
1077 Custom);
1078 // Expand various condition codes (explained above).
1079 setCondCodeAction(VFPCCToExpand, VT, Expand);
1080
1082 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1083 Legal);
1084 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1085
1086 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1087 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1089 VT, Custom);
1090
1091 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1092
1093 // Expand FP operations that need libcalls.
1094 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1095
1097
1098 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1099
1100 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1101 VT, Custom);
1102
1104 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1105 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1106 VT, Custom);
1107 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1108
1111
1114 VT, Custom);
1115
1118
1120 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1121 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1122
1123 setOperationAction(FloatingPointVPOps, VT, Custom);
1124
1126 Custom);
1129 VT, Legal);
1134 VT, Custom);
1135
1137 };
1138
1139 // Sets common extload/truncstore actions on RVV floating-point vector
1140 // types.
1141 const auto SetCommonVFPExtLoadTruncStoreActions =
1142 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1143 for (auto SmallVT : SmallerVTs) {
1144 setTruncStoreAction(VT, SmallVT, Expand);
1145 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1146 }
1147 };
1148
1149 // Sets common actions for f16 and bf16 for when there's only
1150 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1151 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1152 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1154 Custom);
1155 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1156 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1157 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1158 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1159 Custom);
1161 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1167 VT, Custom);
1168 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1169 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1170 MVT EltVT = VT.getVectorElementType();
1171 if (isTypeLegal(EltVT))
1172 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1174 VT, Custom);
1175 else
1176 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1177 EltVT, Custom);
1178 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1179 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1180 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1181 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1182 ISD::VP_SCATTER},
1183 VT, Custom);
1184 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1185
1186 setOperationAction(ISD::FNEG, VT, Expand);
1187 setOperationAction(ISD::FABS, VT, Expand);
1189
1190 // Expand FP operations that need libcalls.
1191 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1192
1193 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1194 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1195 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1196 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1197 } else {
1198 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1199 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1200 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1201 }
1202 };
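// Illustration of the promotion scheme above: with only Zvfhmin, an FADD on
// nxv4f16 is promoted to nxv4f32; the operands are extended to f32, the add
// is performed there, and the result is rounded back to f16. The LMUL_8
// types (nxv32f16 / nxv32bf16) take the Custom path instead because the
// promoted nxv32f32 type does not exist, so they are first split in half.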
1203
1204 if (Subtarget.hasVInstructionsF16()) {
1205 for (MVT VT : F16VecVTs) {
1206 if (!isTypeLegal(VT))
1207 continue;
1208 SetCommonVFPActions(VT);
1209 }
1210 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1211 for (MVT VT : F16VecVTs) {
1212 if (!isTypeLegal(VT))
1213 continue;
1214 SetCommonPromoteToF32Actions(VT);
1215 }
1216 }
1217
1218 if (Subtarget.hasVInstructionsBF16Minimal()) {
1219 for (MVT VT : BF16VecVTs) {
1220 if (!isTypeLegal(VT))
1221 continue;
1222 SetCommonPromoteToF32Actions(VT);
1223 }
1224 }
1225
1226 if (Subtarget.hasVInstructionsF32()) {
1227 for (MVT VT : F32VecVTs) {
1228 if (!isTypeLegal(VT))
1229 continue;
1230 SetCommonVFPActions(VT);
1231 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1232 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1233 }
1234 }
1235
1236 if (Subtarget.hasVInstructionsF64()) {
1237 for (MVT VT : F64VecVTs) {
1238 if (!isTypeLegal(VT))
1239 continue;
1240 SetCommonVFPActions(VT);
1241 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1242 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1243 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1244 }
1245 }
1246
1247 if (Subtarget.useRVVForFixedLengthVectors()) {
1249 if (!useRVVForFixedLengthVectorVT(VT))
1250 continue;
1251
1252 // By default everything must be expanded.
1253 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1256 setTruncStoreAction(VT, OtherVT, Expand);
1258 OtherVT, Expand);
1259 }
1260
1261 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1262 // expansion to a build_vector of 0s.
1264
1265 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1267 Custom);
1268
1271 Custom);
1272
1274 VT, Custom);
1275
1277 VT, Custom);
1278
1280
1281 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1282
1284
1286
1289 Custom);
1290
1291 setOperationAction(ISD::BITCAST, VT, Custom);
1292
1294 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1295 Custom);
1296
1298 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1299 Custom);
1300
1302 {
1311 },
1312 VT, Custom);
1314 Custom);
1315
1317
1318 // Operations below differ between mask vectors and other vectors.
1319 if (VT.getVectorElementType() == MVT::i1) {
1320 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1321 ISD::OR, ISD::XOR},
1322 VT, Custom);
1323
1324 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1325 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1326 VT, Custom);
1327
1328 setOperationAction(ISD::VP_MERGE, VT, Custom);
1329
1330 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1331 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1332 continue;
1333 }
1334
1335 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1336 // it before type legalization for i64 vectors on RV32. It will then be
1337 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1338 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1339 // improvements first.
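// For illustration: on RV32 an i64 splat is therefore first formed as a
// SPLAT_VECTOR, and type legalization later rewrites it into
// SPLAT_VECTOR_PARTS carrying the low and high i32 halves, which is what the
// Custom handling below is for.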
1340 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1343
1344 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1346 }
1347
1349 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1350
1351 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1352 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1353 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1354 ISD::VP_SCATTER},
1355 VT, Custom);
1356 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1357
1361 VT, Custom);
1362
1365
1367
1368 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1369 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1371
1375 VT, Custom);
1376
1378
1381
1382 // Custom-lower reduction operations to set up the corresponding custom
1383 // nodes' operands.
1384 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1385 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1386 ISD::VECREDUCE_UMIN},
1387 VT, Custom);
1388
1389 setOperationAction(IntegerVPOps, VT, Custom);
1390
1391 if (Subtarget.hasStdExtZvkb())
1393
1394 if (Subtarget.hasStdExtZvbb()) {
1397 VT, Custom);
1398 } else {
1399 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1400 // in the range of f32.
1401 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1402 if (isTypeLegal(FloatVT))
1405 Custom);
1406 }
1407
1409 }
1410
1412 // There are no extending loads or truncating stores.
1413 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1414 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1415 setTruncStoreAction(VT, InnerVT, Expand);
1416 }
1417
1418 if (!useRVVForFixedLengthVectorVT(VT))
1419 continue;
1420
1421 // By default everything must be expanded.
1422 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1424
1425 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1426 // expansion to a build_vector of 0s.
1428
1433 VT, Custom);
1434 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1435 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1436
1438 VT, Custom);
1439
1440 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1441 ISD::MGATHER, ISD::MSCATTER},
1442 VT, Custom);
1443 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1444 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1445 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1446 VT, Custom);
1447 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1448
1449 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1451 Custom);
1452
1453 if (VT.getVectorElementType() == MVT::f16 &&
1454 !Subtarget.hasVInstructionsF16()) {
1455 setOperationAction(ISD::BITCAST, VT, Custom);
1456 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1458 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1459 Custom);
1460 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1461 Custom);
1462 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1463 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1464 if (Subtarget.hasStdExtZfhmin()) {
1466 } else {
1467 // We need to custom legalize f16 build vectors if Zfhmin isn't
1468 // available.
1470 }
1471 setOperationAction(ISD::FNEG, VT, Expand);
1472 setOperationAction(ISD::FABS, VT, Expand);
1474 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1475 // Don't promote f16 vector operations to f32 if f32 vector type is
1476 // not legal.
1477 // TODO: could split the f16 vector into two vectors and do promotion.
1478 if (!isTypeLegal(F32VecVT))
1479 continue;
1480 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1481 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1482 continue;
1483 }
1484
1485 if (VT.getVectorElementType() == MVT::bf16) {
1486 setOperationAction(ISD::BITCAST, VT, Custom);
1487 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1488 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1489 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1490 if (Subtarget.hasStdExtZfbfmin()) {
1492 } else {
1493 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1494 // available.
1496 }
1498 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1499 Custom);
1500 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1501 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1502 // not legal.
1503 // TODO: could split the bf16 vector into two vectors and do promotion.
1504 if (!isTypeLegal(F32VecVT))
1505 continue;
1506 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1507 // TODO: Promote VP ops to fp32.
1508 continue;
1509 }
1510
1512 Custom);
1513
1515 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1516 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1517 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1518 ISD::FMAXIMUM, ISD::FMINIMUM},
1519 VT, Custom);
1520
1521 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1522 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1523 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1524 ISD::FNEARBYINT},
1525 VT, Custom);
1526
1527 setCondCodeAction(VFPCCToExpand, VT, Expand);
1528
1531
1532 setOperationAction(ISD::BITCAST, VT, Custom);
1533
1534 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1535
1536 setOperationAction(FloatingPointVPOps, VT, Custom);
1537
1544 VT, Custom);
1545 }
1546
1547 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1548 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1549 if (Subtarget.is64Bit())
1550 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1551 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1552 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1553 if (Subtarget.hasStdExtZfbfmin())
1554 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1555 if (Subtarget.hasStdExtFOrZfinx())
1556 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1557 if (Subtarget.hasStdExtDOrZdinx())
1558 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1559 }
1560 }
1561
1562 if (Subtarget.hasStdExtA())
1563 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1564
1565 if (Subtarget.hasForcedAtomics()) {
1566 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1568 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1569 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1570 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1571 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1572 XLenVT, LibCall);
1573 }
1574
1575 if (Subtarget.hasVendorXTHeadMemIdx()) {
1576 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1577 setIndexedLoadAction(im, MVT::i8, Legal);
1578 setIndexedStoreAction(im, MVT::i8, Legal);
1579 setIndexedLoadAction(im, MVT::i16, Legal);
1580 setIndexedStoreAction(im, MVT::i16, Legal);
1581 setIndexedLoadAction(im, MVT::i32, Legal);
1582 setIndexedStoreAction(im, MVT::i32, Legal);
1583
1584 if (Subtarget.is64Bit()) {
1585 setIndexedLoadAction(im, MVT::i64, Legal);
1586 setIndexedStoreAction(im, MVT::i64, Legal);
1587 }
1588 }
1589 }
1590
1591 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1595
1599 }
1600
1601 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1602 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1603 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1604 ISD::PARTIAL_REDUCE_UMLA,
1605 ISD::PARTIAL_REDUCE_SUMLA};
1606 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1607 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1610 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1611
1612 if (Subtarget.useRVVForFixedLengthVectors()) {
1614 if (VT.getVectorElementType() != MVT::i32 ||
1615 !useRVVForFixedLengthVectorVT(VT))
1616 continue;
1617 ElementCount EC = VT.getVectorElementCount();
1618 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1619 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1620 }
1621 }
1622 }
1623
1624 // Customize load and store operations for bf16 if Zfh isn't enabled.
1625 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1626 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1627 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1628 }
1629
1630 // Function alignments.
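// For example, with Zca (the compressed-instruction base) present,
// instructions can be 2 bytes long, so 2-byte function alignment suffices;
// otherwise the base 4-byte instruction alignment is required.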
1631 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1632 setMinFunctionAlignment(FunctionAlignment);
1633 // Set preferred alignments.
1634 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1635 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1636
1642
1643 if (Subtarget.hasStdExtFOrZfinx())
1644 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1645
1646 if (Subtarget.hasStdExtZbb())
1648
1649 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1650 Subtarget.hasVInstructions())
1652
1653 if (Subtarget.hasStdExtZbkb())
1655
1656 if (Subtarget.hasStdExtFOrZfinx())
1659 if (Subtarget.hasVInstructions())
1661 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1662 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1663 ISD::SRL, ISD::SHL, ISD::STORE,
1665 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1669 ISD::VSELECT, ISD::VECREDUCE_ADD});
1670
1671 if (Subtarget.hasVendorXTHeadMemPair())
1672 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1673 if (Subtarget.useRVVForFixedLengthVectors())
1674 setTargetDAGCombine(ISD::BITCAST);
1675
1676 // Disable strict node mutation.
1677 IsStrictFPEnabled = true;
1678 EnableExtLdPromotion = true;
1679
1680 // Let the subtarget decide if a predictable select is more expensive than the
1681 // corresponding branch. This information is used in CGP/SelectOpt to decide
1682 // when to convert selects into branches.
1683 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1684
1685 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1686 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1687
1688 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1689 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1690 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1691
1693 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1694 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1695
1696 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1697 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1698}
1699
1700EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1701 LLVMContext &Context,
1702 EVT VT) const {
1703 if (!VT.isVector())
1704 return getPointerTy(DL);
1705 if (Subtarget.hasVInstructions() &&
1706 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1707 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1708 return VT.changeVectorElementTypeToInteger();
1709}
1710
1712 return Subtarget.getXLenVT();
1713}
1714
1715// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1716bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1717 unsigned VF,
1718 bool IsScalable) const {
1719 if (!Subtarget.hasVInstructions())
1720 return true;
1721
1722 if (!IsScalable)
1723 return true;
1724
1725 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1726 return true;
1727
1728 // Don't allow VF=1 if those types aren't legal.
1729 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1730 return true;
1731
1732 // VLEN=32 support is incomplete.
1733 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1734 return true;
1735
1736 // The maximum VF is for the smallest element width with LMUL=8.
1737 // VF must be a power of 2.
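// For illustration: RVVBytesPerBlock is 8, so MaxVF is 64 (e8 elements with
// LMUL=8); a VF of 48 would also be rejected below for not being a power
// of 2.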
1738 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1739 return VF > MaxVF || !isPowerOf2_32(VF);
1740}
1741
1743 return !Subtarget.hasVInstructions() ||
1744 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1745}
1746
1747bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1748 const CallInst &I,
1749 MachineFunction &MF,
1750 unsigned Intrinsic) const {
1751 auto &DL = I.getDataLayout();
1752
1753 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1754 bool IsUnitStrided, bool UsePtrVal = false) {
1755 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1756 // We can't use ptrVal if the intrinsic can access memory before the
1757 // pointer. This means we can't use it for strided or indexed intrinsics.
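// For example, a strided load such as riscv_vlse can read below its base
// pointer when the stride is negative, so the non-UsePtrVal callers record
// only the address space here rather than the pointer value itself.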
1758 if (UsePtrVal)
1759 Info.ptrVal = I.getArgOperand(PtrOp);
1760 else
1761 Info.fallbackAddressSpace =
1762 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1763 Type *MemTy;
1764 if (IsStore) {
1765 // Store value is the first operand.
1766 MemTy = I.getArgOperand(0)->getType();
1767 } else {
1768 // Use the return type. If it's a segment load, the return type is a struct.
1769 MemTy = I.getType();
1770 if (MemTy->isStructTy())
1771 MemTy = MemTy->getStructElementType(0);
1772 }
1773 if (!IsUnitStrided)
1774 MemTy = MemTy->getScalarType();
1775
1776 Info.memVT = getValueType(DL, MemTy);
1777 if (MemTy->isTargetExtTy()) {
1778 // RISC-V vector tuple type's alignment type should be its element type.
1779 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1780 MemTy = Type::getIntNTy(
1781 MemTy->getContext(),
1782 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1783 ->getZExtValue());
1784 Info.align = DL.getABITypeAlign(MemTy);
1785 } else {
1786 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1787 }
1788 Info.size = MemoryLocation::UnknownSize;
1789 Info.flags |=
1791 return true;
1792 };
1793
1794 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1796
1798 switch (Intrinsic) {
1799 default:
1800 return false;
1801 case Intrinsic::riscv_masked_atomicrmw_xchg:
1802 case Intrinsic::riscv_masked_atomicrmw_add:
1803 case Intrinsic::riscv_masked_atomicrmw_sub:
1804 case Intrinsic::riscv_masked_atomicrmw_nand:
1805 case Intrinsic::riscv_masked_atomicrmw_max:
1806 case Intrinsic::riscv_masked_atomicrmw_min:
1807 case Intrinsic::riscv_masked_atomicrmw_umax:
1808 case Intrinsic::riscv_masked_atomicrmw_umin:
1809 case Intrinsic::riscv_masked_cmpxchg:
1810 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1811 // narrow atomic operation. These will be expanded to an LR/SC loop that
1812 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1813 // will be used to modify the appropriate part of the 4 byte data and
1814 // preserve the rest.
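// For illustration: an i8 atomicrmw add on address P is expanded to an
// LR.W/SC.W loop on the aligned word containing P, with shift/mask logic
// updating only the byte of interest and preserving the other three bytes.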
1815 Info.opc = ISD::INTRINSIC_W_CHAIN;
1816 Info.memVT = MVT::i32;
1817 Info.ptrVal = I.getArgOperand(0);
1818 Info.offset = 0;
1819 Info.align = Align(4);
1822 return true;
1823 case Intrinsic::riscv_seg2_load_mask:
1824 case Intrinsic::riscv_seg3_load_mask:
1825 case Intrinsic::riscv_seg4_load_mask:
1826 case Intrinsic::riscv_seg5_load_mask:
1827 case Intrinsic::riscv_seg6_load_mask:
1828 case Intrinsic::riscv_seg7_load_mask:
1829 case Intrinsic::riscv_seg8_load_mask:
1830 case Intrinsic::riscv_sseg2_load_mask:
1831 case Intrinsic::riscv_sseg3_load_mask:
1832 case Intrinsic::riscv_sseg4_load_mask:
1833 case Intrinsic::riscv_sseg5_load_mask:
1834 case Intrinsic::riscv_sseg6_load_mask:
1835 case Intrinsic::riscv_sseg7_load_mask:
1836 case Intrinsic::riscv_sseg8_load_mask:
1837 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1838 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1839 case Intrinsic::riscv_seg2_store_mask:
1840 case Intrinsic::riscv_seg3_store_mask:
1841 case Intrinsic::riscv_seg4_store_mask:
1842 case Intrinsic::riscv_seg5_store_mask:
1843 case Intrinsic::riscv_seg6_store_mask:
1844 case Intrinsic::riscv_seg7_store_mask:
1845 case Intrinsic::riscv_seg8_store_mask:
1846 // Operands are (vec, ..., vec, ptr, mask, vl)
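// For example, riscv_seg2_store_mask(v0, v1, ptr, mask, vl) has the pointer
// as the third operand from the end, hence PtrOp = arg_size() - 3.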
1847 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1848 /*IsStore*/ true,
1849 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1850 case Intrinsic::riscv_sseg2_store_mask:
1851 case Intrinsic::riscv_sseg3_store_mask:
1852 case Intrinsic::riscv_sseg4_store_mask:
1853 case Intrinsic::riscv_sseg5_store_mask:
1854 case Intrinsic::riscv_sseg6_store_mask:
1855 case Intrinsic::riscv_sseg7_store_mask:
1856 case Intrinsic::riscv_sseg8_store_mask:
1857 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1858 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1859 /*IsStore*/ true,
1860 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1861 case Intrinsic::riscv_vlm:
1862 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1863 /*IsStore*/ false,
1864 /*IsUnitStrided*/ true,
1865 /*UsePtrVal*/ true);
1866 case Intrinsic::riscv_vle:
1867 case Intrinsic::riscv_vle_mask:
1868 case Intrinsic::riscv_vleff:
1869 case Intrinsic::riscv_vleff_mask:
1870 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1871 /*IsStore*/ false,
1872 /*IsUnitStrided*/ true,
1873 /*UsePtrVal*/ true);
1874 case Intrinsic::riscv_vsm:
1875 case Intrinsic::riscv_vse:
1876 case Intrinsic::riscv_vse_mask:
1877 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1878 /*IsStore*/ true,
1879 /*IsUnitStrided*/ true,
1880 /*UsePtrVal*/ true);
1881 case Intrinsic::riscv_vlse:
1882 case Intrinsic::riscv_vlse_mask:
1883 case Intrinsic::riscv_vloxei:
1884 case Intrinsic::riscv_vloxei_mask:
1885 case Intrinsic::riscv_vluxei:
1886 case Intrinsic::riscv_vluxei_mask:
1887 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1888 /*IsStore*/ false,
1889 /*IsUnitStrided*/ false);
1890 case Intrinsic::riscv_vsse:
1891 case Intrinsic::riscv_vsse_mask:
1892 case Intrinsic::riscv_vsoxei:
1893 case Intrinsic::riscv_vsoxei_mask:
1894 case Intrinsic::riscv_vsuxei:
1895 case Intrinsic::riscv_vsuxei_mask:
1896 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1897 /*IsStore*/ true,
1898 /*IsUnitStrided*/ false);
1899 case Intrinsic::riscv_vlseg2:
1900 case Intrinsic::riscv_vlseg3:
1901 case Intrinsic::riscv_vlseg4:
1902 case Intrinsic::riscv_vlseg5:
1903 case Intrinsic::riscv_vlseg6:
1904 case Intrinsic::riscv_vlseg7:
1905 case Intrinsic::riscv_vlseg8:
1906 case Intrinsic::riscv_vlseg2ff:
1907 case Intrinsic::riscv_vlseg3ff:
1908 case Intrinsic::riscv_vlseg4ff:
1909 case Intrinsic::riscv_vlseg5ff:
1910 case Intrinsic::riscv_vlseg6ff:
1911 case Intrinsic::riscv_vlseg7ff:
1912 case Intrinsic::riscv_vlseg8ff:
1913 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1914 /*IsStore*/ false,
1915 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1916 case Intrinsic::riscv_vlseg2_mask:
1917 case Intrinsic::riscv_vlseg3_mask:
1918 case Intrinsic::riscv_vlseg4_mask:
1919 case Intrinsic::riscv_vlseg5_mask:
1920 case Intrinsic::riscv_vlseg6_mask:
1921 case Intrinsic::riscv_vlseg7_mask:
1922 case Intrinsic::riscv_vlseg8_mask:
1923 case Intrinsic::riscv_vlseg2ff_mask:
1924 case Intrinsic::riscv_vlseg3ff_mask:
1925 case Intrinsic::riscv_vlseg4ff_mask:
1926 case Intrinsic::riscv_vlseg5ff_mask:
1927 case Intrinsic::riscv_vlseg6ff_mask:
1928 case Intrinsic::riscv_vlseg7ff_mask:
1929 case Intrinsic::riscv_vlseg8ff_mask:
1930 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1931 /*IsStore*/ false,
1932 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1933 case Intrinsic::riscv_vlsseg2:
1934 case Intrinsic::riscv_vlsseg3:
1935 case Intrinsic::riscv_vlsseg4:
1936 case Intrinsic::riscv_vlsseg5:
1937 case Intrinsic::riscv_vlsseg6:
1938 case Intrinsic::riscv_vlsseg7:
1939 case Intrinsic::riscv_vlsseg8:
1940 case Intrinsic::riscv_vloxseg2:
1941 case Intrinsic::riscv_vloxseg3:
1942 case Intrinsic::riscv_vloxseg4:
1943 case Intrinsic::riscv_vloxseg5:
1944 case Intrinsic::riscv_vloxseg6:
1945 case Intrinsic::riscv_vloxseg7:
1946 case Intrinsic::riscv_vloxseg8:
1947 case Intrinsic::riscv_vluxseg2:
1948 case Intrinsic::riscv_vluxseg3:
1949 case Intrinsic::riscv_vluxseg4:
1950 case Intrinsic::riscv_vluxseg5:
1951 case Intrinsic::riscv_vluxseg6:
1952 case Intrinsic::riscv_vluxseg7:
1953 case Intrinsic::riscv_vluxseg8:
1954 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1955 /*IsStore*/ false,
1956 /*IsUnitStrided*/ false);
1957 case Intrinsic::riscv_vlsseg2_mask:
1958 case Intrinsic::riscv_vlsseg3_mask:
1959 case Intrinsic::riscv_vlsseg4_mask:
1960 case Intrinsic::riscv_vlsseg5_mask:
1961 case Intrinsic::riscv_vlsseg6_mask:
1962 case Intrinsic::riscv_vlsseg7_mask:
1963 case Intrinsic::riscv_vlsseg8_mask:
1964 case Intrinsic::riscv_vloxseg2_mask:
1965 case Intrinsic::riscv_vloxseg3_mask:
1966 case Intrinsic::riscv_vloxseg4_mask:
1967 case Intrinsic::riscv_vloxseg5_mask:
1968 case Intrinsic::riscv_vloxseg6_mask:
1969 case Intrinsic::riscv_vloxseg7_mask:
1970 case Intrinsic::riscv_vloxseg8_mask:
1971 case Intrinsic::riscv_vluxseg2_mask:
1972 case Intrinsic::riscv_vluxseg3_mask:
1973 case Intrinsic::riscv_vluxseg4_mask:
1974 case Intrinsic::riscv_vluxseg5_mask:
1975 case Intrinsic::riscv_vluxseg6_mask:
1976 case Intrinsic::riscv_vluxseg7_mask:
1977 case Intrinsic::riscv_vluxseg8_mask:
1978 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1979 /*IsStore*/ false,
1980 /*IsUnitStrided*/ false);
1981 case Intrinsic::riscv_vsseg2:
1982 case Intrinsic::riscv_vsseg3:
1983 case Intrinsic::riscv_vsseg4:
1984 case Intrinsic::riscv_vsseg5:
1985 case Intrinsic::riscv_vsseg6:
1986 case Intrinsic::riscv_vsseg7:
1987 case Intrinsic::riscv_vsseg8:
1988 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1989 /*IsStore*/ true,
1990 /*IsUnitStrided*/ false);
1991 case Intrinsic::riscv_vsseg2_mask:
1992 case Intrinsic::riscv_vsseg3_mask:
1993 case Intrinsic::riscv_vsseg4_mask:
1994 case Intrinsic::riscv_vsseg5_mask:
1995 case Intrinsic::riscv_vsseg6_mask:
1996 case Intrinsic::riscv_vsseg7_mask:
1997 case Intrinsic::riscv_vsseg8_mask:
1998 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1999 /*IsStore*/ true,
2000 /*IsUnitStrided*/ false);
2001 case Intrinsic::riscv_vssseg2:
2002 case Intrinsic::riscv_vssseg3:
2003 case Intrinsic::riscv_vssseg4:
2004 case Intrinsic::riscv_vssseg5:
2005 case Intrinsic::riscv_vssseg6:
2006 case Intrinsic::riscv_vssseg7:
2007 case Intrinsic::riscv_vssseg8:
2008 case Intrinsic::riscv_vsoxseg2:
2009 case Intrinsic::riscv_vsoxseg3:
2010 case Intrinsic::riscv_vsoxseg4:
2011 case Intrinsic::riscv_vsoxseg5:
2012 case Intrinsic::riscv_vsoxseg6:
2013 case Intrinsic::riscv_vsoxseg7:
2014 case Intrinsic::riscv_vsoxseg8:
2015 case Intrinsic::riscv_vsuxseg2:
2016 case Intrinsic::riscv_vsuxseg3:
2017 case Intrinsic::riscv_vsuxseg4:
2018 case Intrinsic::riscv_vsuxseg5:
2019 case Intrinsic::riscv_vsuxseg6:
2020 case Intrinsic::riscv_vsuxseg7:
2021 case Intrinsic::riscv_vsuxseg8:
2022 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2023 /*IsStore*/ true,
2024 /*IsUnitStrided*/ false);
2025 case Intrinsic::riscv_vssseg2_mask:
2026 case Intrinsic::riscv_vssseg3_mask:
2027 case Intrinsic::riscv_vssseg4_mask:
2028 case Intrinsic::riscv_vssseg5_mask:
2029 case Intrinsic::riscv_vssseg6_mask:
2030 case Intrinsic::riscv_vssseg7_mask:
2031 case Intrinsic::riscv_vssseg8_mask:
2032 case Intrinsic::riscv_vsoxseg2_mask:
2033 case Intrinsic::riscv_vsoxseg3_mask:
2034 case Intrinsic::riscv_vsoxseg4_mask:
2035 case Intrinsic::riscv_vsoxseg5_mask:
2036 case Intrinsic::riscv_vsoxseg6_mask:
2037 case Intrinsic::riscv_vsoxseg7_mask:
2038 case Intrinsic::riscv_vsoxseg8_mask:
2039 case Intrinsic::riscv_vsuxseg2_mask:
2040 case Intrinsic::riscv_vsuxseg3_mask:
2041 case Intrinsic::riscv_vsuxseg4_mask:
2042 case Intrinsic::riscv_vsuxseg5_mask:
2043 case Intrinsic::riscv_vsuxseg6_mask:
2044 case Intrinsic::riscv_vsuxseg7_mask:
2045 case Intrinsic::riscv_vsuxseg8_mask:
2046 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2047 /*IsStore*/ true,
2048 /*IsUnitStrided*/ false);
2049 }
2050}
2051
2053 const AddrMode &AM, Type *Ty,
2054 unsigned AS,
2055 Instruction *I) const {
2056 // No global is ever allowed as a base.
2057 if (AM.BaseGV)
2058 return false;
2059
2060 // None of our addressing modes allows a scalable offset
2061 if (AM.ScalableOffset)
2062 return false;
2063
2064 // RVV instructions only support register addressing.
2065 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2066 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2067
2068 // Require a 12-bit signed offset.
2069 if (!isInt<12>(AM.BaseOffs))
2070 return false;
2071
2072 switch (AM.Scale) {
2073 case 0: // "r+i" or just "i", depending on HasBaseReg.
2074 break;
2075 case 1:
2076 if (!AM.HasBaseReg) // allow "r+i".
2077 break;
2078 return false; // disallow "r+r" or "r+r+i".
2079 default:
2080 return false;
2081 }
2082
2083 return true;
2084}
2085
2087 return isInt<12>(Imm);
2088}
2089
2091 return isInt<12>(Imm);
2092}
2093
2094// On RV32, 64-bit integers are split into their high and low parts and held
2095// in two different registers, so the trunc is free since the low register can
2096// just be used.
2097// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2098// isTruncateFree?
2100 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2101 return false;
2102 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2103 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2104 return (SrcBits == 64 && DestBits == 32);
2105}
2106
2108 // We consider i64->i32 free on RV64 since we have good selection of W
2109 // instructions that make promoting operations back to i64 free in many cases.
2110 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2111 !DstVT.isInteger())
2112 return false;
2113 unsigned SrcBits = SrcVT.getSizeInBits();
2114 unsigned DestBits = DstVT.getSizeInBits();
2115 return (SrcBits == 64 && DestBits == 32);
2116}
2117
2119 EVT SrcVT = Val.getValueType();
2120 // free truncate from vnsrl and vnsra
2121 if (Subtarget.hasVInstructions() &&
2122 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2123 SrcVT.isVector() && VT2.isVector()) {
2124 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2125 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2126 if (SrcBits == DestBits * 2) {
2127 return true;
2128 }
2129 }
2130 return TargetLowering::isTruncateFree(Val, VT2);
2131}
2132
2134 // Zexts are free if they can be combined with a load.
2135 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2136 // poorly with type legalization of compares preferring sext.
2137 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2138 EVT MemVT = LD->getMemoryVT();
2139 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2140 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2141 LD->getExtensionType() == ISD::ZEXTLOAD))
2142 return true;
2143 }
2144
2145 return TargetLowering::isZExtFree(Val, VT2);
2146}
2147
2149 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2150}
2151
2153 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2154}
2155
2157 return Subtarget.hasCTZLike();
2158}
2159
2161 return Subtarget.hasCLZLike();
2162}
2163
2165 const Instruction &AndI) const {
2166 // We expect to be able to match a bit extraction instruction if the Zbs
2167 // extension is supported and the mask is a power of two. However, we
2168 // conservatively return false if the mask would fit in an ANDI instruction,
2169 // on the basis that it's possible the sinking+duplication of the AND in
2170 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2171 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2172 if (!Subtarget.hasBEXTILike())
2173 return false;
2175 if (!Mask)
2176 return false;
2177 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2178}
2179
2181 EVT VT = Y.getValueType();
2182
2183 if (VT.isVector())
2184 return false;
2185
2186 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2187 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2188}
2189
2191 EVT VT = Y.getValueType();
2192
2193 if (!VT.isVector())
2194 return hasAndNotCompare(Y);
2195
2196 return Subtarget.hasStdExtZvkb();
2197}
2198
2200 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2201 if (Subtarget.hasStdExtZbs())
2202 return X.getValueType().isScalarInteger();
2203 auto *C = dyn_cast<ConstantSDNode>(Y);
2204 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2205 if (Subtarget.hasVendorXTHeadBs())
2206 return C != nullptr;
2207 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2208 return C && C->getAPIntValue().ule(10);
2209}
2210
2212 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2213 SDValue Y) const {
2214 if (SelectOpcode != ISD::VSELECT)
2215 return false;
2216
2217 // Only enable for rvv.
2218 if (!VT.isVector() || !Subtarget.hasVInstructions())
2219 return false;
2220
2221 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2222 return false;
2223
2224 return true;
2225}
2226
2228 Type *Ty) const {
2229 assert(Ty->isIntegerTy());
2230
2231 unsigned BitSize = Ty->getIntegerBitWidth();
2232 if (BitSize > Subtarget.getXLen())
2233 return false;
2234
2235 // Fast path, assume 32-bit immediates are cheap.
2236 int64_t Val = Imm.getSExtValue();
2237 if (isInt<32>(Val))
2238 return true;
2239
2240 // A constant pool entry may be more aligned than the load we're trying to
2241 // replace. If we don't support unaligned scalar mem, prefer the constant
2242 // pool.
2243 // TODO: Can the caller pass down the alignment?
2244 if (!Subtarget.enableUnalignedScalarMem())
2245 return true;
2246
2247 // Prefer to keep the load if it would require many instructions.
2248 // This uses the same threshold we use for constant pools but doesn't
2249 // check useConstantPoolForLargeInts.
2250 // TODO: Should we keep the load only when we're definitely going to emit a
2251 // constant pool?
2252
2254 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2255}
2256
2260 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2261 SelectionDAG &DAG) const {
2262 // One interesting pattern that we'd want to form is 'bit extract':
2263 // ((1 >> Y) & 1) ==/!= 0
2264 // But we also need to be careful not to try to reverse that fold.
2265
2266 // Is this '((1 >> Y) & 1)'?
2267 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2268 return false; // Keep the 'bit extract' pattern.
2269
2270 // Will this be '((1 >> Y) & 1)' after the transform?
2271 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2272 return true; // Do form the 'bit extract' pattern.
2273
2274 // If 'X' is a constant, and we transform, then we will immediately
2275 // try to undo the fold, thus causing an endless combine loop.
2276 // So only do the transform if X is not a constant. This matches the default
2277 // implementation of this function.
2278 return !XC;
2279}
2280
2282 unsigned Opc = VecOp.getOpcode();
2283
2284 // Assume target opcodes can't be scalarized.
2285 // TODO - do we have any exceptions?
2286 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2287 return false;
2288
2289 // If the vector op is not supported, try to convert to scalar.
2290 EVT VecVT = VecOp.getValueType();
2292 return true;
2293
2294 // If the vector op is supported, but the scalar op is not, the transform may
2295 // not be worthwhile.
2296 // Permit the transform when the vector binary operation can be converted
2297 // to a scalar binary operation that is custom lowered with an illegal type.
2298 EVT ScalarVT = VecVT.getScalarType();
2299 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2300 isOperationCustom(Opc, ScalarVT);
2301}
2302
2304 const GlobalAddressSDNode *GA) const {
2305 // In order to maximise the opportunity for common subexpression elimination,
2306 // keep a separate ADD node for the global address offset instead of folding
2307 // it in the global address node. Later peephole optimisations may choose to
2308 // fold it back in when profitable.
2309 return false;
2310}
2311
2312// Returns 0-31 if the fli instruction is available for the type and this is
2313 // a legal FP immediate for the type. Returns -1 otherwise.
2315 if (!Subtarget.hasStdExtZfa())
2316 return -1;
2317
2318 bool IsSupportedVT = false;
2319 if (VT == MVT::f16) {
2320 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2321 } else if (VT == MVT::f32) {
2322 IsSupportedVT = true;
2323 } else if (VT == MVT::f64) {
2324 assert(Subtarget.hasStdExtD() && "Expect D extension");
2325 IsSupportedVT = true;
2326 }
2327
2328 if (!IsSupportedVT)
2329 return -1;
2330
2331 return RISCVLoadFPImm::getLoadFPImm(Imm);
2332}
2333
2335 bool ForCodeSize) const {
2336 bool IsLegalVT = false;
2337 if (VT == MVT::f16)
2338 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2339 else if (VT == MVT::f32)
2340 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2341 else if (VT == MVT::f64)
2342 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2343 else if (VT == MVT::bf16)
2344 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2345
2346 if (!IsLegalVT)
2347 return false;
2348
2349 if (getLegalZfaFPImm(Imm, VT) >= 0)
2350 return true;
2351
2352 // Some constants can be produced by fli+fneg.
2353 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2354 return true;
2355
2356 // Cannot create a 64 bit floating-point immediate value for rv32.
2357 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2358 // td can handle +0.0 or -0.0 already.
2359 // -0.0 can be created by fmv + fneg.
2360 return Imm.isZero();
2361 }
2362
2363 // Special case: fmv + fneg
2364 if (Imm.isNegZero())
2365 return true;
2366
2367 // Building an integer and then converting requires a fmv at the end of
2368 // the integer sequence. The fmv is not required for Zfinx.
2369 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2370 const int Cost =
2371 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2372 Subtarget.getXLen(), Subtarget);
2373 return Cost <= FPImmCost;
2374}
2375
2376// TODO: This is very conservative.
2378 unsigned Index) const {
2380 return false;
2381
2382 // Extracts from index 0 are just subreg extracts.
2383 if (Index == 0)
2384 return true;
2385
2386 // Only support extracting a fixed from a fixed vector for now.
2387 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2388 return false;
2389
2390 EVT EltVT = ResVT.getVectorElementType();
2391 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2392
2393 // The smallest type we can slide is i8.
2394 // TODO: We can extract index 0 from a mask vector without a slide.
2395 if (EltVT == MVT::i1)
2396 return false;
2397
2398 unsigned ResElts = ResVT.getVectorNumElements();
2399 unsigned SrcElts = SrcVT.getVectorNumElements();
2400
2401 unsigned MinVLen = Subtarget.getRealMinVLen();
2402 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2403
2404 // If we're extracting only data from the first VLEN bits of the source
2405 // then we can always do this with an m1 vslidedown.vx. Restricting the
2406 // Index ensures we can use a vslidedown.vi.
2407 // TODO: We can generalize this when the exact VLEN is known.
2408 if (Index + ResElts <= MinVLMAX && Index < 31)
2409 return true;
2410
2411 // Conservatively only handle extracting half of a vector.
2412 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2413 // the upper half of a vector until we have more test coverage.
2414 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2415 // a cheap extract. However, this case is important in practice for
2416 // shuffled extracts of longer vectors. How should we resolve this?
2417 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2418}
2419
2421 CallingConv::ID CC,
2422 EVT VT) const {
2423 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2424 // We might still end up using a GPR but that will be decided based on ABI.
2425 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2426 !Subtarget.hasStdExtZfhminOrZhinxmin())
2427 return MVT::f32;
2428
2429 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2430
2431 return PartVT;
2432}
2433
2434unsigned
2436 std::optional<MVT> RegisterVT) const {
2437 // Pair inline assembly operand
2438 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2439 *RegisterVT == MVT::Untyped)
2440 return 1;
2441
2442 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2443}
2444
2446 CallingConv::ID CC,
2447 EVT VT) const {
2448 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2449 // We might still end up using a GPR but that will be decided based on ABI.
2450 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2451 !Subtarget.hasStdExtZfhminOrZhinxmin())
2452 return 1;
2453
2454 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2455}
2456
2458 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2459 unsigned &NumIntermediates, MVT &RegisterVT) const {
2461 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2462
2463 return NumRegs;
2464}
2465
2466// Changes the condition code and swaps operands if necessary, so the SetCC
2467// operation matches one of the comparisons supported directly by branches
2468// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2469// with 1/-1.
2471 ISD::CondCode &CC, SelectionDAG &DAG,
2472 const RISCVSubtarget &Subtarget) {
2473 // If this is a single bit test that can't be handled by ANDI, shift the
2474 // bit to be tested to the MSB and perform a signed compare with 0.
2475 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2476 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2477 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2478 // XAndesPerf supports branch on test bit.
2479 !Subtarget.hasVendorXAndesPerf()) {
2480 uint64_t Mask = LHS.getConstantOperandVal(1);
2481 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2482 unsigned ShAmt = 0;
2483 if (isPowerOf2_64(Mask)) {
2484 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2485 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2486 } else {
2487 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2488 }
2489
2490 LHS = LHS.getOperand(0);
2491 if (ShAmt != 0)
2492 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2493 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2494 return;
2495 }
2496 }
2497
2498 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2499 int64_t C = RHSC->getSExtValue();
2500 switch (CC) {
2501 default: break;
2502 case ISD::SETGT:
2503 // Convert X > -1 to X >= 0.
2504 if (C == -1) {
2505 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2506 CC = ISD::SETGE;
2507 return;
2508 }
2509 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2510 // We have a branch immediate instruction for SETGE but not SETGT.
2511 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2512 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2513 CC = ISD::SETGE;
2514 return;
2515 }
2516 break;
2517 case ISD::SETLT:
2518 // Convert X < 1 to 0 >= X.
2519 if (C == 1) {
2520 RHS = LHS;
2521 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2522 CC = ISD::SETGE;
2523 return;
2524 }
2525 break;
2526 case ISD::SETUGT:
2527 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2528 // We have a branch immediate instruction for SETUGE but not SETUGT.
2529 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2530 // immediate.
2531 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2532 CC = ISD::SETUGE;
2533 return;
2534 }
2535 break;
2536 }
2537 }
2538
2539 switch (CC) {
2540 default:
2541 break;
2542 case ISD::SETGT:
2543 case ISD::SETLE:
2544 case ISD::SETUGT:
2545 case ISD::SETULE:
2547 std::swap(LHS, RHS);
2548 break;
2549 }
2550}
2551
2553 if (VT.isRISCVVectorTuple()) {
2554 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2555 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2556 return RISCVVType::LMUL_F8;
2557 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2558 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2559 return RISCVVType::LMUL_F4;
2560 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2561 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2562 return RISCVVType::LMUL_F2;
2563 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2564 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2565 return RISCVVType::LMUL_1;
2566 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2567 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2568 return RISCVVType::LMUL_2;
2569 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2570 return RISCVVType::LMUL_4;
2571 llvm_unreachable("Invalid vector tuple type LMUL.");
2572 }
2573
2574 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2575 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2576 if (VT.getVectorElementType() == MVT::i1)
2577 KnownSize *= 8;
2578
2579 switch (KnownSize) {
2580 default:
2581 llvm_unreachable("Invalid LMUL.");
2582 case 8:
2583 return RISCVVType::LMUL_F8;
2584 case 16:
2585 return RISCVVType::LMUL_F4;
2586 case 32:
2587 return RISCVVType::LMUL_F2;
2588 case 64:
2589 return RISCVVType::LMUL_1;
2590 case 128:
2591 return RISCVVType::LMUL_2;
2592 case 256:
2593 return RISCVVType::LMUL_4;
2594 case 512:
2595 return RISCVVType::LMUL_8;
2596 }
2597}
2598
2600 switch (LMul) {
2601 default:
2602 llvm_unreachable("Invalid LMUL.");
2606 case RISCVVType::LMUL_1:
2607 return RISCV::VRRegClassID;
2608 case RISCVVType::LMUL_2:
2609 return RISCV::VRM2RegClassID;
2610 case RISCVVType::LMUL_4:
2611 return RISCV::VRM4RegClassID;
2612 case RISCVVType::LMUL_8:
2613 return RISCV::VRM8RegClassID;
2614 }
2615}
2616
2617unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2618 RISCVVType::VLMUL LMUL = getLMUL(VT);
2619 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2620 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2621 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2622 "Unexpected subreg numbering");
2623 return RISCV::sub_vrm1_0 + Index;
2624 }
2625 if (LMUL == RISCVVType::LMUL_2) {
2626 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2627 "Unexpected subreg numbering");
2628 return RISCV::sub_vrm2_0 + Index;
2629 }
2630 if (LMUL == RISCVVType::LMUL_4) {
2631 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2632 "Unexpected subreg numbering");
2633 return RISCV::sub_vrm4_0 + Index;
2634 }
2635 llvm_unreachable("Invalid vector type.");
2636}
2637
2639 if (VT.isRISCVVectorTuple()) {
2640 unsigned NF = VT.getRISCVVectorTupleNumFields();
2641 unsigned RegsPerField =
2642 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2643 (NF * RISCV::RVVBitsPerBlock));
2644 switch (RegsPerField) {
2645 case 1:
2646 if (NF == 2)
2647 return RISCV::VRN2M1RegClassID;
2648 if (NF == 3)
2649 return RISCV::VRN3M1RegClassID;
2650 if (NF == 4)
2651 return RISCV::VRN4M1RegClassID;
2652 if (NF == 5)
2653 return RISCV::VRN5M1RegClassID;
2654 if (NF == 6)
2655 return RISCV::VRN6M1RegClassID;
2656 if (NF == 7)
2657 return RISCV::VRN7M1RegClassID;
2658 if (NF == 8)
2659 return RISCV::VRN8M1RegClassID;
2660 break;
2661 case 2:
2662 if (NF == 2)
2663 return RISCV::VRN2M2RegClassID;
2664 if (NF == 3)
2665 return RISCV::VRN3M2RegClassID;
2666 if (NF == 4)
2667 return RISCV::VRN4M2RegClassID;
2668 break;
2669 case 4:
2670 assert(NF == 2);
2671 return RISCV::VRN2M4RegClassID;
2672 default:
2673 break;
2674 }
2675 llvm_unreachable("Invalid vector tuple type RegClass.");
2676 }
2677
2678 if (VT.getVectorElementType() == MVT::i1)
2679 return RISCV::VRRegClassID;
2680 return getRegClassIDForLMUL(getLMUL(VT));
2681}
2682
2683// Attempt to decompose a subvector insert/extract between VecVT and
2684// SubVecVT via subregister indices. Returns the subregister index that
2685// can perform the subvector insert/extract with the given element index, as
2686// well as the index corresponding to any leftover subvectors that must be
2687// further inserted/extracted within the register class for SubVecVT.
2688std::pair<unsigned, unsigned>
2690 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2691 const RISCVRegisterInfo *TRI) {
2692 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2693 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2694 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2695 "Register classes not ordered");
2696 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2697 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2698
2699 // If VecVT is a vector tuple type, either it's a tuple type with the same
2700 // RegClass as SubVecVT, or SubVecVT is actually a subvector of the VecVT.
2701 if (VecVT.isRISCVVectorTuple()) {
2702 if (VecRegClassID == SubRegClassID)
2703 return {RISCV::NoSubRegister, 0};
2704
2705 assert(SubVecVT.isScalableVector() &&
2706 "Only allow scalable vector subvector.");
2707 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2708 "Invalid vector tuple insert/extract for vector and subvector with "
2709 "different LMUL.");
2710 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2711 }
2712
2713 // Try to compose a subregister index that takes us from the incoming
2714 // LMUL>1 register class down to the outgoing one. At each step we half
2715 // the LMUL:
2716 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2717 // Note that this is not guaranteed to find a subregister index, such as
2718 // when we are extracting from one VR type to another.
2719 unsigned SubRegIdx = RISCV::NoSubRegister;
2720 for (const unsigned RCID :
2721 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2722 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2723 VecVT = VecVT.getHalfNumVectorElementsVT();
2724 bool IsHi =
2725 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2726 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2727 getSubregIndexByMVT(VecVT, IsHi));
2728 if (IsHi)
2729 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2730 }
2731 return {SubRegIdx, InsertExtractIdx};
2732}
2733
2734// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2735// stores for those types.
2736bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2737 return !Subtarget.useRVVForFixedLengthVectors() ||
2738 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2739}
2740
2742 if (!ScalarTy.isSimple())
2743 return false;
2744 switch (ScalarTy.getSimpleVT().SimpleTy) {
2745 case MVT::iPTR:
2746 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2747 case MVT::i8:
2748 case MVT::i16:
2749 case MVT::i32:
2750 return Subtarget.hasVInstructions();
2751 case MVT::i64:
2752 return Subtarget.hasVInstructionsI64();
2753 case MVT::f16:
2754 return Subtarget.hasVInstructionsF16Minimal();
2755 case MVT::bf16:
2756 return Subtarget.hasVInstructionsBF16Minimal();
2757 case MVT::f32:
2758 return Subtarget.hasVInstructionsF32();
2759 case MVT::f64:
2760 return Subtarget.hasVInstructionsF64();
2761 default:
2762 return false;
2763 }
2764}
2765
2766
2768 return NumRepeatedDivisors;
2769}
2770
2772 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2773 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2774 "Unexpected opcode");
2775 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2776 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2778 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2779 if (!II)
2780 return SDValue();
2781 return Op.getOperand(II->VLOperand + 1 + HasChain);
2782}
2783
2785 const RISCVSubtarget &Subtarget) {
2786 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2787 if (!Subtarget.useRVVForFixedLengthVectors())
2788 return false;
2789
2790 // We only support a set of vector types with a consistent maximum fixed size
2791 // across all supported vector element types to avoid legalization issues.
2792 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2793 // fixed-length vector type we support is 1024 bytes.
2794 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2795 return false;
2796
2797 unsigned MinVLen = Subtarget.getRealMinVLen();
2798
2799 MVT EltVT = VT.getVectorElementType();
2800
2801 // Don't use RVV for vectors we cannot scalarize if required.
2802 switch (EltVT.SimpleTy) {
2803 // i1 is supported but has different rules.
2804 default:
2805 return false;
2806 case MVT::i1:
2807 // Masks can only use a single register.
2808 if (VT.getVectorNumElements() > MinVLen)
2809 return false;
2810 MinVLen /= 8;
2811 break;
2812 case MVT::i8:
2813 case MVT::i16:
2814 case MVT::i32:
2815 break;
2816 case MVT::i64:
2817 if (!Subtarget.hasVInstructionsI64())
2818 return false;
2819 break;
2820 case MVT::f16:
2821 if (!Subtarget.hasVInstructionsF16Minimal())
2822 return false;
2823 break;
2824 case MVT::bf16:
2825 if (!Subtarget.hasVInstructionsBF16Minimal())
2826 return false;
2827 break;
2828 case MVT::f32:
2829 if (!Subtarget.hasVInstructionsF32())
2830 return false;
2831 break;
2832 case MVT::f64:
2833 if (!Subtarget.hasVInstructionsF64())
2834 return false;
2835 break;
2836 }
2837
2838 // Reject elements larger than ELEN.
2839 if (EltVT.getSizeInBits() > Subtarget.getELen())
2840 return false;
2841
2842 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2843 // Don't use RVV for types that don't fit.
2844 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2845 return false;
2846
2847 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2848 // the base fixed length RVV support in place.
2849 if (!VT.isPow2VectorType())
2850 return false;
2851
2852 return true;
2853}
2854
2855bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2856 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2857}
2858
2859// Return the largest legal scalable vector type that matches VT's element type.
2861 const RISCVSubtarget &Subtarget) {
2862 // This may be called before legal types are set up.
2863 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2864 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2865 "Expected legal fixed length vector!");
2866
2867 unsigned MinVLen = Subtarget.getRealMinVLen();
2868 unsigned MaxELen = Subtarget.getELen();
2869
2870 MVT EltVT = VT.getVectorElementType();
2871 switch (EltVT.SimpleTy) {
2872 default:
2873 llvm_unreachable("unexpected element type for RVV container");
2874 case MVT::i1:
2875 case MVT::i8:
2876 case MVT::i16:
2877 case MVT::i32:
2878 case MVT::i64:
2879 case MVT::bf16:
2880 case MVT::f16:
2881 case MVT::f32:
2882 case MVT::f64: {
2883 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2884 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2885 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2886 unsigned NumElts =
2888 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2889 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2890 return MVT::getScalableVectorVT(EltVT, NumElts);
2891 }
2892 }
2893}
2894
2896 const RISCVSubtarget &Subtarget) {
2898 Subtarget);
2899}
2900
2902 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2903}
2904
2905// Grow V to consume an entire RVV register.
2907 const RISCVSubtarget &Subtarget) {
2908 assert(VT.isScalableVector() &&
2909 "Expected to convert into a scalable vector!");
2910 assert(V.getValueType().isFixedLengthVector() &&
2911 "Expected a fixed length vector operand!");
2912 SDLoc DL(V);
2913 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2914}
2915
2916// Shrink V so it's just big enough to maintain a VT's worth of data.
2918 const RISCVSubtarget &Subtarget) {
2920 "Expected to convert into a fixed length vector!");
2921 assert(V.getValueType().isScalableVector() &&
2922 "Expected a scalable vector operand!");
2923 SDLoc DL(V);
2924 return DAG.getExtractSubvector(DL, VT, V, 0);
2925}
2926
2927 /// Return the mask type suitable for masking the provided
2928/// vector type. This is simply an i1 element type vector of the same
2929/// (possibly scalable) length.
2930static MVT getMaskTypeFor(MVT VecVT) {
2931 assert(VecVT.isVector());
2933 return MVT::getVectorVT(MVT::i1, EC);
2934}
2935
2936/// Creates an all ones mask suitable for masking a vector of type VecTy with
2937 /// vector length VL.
2938static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2939 SelectionDAG &DAG) {
2940 MVT MaskVT = getMaskTypeFor(VecVT);
2941 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2942}
2943
2944static std::pair<SDValue, SDValue>
2946 const RISCVSubtarget &Subtarget) {
2947 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2948 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2949 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2950 return {Mask, VL};
2951}
2952
2953static std::pair<SDValue, SDValue>
2954getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2955 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2956 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2957 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2958 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2959 return {Mask, VL};
2960}
2961
2962// Gets the two common "VL" operands: an all-ones mask and the vector length.
2963 // VecVT is a vector type, either fixed-length or scalable. If it is
2964 // fixed-length, ContainerVT is the scalable vector type that contains it;
2965 // otherwise ContainerVT should be the same as VecVT.
2966static std::pair<SDValue, SDValue>
2967getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2968 const RISCVSubtarget &Subtarget) {
2969 if (VecVT.isFixedLengthVector())
2970 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2971 Subtarget);
2972 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2973 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2974}
2975
2977 SelectionDAG &DAG) const {
2978 assert(VecVT.isScalableVector() && "Expected scalable vector");
2979 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2980 VecVT.getVectorElementCount());
2981}
2982
2983std::pair<unsigned, unsigned>
2985 const RISCVSubtarget &Subtarget) {
2986 assert(VecVT.isScalableVector() && "Expected scalable vector");
2987
2988 unsigned EltSize = VecVT.getScalarSizeInBits();
2989 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2990
2991 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2992 unsigned MaxVLMAX =
2993 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2994
2995 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2996 unsigned MinVLMAX =
2997 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2998
2999 return std::make_pair(MinVLMAX, MaxVLMAX);
3000}
3001
3002// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3003// of either is (currently) supported. This can get us into an infinite loop
3004// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3005// as a ..., etc.
3006// Until either (or both) of these can reliably lower any node, reporting that
3007// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3008// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3009// which is not desirable.
3011 EVT VT, unsigned DefinedValues) const {
3012 return false;
3013}
3014
3016 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
3017 // implementation-defined.
3018 if (!VT.isVector())
3020 unsigned DLenFactor = Subtarget.getDLenFactor();
3021 unsigned Cost;
3022 if (VT.isScalableVector()) {
3023 unsigned LMul;
3024 bool Fractional;
3025 std::tie(LMul, Fractional) =
3027 if (Fractional)
3028 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3029 else
3030 Cost = (LMul * DLenFactor);
3031 } else {
3032 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3033 }
3034 return Cost;
3035}
3036
3037
3038 /// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3039 /// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3040 /// be so by default. VRGatherCostModel reflects the available options. Note
3041 /// that the operands (index and possibly mask) are handled separately.
3043 auto LMULCost = getLMULCost(VT);
3044 bool Log2CostModel =
3045 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3046 if (Log2CostModel && LMULCost.isValid()) {
3047 unsigned Log = Log2_64(LMULCost.getValue());
3048 if (Log > 0)
3049 return LMULCost * Log;
3050 }
3051 return LMULCost * LMULCost;
3052}
3053
3054/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3055/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3056/// or may track the vrgather.vv cost. It is implementation-dependent.
3060
3061/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3062/// for the type VT. (This does not cover the vslide1up or vslide1down
3063/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3064/// or may track the vrgather.vv cost. It is implementation-dependent.
3068
3069/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3070/// for the type VT. (This does not cover the vslide1up or vslide1down
3071/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3072/// or may track the vrgather.vv cost. It is implementation-dependent.
3076
3078 const RISCVSubtarget &Subtarget) {
3079 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3080 // bf16 conversions are always promoted to f32.
3081 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3082 Op.getValueType() == MVT::bf16) {
3083 bool IsStrict = Op->isStrictFPOpcode();
3084
3085 SDLoc DL(Op);
3086 if (IsStrict) {
3087 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3088 {Op.getOperand(0), Op.getOperand(1)});
3089 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3090 {Op.getValueType(), MVT::Other},
3091 {Val.getValue(1), Val.getValue(0),
3092 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3093 }
3094 return DAG.getNode(
3095 ISD::FP_ROUND, DL, Op.getValueType(),
3096 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3097 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3098 }
3099
3100 // Other operations are legal.
3101 return Op;
3102}
3103
3105 const RISCVSubtarget &Subtarget) {
3106 // RISC-V FP-to-int conversions saturate to the destination register size, but
3107 // don't produce 0 for nan. We can use a conversion instruction and fix the
3108 // nan case with a compare and a select.
3109 SDValue Src = Op.getOperand(0);
3110
3111 MVT DstVT = Op.getSimpleValueType();
3112 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3113
3114 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3115
3116 if (!DstVT.isVector()) {
3117 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3118 // the result.
3119 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3120 Src.getValueType() == MVT::bf16) {
3121 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3122 }
3123
3124 unsigned Opc;
3125 if (SatVT == DstVT)
3126 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3127 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3128 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3129 else
3130 return SDValue();
3131 // FIXME: Support other SatVTs by clamping before or after the conversion.
3132
3133 SDLoc DL(Op);
3134 SDValue FpToInt = DAG.getNode(
3135 Opc, DL, DstVT, Src,
3137
3138 if (Opc == RISCVISD::FCVT_WU_RV64)
3139 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3140
3141 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3142 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3144 }
3145
3146 // Vectors.
3147
3148 MVT DstEltVT = DstVT.getVectorElementType();
3149 MVT SrcVT = Src.getSimpleValueType();
3150 MVT SrcEltVT = SrcVT.getVectorElementType();
3151 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3152 unsigned DstEltSize = DstEltVT.getSizeInBits();
3153
3154 // Only handle saturating to the destination type.
3155 if (SatVT != DstEltVT)
3156 return SDValue();
3157
3158 MVT DstContainerVT = DstVT;
3159 MVT SrcContainerVT = SrcVT;
3160 if (DstVT.isFixedLengthVector()) {
3161 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3162 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3163 assert(DstContainerVT.getVectorElementCount() ==
3164 SrcContainerVT.getVectorElementCount() &&
3165 "Expected same element count");
3166 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3167 }
3168
3169 SDLoc DL(Op);
3170
3171 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3172
3173 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3174 {Src, Src, DAG.getCondCode(ISD::SETNE),
3175 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3176
3177 // If we need to widen by more than one step, promote the FP type, then do a
3178 // widening convert.
3179 if (DstEltSize > (2 * SrcEltSize)) {
3180 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3181 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3182 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3183 }
3184
3185 MVT CvtContainerVT = DstContainerVT;
3186 MVT CvtEltVT = DstEltVT;
3187 if (SrcEltSize > (2 * DstEltSize)) {
3188 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3189 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3190 }
3191
3192 unsigned RVVOpc =
3193 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3194 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3195
3196 while (CvtContainerVT != DstContainerVT) {
3197 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3198 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3199 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3200 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3201 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3202 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3203 }
3204
3205 SDValue SplatZero = DAG.getNode(
3206 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3207 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3208 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3209 Res, DAG.getUNDEF(DstContainerVT), VL);
3210
3211 if (DstVT.isFixedLengthVector())
3212 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3213
3214 return Res;
3215}
3216
3218 const RISCVSubtarget &Subtarget) {
3219 bool IsStrict = Op->isStrictFPOpcode();
3220 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3221
3222 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3223 // bf16 conversions are always promoted to f32.
3224 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3225 SrcVal.getValueType() == MVT::bf16) {
3226 SDLoc DL(Op);
3227 if (IsStrict) {
3228 SDValue Ext =
3229 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3230 {Op.getOperand(0), SrcVal});
3231 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3232 {Ext.getValue(1), Ext.getValue(0)});
3233 }
3234 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3235 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3236 }
3237
3238 // Other operations are legal.
3239 return Op;
3240}
3241
3243 switch (Opc) {
3244 case ISD::FROUNDEVEN:
3246 case ISD::VP_FROUNDEVEN:
3247 return RISCVFPRndMode::RNE;
3248 case ISD::FTRUNC:
3249 case ISD::STRICT_FTRUNC:
3250 case ISD::VP_FROUNDTOZERO:
3251 return RISCVFPRndMode::RTZ;
3252 case ISD::FFLOOR:
3253 case ISD::STRICT_FFLOOR:
3254 case ISD::VP_FFLOOR:
3255 return RISCVFPRndMode::RDN;
3256 case ISD::FCEIL:
3257 case ISD::STRICT_FCEIL:
3258 case ISD::VP_FCEIL:
3259 return RISCVFPRndMode::RUP;
3260 case ISD::FROUND:
3261 case ISD::LROUND:
3262 case ISD::LLROUND:
3263 case ISD::STRICT_FROUND:
3264 case ISD::STRICT_LROUND:
3266 case ISD::VP_FROUND:
3267 return RISCVFPRndMode::RMM;
3268 case ISD::FRINT:
3269 case ISD::LRINT:
3270 case ISD::LLRINT:
3271 case ISD::STRICT_FRINT:
3272 case ISD::STRICT_LRINT:
3273 case ISD::STRICT_LLRINT:
3274 case ISD::VP_FRINT:
3275 case ISD::VP_LRINT:
3276 case ISD::VP_LLRINT:
3277 return RISCVFPRndMode::DYN;
3278 }
3279
3281}
3282
3283 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3284 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3285 // the integer domain and back, taking care to avoid converting values that are
3286 // nan or already correct.
3287static SDValue
3289 const RISCVSubtarget &Subtarget) {
3290 MVT VT = Op.getSimpleValueType();
3291 assert(VT.isVector() && "Unexpected type");
3292
3293 SDLoc DL(Op);
3294
3295 SDValue Src = Op.getOperand(0);
3296
3297 // Freeze the source since we are increasing the number of uses.
3298 Src = DAG.getFreeze(Src);
3299
3300 MVT ContainerVT = VT;
3301 if (VT.isFixedLengthVector()) {
3302 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3303 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3304 }
3305
3306 SDValue Mask, VL;
3307 if (Op->isVPOpcode()) {
3308 Mask = Op.getOperand(1);
3309 if (VT.isFixedLengthVector())
3310 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3311 Subtarget);
3312 VL = Op.getOperand(2);
3313 } else {
3314 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3315 }
3316
3317 // We do the conversion on the absolute value and fix the sign at the end.
3318 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3319
3320 // Determine the largest integer that can be represented exactly. This and
3321 // values larger than it don't have any fractional bits so don't need to
3322 // be converted.
3323 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3324 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3325 APFloat MaxVal = APFloat(FltSem);
3326 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3327 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3328 SDValue MaxValNode =
3329 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3330 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3331 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3332
3333 // If abs(Src) was larger than MaxVal or nan, keep it.
3334 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3335 Mask =
3336 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3337 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3338 Mask, Mask, VL});
3339
3340 // Truncate to integer and convert back to FP.
3341 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3342 MVT XLenVT = Subtarget.getXLenVT();
3343 SDValue Truncated;
3344
3345 switch (Op.getOpcode()) {
3346 default:
3347 llvm_unreachable("Unexpected opcode");
3348 case ISD::FRINT:
3349 case ISD::VP_FRINT:
3350 case ISD::FCEIL:
3351 case ISD::VP_FCEIL:
3352 case ISD::FFLOOR:
3353 case ISD::VP_FFLOOR:
3354 case ISD::FROUND:
3355 case ISD::FROUNDEVEN:
3356 case ISD::VP_FROUND:
3357 case ISD::VP_FROUNDEVEN:
3358 case ISD::VP_FROUNDTOZERO: {
3361 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3362 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3363 break;
3364 }
3365 case ISD::FTRUNC:
3366 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3367 Mask, VL);
3368 break;
3369 case ISD::FNEARBYINT:
3370 case ISD::VP_FNEARBYINT:
3371 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3372 Mask, VL);
3373 break;
3374 }
3375
3376 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3377 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3378 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3379 Mask, VL);
3380
3381 // Restore the original sign so that -0.0 is preserved.
3382 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3383 Src, Src, Mask, VL);
3384
3385 if (!VT.isFixedLengthVector())
3386 return Truncated;
3387
3388 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3389}
3390
3391 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3392 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3393 // to a qNaN, then converting the new source to integer and back to FP.
3394static SDValue
3396 const RISCVSubtarget &Subtarget) {
3397 SDLoc DL(Op);
3398 MVT VT = Op.getSimpleValueType();
3399 SDValue Chain = Op.getOperand(0);
3400 SDValue Src = Op.getOperand(1);
3401
3402 MVT ContainerVT = VT;
3403 if (VT.isFixedLengthVector()) {
3404 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3405 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3406 }
3407
3408 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3409
3410 // Freeze the source since we are increasing the number of uses.
3411 Src = DAG.getFreeze(Src);
3412
3413 // Convert sNaN to qNaN by computing x + x for each unordered element x in Src.
3414 MVT MaskVT = Mask.getSimpleValueType();
3415 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3416 DAG.getVTList(MaskVT, MVT::Other),
3417 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3418 DAG.getUNDEF(MaskVT), Mask, VL});
3419 Chain = Unorder.getValue(1);
3420 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3421 DAG.getVTList(ContainerVT, MVT::Other),
3422 {Chain, Src, Src, Src, Unorder, VL});
3423 Chain = Src.getValue(1);
3424
3425 // We do the conversion on the absolute value and fix the sign at the end.
3426 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3427
3428 // Determine the largest integer that can be represented exactly. This and
3429 // values larger than it don't have any fractional bits so don't need to
3430 // be converted.
3431 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3432 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3433 APFloat MaxVal = APFloat(FltSem);
3434 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3435 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3436 SDValue MaxValNode =
3437 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3438 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3439 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3440
3441 // If abs(Src) was larger than MaxVal or nan, keep it.
3442 Mask = DAG.getNode(
3443 RISCVISD::SETCC_VL, DL, MaskVT,
3444 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3445
3446 // Truncate to integer and convert back to FP.
3447 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3448 MVT XLenVT = Subtarget.getXLenVT();
3449 SDValue Truncated;
3450
3451 switch (Op.getOpcode()) {
3452 default:
3453 llvm_unreachable("Unexpected opcode");
3454 case ISD::STRICT_FCEIL:
3455 case ISD::STRICT_FFLOOR:
3456 case ISD::STRICT_FROUND:
3460 Truncated = DAG.getNode(
3461 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3462 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3463 break;
3464 }
3465 case ISD::STRICT_FTRUNC:
3466 Truncated =
3467 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3468 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3469 break;
3471 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3472 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3473 Mask, VL);
3474 break;
3475 }
3476 Chain = Truncated.getValue(1);
3477
3478 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3479 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3480 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3481 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3482 Truncated, Mask, VL);
3483 Chain = Truncated.getValue(1);
3484 }
3485
3486 // Restore the original sign so that -0.0 is preserved.
3487 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3488 Src, Src, Mask, VL);
3489
3490 if (VT.isFixedLengthVector())
3491 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3492 return DAG.getMergeValues({Truncated, Chain}, DL);
3493}
3494
3495static SDValue
3497 const RISCVSubtarget &Subtarget) {
3498 MVT VT = Op.getSimpleValueType();
3499 if (VT.isVector())
3500 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3501
3502 if (DAG.shouldOptForSize())
3503 return SDValue();
3504
3505 SDLoc DL(Op);
3506 SDValue Src = Op.getOperand(0);
3507
3508 // Create an integer the size of the mantissa with the MSB set. This and all
3509 // values larger than it don't have any fractional bits so don't need to be
3510 // converted.
3511 const fltSemantics &FltSem = VT.getFltSemantics();
3512 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3513 APFloat MaxVal = APFloat(FltSem);
3514 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3515 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3516 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3517
3519 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3520 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3521}
3522
3523// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3525 const RISCVSubtarget &Subtarget) {
3526 SDLoc DL(Op);
3527 MVT DstVT = Op.getSimpleValueType();
3528 SDValue Src = Op.getOperand(0);
3529 MVT SrcVT = Src.getSimpleValueType();
3530 assert(SrcVT.isVector() && DstVT.isVector() &&
3531 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3532 "Unexpected type");
3533
3534 MVT DstContainerVT = DstVT;
3535 MVT SrcContainerVT = SrcVT;
3536
3537 if (DstVT.isFixedLengthVector()) {
3538 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3539 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3540 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3541 }
3542
3543 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3544
3545 // [b]f16 -> f32
3546 MVT SrcElemType = SrcVT.getVectorElementType();
3547 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3548 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3549 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3550 }
3551
3552 SDValue Res =
3553 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3554 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3555 Subtarget.getXLenVT()),
3556 VL);
3557
3558 if (!DstVT.isFixedLengthVector())
3559 return Res;
3560
3561 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3562}
3563
3564static SDValue
3566 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3567 SDValue Offset, SDValue Mask, SDValue VL,
3569 if (Passthru.isUndef())
3571 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3572 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3573 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3574}
3575
3576static SDValue
3577getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3578 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3579 SDValue VL,
3581 if (Passthru.isUndef())
3583 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3584 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3585 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3586}
3587
3588struct VIDSequence {
3589  int64_t StepNumerator;
3590  unsigned StepDenominator;
3591  int64_t Addend;
3592};
3593
3594static std::optional<APInt> getExactInteger(const APFloat &APF,
3596 // We will use a SINT_TO_FP to materialize this constant so we should use a
3597 // signed APSInt here.
3598 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3599  // We use an arbitrary rounding mode here. If a floating-point value is an
3600  // exact integer (e.g., 1.0), the rounding mode does not affect the output
3601  // value. If the rounding mode changes the output value, then it is not an
3602  // exact integer.
3604 bool IsExact;
3605 // If it is out of signed integer range, it will return an invalid operation.
3606 // If it is not an exact integer, IsExact is false.
3607 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3608       APFloatBase::opInvalidOp) ||
3609      !IsExact)
3610 return std::nullopt;
3611 return ValInt.extractBits(BitWidth, 0);
3612}
3613
3614// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3615// to the (non-zero) step S and start value X. This can then be lowered as the
3616// RVV sequence (VID * S) + X, for example.
3617// The step S is represented as an integer numerator divided by a positive
3618// denominator. Note that the implementation currently only identifies
3619// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3620// cannot detect 2/3, for example.
3621// Note that this method will also match potentially unappealing index
3622// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3623// determine whether this is worth generating code for.
3624//
3625// EltSizeInBits is the size of the type that the sequence will be calculated
3626// in, i.e. SEW for build_vectors or XLEN for address calculations.
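// For illustration (example values only): <i32 1, i32 1, i32 2, i32 2> is
// matched as StepNumerator=1, StepDenominator=2, Addend=1 (element i equals
// (i * 1) / 2 + 1), while <i32 3, i32 5, i32 7, i32 9> is matched as
// StepNumerator=2, StepDenominator=1, Addend=3.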
3627static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3628 unsigned EltSizeInBits) {
3629 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3631 return std::nullopt;
3632 bool IsInteger = Op.getValueType().isInteger();
3633
3634 std::optional<unsigned> SeqStepDenom;
3635 std::optional<APInt> SeqStepNum;
3636 std::optional<APInt> SeqAddend;
3637 std::optional<std::pair<APInt, unsigned>> PrevElt;
3638 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3639
3640 // First extract the ops into a list of constant integer values. This may not
3641 // be possible for floats if they're not all representable as integers.
3642 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3643 const unsigned OpSize = Op.getScalarValueSizeInBits();
3644 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3645 if (Elt.isUndef()) {
3646 Elts[Idx] = std::nullopt;
3647 continue;
3648 }
3649 if (IsInteger) {
3650 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3651 } else {
3652 auto ExactInteger =
3653 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3654 if (!ExactInteger)
3655 return std::nullopt;
3656 Elts[Idx] = *ExactInteger;
3657 }
3658 }
3659
3660 for (auto [Idx, Elt] : enumerate(Elts)) {
3661 // Assume undef elements match the sequence; we just have to be careful
3662 // when interpolating across them.
3663 if (!Elt)
3664 continue;
3665
3666 if (PrevElt) {
3667 // Calculate the step since the last non-undef element, and ensure
3668 // it's consistent across the entire sequence.
3669 unsigned IdxDiff = Idx - PrevElt->second;
3670 APInt ValDiff = *Elt - PrevElt->first;
3671
3672      // A zero value difference means that we're somewhere in the middle
3673 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3674 // step change before evaluating the sequence.
3675 if (ValDiff == 0)
3676 continue;
3677
3678 int64_t Remainder = ValDiff.srem(IdxDiff);
3679 // Normalize the step if it's greater than 1.
3680 if (Remainder != ValDiff.getSExtValue()) {
3681 // The difference must cleanly divide the element span.
3682 if (Remainder != 0)
3683 return std::nullopt;
3684 ValDiff = ValDiff.sdiv(IdxDiff);
3685 IdxDiff = 1;
3686 }
3687
3688 if (!SeqStepNum)
3689 SeqStepNum = ValDiff;
3690 else if (ValDiff != SeqStepNum)
3691 return std::nullopt;
3692
3693 if (!SeqStepDenom)
3694 SeqStepDenom = IdxDiff;
3695 else if (IdxDiff != *SeqStepDenom)
3696 return std::nullopt;
3697 }
3698
3699 // Record this non-undef element for later.
3700 if (!PrevElt || PrevElt->first != *Elt)
3701 PrevElt = std::make_pair(*Elt, Idx);
3702 }
3703
3704 // We need to have logged a step for this to count as a legal index sequence.
3705 if (!SeqStepNum || !SeqStepDenom)
3706 return std::nullopt;
3707
3708 // Loop back through the sequence and validate elements we might have skipped
3709 // while waiting for a valid step. While doing this, log any sequence addend.
3710 for (auto [Idx, Elt] : enumerate(Elts)) {
3711 if (!Elt)
3712 continue;
3713 APInt ExpectedVal =
3714 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3715 *SeqStepNum)
3716 .sdiv(*SeqStepDenom);
3717
3718 APInt Addend = *Elt - ExpectedVal;
3719 if (!SeqAddend)
3720 SeqAddend = Addend;
3721 else if (Addend != SeqAddend)
3722 return std::nullopt;
3723 }
3724
3725 assert(SeqAddend && "Must have an addend if we have a step");
3726
3727 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3728 SeqAddend->getSExtValue()};
3729}
3730
3731// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3732// and lower it as a VRGATHER_VX_VL from the source vector.
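// For illustration: a BUILD_VECTOR whose every element is
// (extract_vector_elt %src, %i), with %i an XLenVT scalar, becomes a single
// vrgather.vx of %src with index %i, avoiding a vector-to-scalar round trip.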
3733static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3734 SelectionDAG &DAG,
3735 const RISCVSubtarget &Subtarget) {
3736 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3737 return SDValue();
3738 SDValue Src = SplatVal.getOperand(0);
3739 // Don't perform this optimization for i1 vectors, or if the element types are
3740 // different
3741 // FIXME: Support i1 vectors, maybe by promoting to i8?
3742 MVT EltTy = VT.getVectorElementType();
3743 if (EltTy == MVT::i1 ||
3744 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3745 return SDValue();
3746 MVT SrcVT = Src.getSimpleValueType();
3747 if (EltTy != SrcVT.getVectorElementType())
3748 return SDValue();
3749 SDValue Idx = SplatVal.getOperand(1);
3750 // The index must be a legal type.
3751 if (Idx.getValueType() != Subtarget.getXLenVT())
3752 return SDValue();
3753
3754 // Check that we know Idx lies within VT
3755 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3756 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3757 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3758 return SDValue();
3759 }
3760
3761 // Convert fixed length vectors to scalable
3762 MVT ContainerVT = VT;
3763 if (VT.isFixedLengthVector())
3764 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3765
3766 MVT SrcContainerVT = SrcVT;
3767 if (SrcVT.isFixedLengthVector()) {
3768 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3769 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3770 }
3771
3772 // Put Vec in a VT sized vector
3773 if (SrcContainerVT.getVectorMinNumElements() <
3774 ContainerVT.getVectorMinNumElements())
3775 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3776 else
3777 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3778
3779 // We checked that Idx fits inside VT earlier
3780 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3781 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3782 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3783 if (VT.isFixedLengthVector())
3784 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3785 return Gather;
3786}
3787
3788static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3789                                      const RISCVSubtarget &Subtarget) {
3790 MVT VT = Op.getSimpleValueType();
3791 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3792
3793 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3794
3795 SDLoc DL(Op);
3796 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3797
3798 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3799 int64_t StepNumerator = SimpleVID->StepNumerator;
3800 unsigned StepDenominator = SimpleVID->StepDenominator;
3801 int64_t Addend = SimpleVID->Addend;
3802
3803 assert(StepNumerator != 0 && "Invalid step");
3804 bool Negate = false;
3805 int64_t SplatStepVal = StepNumerator;
3806 unsigned StepOpcode = ISD::MUL;
3807 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3808 // anyway as the shift of 63 won't fit in uimm5.
3809 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3810 isPowerOf2_64(std::abs(StepNumerator))) {
3811 Negate = StepNumerator < 0;
3812 StepOpcode = ISD::SHL;
3813 SplatStepVal = Log2_64(std::abs(StepNumerator));
3814 }
3815
3816    // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3817    // since it's the immediate value many RVV instructions accept. There is
3818    // no vmul.vi instruction, so ensure the multiply constant can fit in a
3819    // single addi instruction. For the addend, we allow up to 32 bits.
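    // For illustration (example values only): <i32 0, i32 4, i32 8, i32 12>
    // has StepNumerator=4 and is emitted as (vid.v << 2), while
    // <i32 -3, i32 -1, i32 1, i32 3> has StepNumerator=2, Addend=-3 and
    // becomes (vid.v << 1) + (-3).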
3820 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3821 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3822 isPowerOf2_32(StepDenominator) &&
3823 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3824 MVT VIDVT =
3826 MVT VIDContainerVT =
3827 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3828 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3829 // Convert right out of the scalable type so we can use standard ISD
3830 // nodes for the rest of the computation. If we used scalable types with
3831 // these, we'd lose the fixed-length vector info and generate worse
3832 // vsetvli code.
3833 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3834 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3835 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3836 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3837 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3838 }
3839 if (StepDenominator != 1) {
3840 SDValue SplatStep =
3841 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3842 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3843 }
3844 if (Addend != 0 || Negate) {
3845 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3846 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3847 VID);
3848 }
3849 if (VT.isFloatingPoint()) {
3850 // TODO: Use vfwcvt to reduce register pressure.
3851 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3852 }
3853 return VID;
3854 }
3855 }
3856
3857 return SDValue();
3858}
3859
3860/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3861/// which constitute a large proportion of the elements. In such cases we can
3862/// splat a vector with the dominant element and make up the shortfall with
3863/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3864/// Note that this includes vectors of 2 elements by association. The
3865/// upper-most element is the "dominant" one, allowing us to use a splat to
3866/// "insert" the upper element, and an insert of the lower element at position
3867/// 0, which improves codegen.
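/// For illustration (example values only): <a, b, a, a, a, a, a, a> is
/// lowered as a splat of a followed by an insert of b at index 1, and a
/// two-element <x, y> as a splat of y with an insert of x at index 0.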
3868static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3869                                                 const RISCVSubtarget &Subtarget) {
3870 MVT VT = Op.getSimpleValueType();
3871 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3872
3873 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3874
3875 SDLoc DL(Op);
3876 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3877
3878 MVT XLenVT = Subtarget.getXLenVT();
3879 unsigned NumElts = Op.getNumOperands();
3880
3881 SDValue DominantValue;
3882 unsigned MostCommonCount = 0;
3883 DenseMap<SDValue, unsigned> ValueCounts;
3884 unsigned NumUndefElts =
3885 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3886
3887 // Track the number of scalar loads we know we'd be inserting, estimated as
3888 // any non-zero floating-point constant. Other kinds of element are either
3889 // already in registers or are materialized on demand. The threshold at which
3890  // a vector load is more desirable than several scalar materialization and
3891 // vector-insertion instructions is not known.
3892 unsigned NumScalarLoads = 0;
3893
3894 for (SDValue V : Op->op_values()) {
3895 if (V.isUndef())
3896 continue;
3897
3898 unsigned &Count = ValueCounts[V];
3899 if (0 == Count)
3900 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3901 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3902
3903 // Is this value dominant? In case of a tie, prefer the highest element as
3904 // it's cheaper to insert near the beginning of a vector than it is at the
3905 // end.
3906 if (++Count >= MostCommonCount) {
3907 DominantValue = V;
3908 MostCommonCount = Count;
3909 }
3910 }
3911
3912 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3913 unsigned NumDefElts = NumElts - NumUndefElts;
3914 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3915
3916 // Don't perform this optimization when optimizing for size, since
3917 // materializing elements and inserting them tends to cause code bloat.
3918 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3919 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3920 ((MostCommonCount > DominantValueCountThreshold) ||
3921 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3922 // Start by splatting the most common element.
3923 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3924
3925 DenseSet<SDValue> Processed{DominantValue};
3926
3927 // We can handle an insert into the last element (of a splat) via
3928 // v(f)slide1down. This is slightly better than the vslideup insert
3929 // lowering as it avoids the need for a vector group temporary. It
3930 // is also better than using vmerge.vx as it avoids the need to
3931 // materialize the mask in a vector register.
3932 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3933 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3934 LastOp != DominantValue) {
3935 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3936 auto OpCode =
3937 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3938 if (!VT.isFloatingPoint())
3939 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3940 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3941 LastOp, Mask, VL);
3942 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3943 Processed.insert(LastOp);
3944 }
3945
3946 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3947 for (const auto &OpIdx : enumerate(Op->ops())) {
3948 const SDValue &V = OpIdx.value();
3949 if (V.isUndef() || !Processed.insert(V).second)
3950 continue;
3951 if (ValueCounts[V] == 1) {
3952 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3953 } else {
3954 // Blend in all instances of this value using a VSELECT, using a
3955 // mask where each bit signals whether that element is the one
3956 // we're after.
3957        SmallVector<SDValue> Ops;
3958        transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3959 return DAG.getConstant(V == V1, DL, XLenVT);
3960 });
3961 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3962 DAG.getBuildVector(SelMaskTy, DL, Ops),
3963 DAG.getSplatBuildVector(VT, DL, V), Vec);
3964 }
3965 }
3966
3967 return Vec;
3968 }
3969
3970 return SDValue();
3971}
3972
3973static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3974                                           const RISCVSubtarget &Subtarget) {
3975 MVT VT = Op.getSimpleValueType();
3976 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3977
3978 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3979
3980 SDLoc DL(Op);
3981 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3982
3983 MVT XLenVT = Subtarget.getXLenVT();
3984 unsigned NumElts = Op.getNumOperands();
3985
3986 if (VT.getVectorElementType() == MVT::i1) {
3987 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3988 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3989 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3990 }
3991
3992 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3993 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3994 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3995 }
3996
3997 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3998 // scalar integer chunks whose bit-width depends on the number of mask
3999 // bits and XLEN.
4000 // First, determine the most appropriate scalar integer type to use. This
4001 // is at most XLenVT, but may be shrunk to a smaller vector element type
4002 // according to the size of the final vector - use i8 chunks rather than
4003 // XLenVT if we're producing a v8i1. This results in more consistent
4004 // codegen across RV32 and RV64.
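    // For illustration (example values only): the v8i1 constant
    // <1,0,1,1,0,0,1,0> is packed LSB-first into the i8 value 0b01001101
    // (0x4d), materialized as a v1i8 build_vector, and bitcast back to v8i1.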
4005 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4006 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4007 // If we have to use more than one INSERT_VECTOR_ELT then this
4008 // optimization is likely to increase code size; avoid performing it in
4009 // such a case. We can use a load from a constant pool in this case.
4010 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4011 return SDValue();
4012 // Now we can create our integer vector type. Note that it may be larger
4013 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4014 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4015 MVT IntegerViaVecVT =
4016 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4017 IntegerViaVecElts);
4018
4019 uint64_t Bits = 0;
4020 unsigned BitPos = 0, IntegerEltIdx = 0;
4021 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4022
4023 for (unsigned I = 0; I < NumElts;) {
4024 SDValue V = Op.getOperand(I);
4025 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4026 Bits |= ((uint64_t)BitValue << BitPos);
4027 ++BitPos;
4028 ++I;
4029
4030 // Once we accumulate enough bits to fill our scalar type or process the
4031 // last element, insert into our vector and clear our accumulated data.
4032 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4033 if (NumViaIntegerBits <= 32)
4034 Bits = SignExtend64<32>(Bits);
4035 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4036 Elts[IntegerEltIdx] = Elt;
4037 Bits = 0;
4038 BitPos = 0;
4039 IntegerEltIdx++;
4040 }
4041 }
4042
4043 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4044
4045 if (NumElts < NumViaIntegerBits) {
4046 // If we're producing a smaller vector than our minimum legal integer
4047 // type, bitcast to the equivalent (known-legal) mask type, and extract
4048 // our final mask.
4049 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4050 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4051 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4052 } else {
4053 // Else we must have produced an integer type with the same size as the
4054 // mask type; bitcast for the final result.
4055 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4056 Vec = DAG.getBitcast(VT, Vec);
4057 }
4058
4059 return Vec;
4060 }
4061
4063 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4064 : RISCVISD::VMV_V_X_VL;
4065 if (!VT.isFloatingPoint())
4066 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4067 Splat =
4068 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4069 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4070 }
4071
4072 // Try and match index sequences, which we can lower to the vid instruction
4073 // with optional modifications. An all-undef vector is matched by
4074 // getSplatValue, above.
4075 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4076 return Res;
4077
4078 // For very small build_vectors, use a single scalar insert of a constant.
4079 // TODO: Base this on constant rematerialization cost, not size.
4080 const unsigned EltBitSize = VT.getScalarSizeInBits();
4081 if (VT.getSizeInBits() <= 32 &&
4083 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4084 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4085 "Unexpected sequence type");
4086 // If we can use the original VL with the modified element type, this
4087 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4088 // be moved into InsertVSETVLI?
4089 unsigned ViaVecLen =
4090 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4091 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4092
4093 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4094 uint64_t SplatValue = 0;
4095 // Construct the amalgamated value at this larger vector type.
4096 for (const auto &OpIdx : enumerate(Op->op_values())) {
4097 const auto &SeqV = OpIdx.value();
4098 if (!SeqV.isUndef())
4099 SplatValue |=
4100 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4101 }
4102
4103 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4104    // achieve better constant materialization.
4105 // On RV32, we need to sign-extend to use getSignedConstant.
4106 if (ViaIntVT == MVT::i32)
4107 SplatValue = SignExtend64<32>(SplatValue);
4108
4109 SDValue Vec = DAG.getInsertVectorElt(
4110 DL, DAG.getUNDEF(ViaVecVT),
4111 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4112 if (ViaVecLen != 1)
4113 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4114 return DAG.getBitcast(VT, Vec);
4115 }
4116
4117
4118 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4119 // when re-interpreted as a vector with a larger element type. For example,
4120 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4121 // could be instead splat as
4122 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4123 // TODO: This optimization could also work on non-constant splats, but it
4124 // would require bit-manipulation instructions to construct the splat value.
4125 SmallVector<SDValue> Sequence;
4126 const auto *BV = cast<BuildVectorSDNode>(Op);
4127 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4129 BV->getRepeatedSequence(Sequence) &&
4130 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4131 unsigned SeqLen = Sequence.size();
4132 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4133 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4134 ViaIntVT == MVT::i64) &&
4135 "Unexpected sequence type");
4136
4137 // If we can use the original VL with the modified element type, this
4138 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4139 // be moved into InsertVSETVLI?
4140 const unsigned RequiredVL = NumElts / SeqLen;
4141 const unsigned ViaVecLen =
4142 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4143 NumElts : RequiredVL;
4144 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4145
4146 unsigned EltIdx = 0;
4147 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4148 uint64_t SplatValue = 0;
4149 // Construct the amalgamated value which can be splatted as this larger
4150 // vector type.
4151 for (const auto &SeqV : Sequence) {
4152 if (!SeqV.isUndef())
4153 SplatValue |=
4154 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4155 EltIdx++;
4156 }
4157
4158 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4159    // achieve better constant materialization.
4160 // On RV32, we need to sign-extend to use getSignedConstant.
4161 if (ViaIntVT == MVT::i32)
4162 SplatValue = SignExtend64<32>(SplatValue);
4163
4164 // Since we can't introduce illegal i64 types at this stage, we can only
4165 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4166 // way we can use RVV instructions to splat.
4167 assert((ViaIntVT.bitsLE(XLenVT) ||
4168 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4169 "Unexpected bitcast sequence");
4170 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4171 SDValue ViaVL =
4172 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4173 MVT ViaContainerVT =
4174 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4175 SDValue Splat =
4176 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4177 DAG.getUNDEF(ViaContainerVT),
4178 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4179 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4180 if (ViaVecLen != RequiredVL)
4181        Splat = DAG.getExtractSubvector(
4182            DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4183 return DAG.getBitcast(VT, Splat);
4184 }
4185 }
4186
4187 // If the number of signbits allows, see if we can lower as a <N x i8>.
4188 // Our main goal here is to reduce LMUL (and thus work) required to
4189 // build the constant, but we will also narrow if the resulting
4190 // narrow vector is known to materialize cheaply.
4191 // TODO: We really should be costing the smaller vector. There are
4192 // profitable cases this misses.
4193 if (EltBitSize > 8 && VT.isInteger() &&
4194 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4195 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4196 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4197 DL, Op->ops());
4198 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4199 Source, DAG, Subtarget);
4200 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4201 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4202 }
4203
4204 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4205 return Res;
4206
4207 // For constant vectors, use generic constant pool lowering. Otherwise,
4208 // we'd have to materialize constants in GPRs just to move them into the
4209 // vector.
4210 return SDValue();
4211}
4212
4213static unsigned getPACKOpcode(unsigned DestBW,
4214 const RISCVSubtarget &Subtarget) {
4215 switch (DestBW) {
4216 default:
4217 llvm_unreachable("Unsupported pack size");
4218 case 16:
4219 return RISCV::PACKH;
4220 case 32:
4221 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4222 case 64:
4223 assert(Subtarget.is64Bit());
4224 return RISCV::PACK;
4225 }
4226}
4227
4228/// Double the element size of the build vector to reduce the number
4229/// of vslide1down in the build vector chain. In the worst case, this
4230/// trades three scalar operations for 1 vector operation. Scalar
4231/// operations are generally lower latency, and for out-of-order cores
4232/// we also benefit from additional parallelism.
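/// For illustration: a v8i8 build_vector is rebuilt as a v4i16 build_vector
/// whose elements are (B << 8) | A for each adjacent pair [A, B] (a single
/// packh when Zbkb is available), and the result is bitcast back to v8i8.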
4233static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4234                                          const RISCVSubtarget &Subtarget) {
4235 SDLoc DL(Op);
4236 MVT VT = Op.getSimpleValueType();
4237 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4238 MVT ElemVT = VT.getVectorElementType();
4239 if (!ElemVT.isInteger())
4240 return SDValue();
4241
4242 // TODO: Relax these architectural restrictions, possibly with costing
4243 // of the actual instructions required.
4244 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4245 return SDValue();
4246
4247 unsigned NumElts = VT.getVectorNumElements();
4248 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4249 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4250 NumElts % 2 != 0)
4251 return SDValue();
4252
4253 // Produce [B,A] packed into a type twice as wide. Note that all
4254 // scalars are XLenVT, possibly masked (see below).
4255 MVT XLenVT = Subtarget.getXLenVT();
4256 SDValue Mask = DAG.getConstant(
4257 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4258 auto pack = [&](SDValue A, SDValue B) {
4259 // Bias the scheduling of the inserted operations to near the
4260 // definition of the element - this tends to reduce register
4261 // pressure overall.
4262 SDLoc ElemDL(B);
4263 if (Subtarget.hasStdExtZbkb())
4264 // Note that we're relying on the high bits of the result being
4265 // don't care. For PACKW, the result is *sign* extended.
4266 return SDValue(
4267 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4268 ElemDL, XLenVT, A, B),
4269 0);
4270
4271 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4272 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4273 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4274 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4275 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4277 };
4278
4279 SmallVector<SDValue> NewOperands;
4280 NewOperands.reserve(NumElts / 2);
4281 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4282 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4283 assert(NumElts == NewOperands.size() * 2);
4284 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4285 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4286 return DAG.getNode(ISD::BITCAST, DL, VT,
4287 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4288}
4289
4291 const RISCVSubtarget &Subtarget) {
4292 MVT VT = Op.getSimpleValueType();
4293 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4294
4295 MVT EltVT = VT.getVectorElementType();
4296 MVT XLenVT = Subtarget.getXLenVT();
4297
4298 SDLoc DL(Op);
4299
4300 // Proper support for f16 requires Zvfh. bf16 always requires special
4301 // handling. We need to cast the scalar to integer and create an integer
4302 // build_vector.
4303 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4304 MVT IVT = VT.changeVectorElementType(MVT::i16);
4305 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4306 for (const auto &[I, U] : enumerate(Op->ops())) {
4307 SDValue Elem = U.get();
4308 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4309 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4310 // Called by LegalizeDAG, we need to use XLenVT operations since we
4311 // can't create illegal types.
4312 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4313 // Manually constant fold so the integer build_vector can be lowered
4314 // better. Waiting for DAGCombine will be too late.
4315 APInt V =
4316 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4317 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4318 } else {
4319 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4320 }
4321 } else {
4322 // Called by scalar type legalizer, we can use i16.
4323 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4324 }
4325 }
4326 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4327 return DAG.getBitcast(VT, Res);
4328 }
4329
4330 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4331      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4332    return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4333
4334 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4335
4336 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4337
4338 if (VT.getVectorElementType() == MVT::i1) {
4339 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4340 // vector type, we have a legal equivalently-sized i8 type, so we can use
4341 // that.
4342 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4343 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4344
4345 SDValue WideVec;
4347 // For a splat, perform a scalar truncate before creating the wider
4348 // vector.
4349 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4350 DAG.getConstant(1, DL, Splat.getValueType()));
4351 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4352 } else {
4353 SmallVector<SDValue, 8> Ops(Op->op_values());
4354 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4355 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4356 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4357 }
4358
4359 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4360 }
4361
4363 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4364 return Gather;
4365
4366 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4367 // pressure at high LMUL.
4368 if (all_of(Op->ops().drop_front(),
4369 [](const SDUse &U) { return U.get().isUndef(); })) {
4370 unsigned Opc =
4371 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4372 if (!VT.isFloatingPoint())
4373 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4374 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4375 Splat, VL);
4376 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4377 }
4378
4379 unsigned Opc =
4380 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4381 if (!VT.isFloatingPoint())
4382 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4383 Splat =
4384 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4385 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4386 }
4387
4388 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4389 return Res;
4390
4391 // If we're compiling for an exact VLEN value, we can split our work per
4392 // register in the register group.
4393 if (const auto VLen = Subtarget.getRealVLen();
4394 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4395 MVT ElemVT = VT.getVectorElementType();
4396 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4397 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4398 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4399 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4400 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4401
4402 // The following semantically builds up a fixed length concat_vector
4403 // of the component build_vectors. We eagerly lower to scalable and
4404 // insert_subvector here to avoid DAG combining it back to a large
4405 // build_vector.
4406 SmallVector<SDValue> BuildVectorOps(Op->ops());
4407 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4408 SDValue Vec = DAG.getUNDEF(ContainerVT);
4409 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4410 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4411 SDValue SubBV =
4412 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4413 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4414 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4415 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4416 }
4417 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4418 }
4419
4420 // If we're about to resort to vslide1down (or stack usage), pack our
4421 // elements into the widest scalar type we can. This will force a VL/VTYPE
4422 // toggle, but reduces the critical path, the number of vslide1down ops
4423 // required, and possibly enables scalar folds of the values.
4424 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4425 return Res;
4426
4427 // For m1 vectors, if we have non-undef values in both halves of our vector,
4428 // split the vector into low and high halves, build them separately, then
4429 // use a vselect to combine them. For long vectors, this cuts the critical
4430 // path of the vslide1down sequence in half, and gives us an opportunity
4431 // to special case each half independently. Note that we don't change the
4432 // length of the sub-vectors here, so if both fallback to the generic
4433 // vslide1down path, we should be able to fold the vselect into the final
4434 // vslidedown (for the undef tail) for the first half w/ masking.
4435 unsigned NumElts = VT.getVectorNumElements();
4436 unsigned NumUndefElts =
4437 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4438 unsigned NumDefElts = NumElts - NumUndefElts;
4439 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4440 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4441 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4442 SmallVector<SDValue> MaskVals;
4443 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4444 SubVecAOps.reserve(NumElts);
4445 SubVecBOps.reserve(NumElts);
4446 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4447 SDValue Elem = U.get();
4448 if (Idx < NumElts / 2) {
4449 SubVecAOps.push_back(Elem);
4450 SubVecBOps.push_back(UndefElem);
4451 } else {
4452 SubVecAOps.push_back(UndefElem);
4453 SubVecBOps.push_back(Elem);
4454 }
4455 bool SelectMaskVal = (Idx < NumElts / 2);
4456 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4457 }
4458 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4459 MaskVals.size() == NumElts);
4460
4461 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4462 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4463 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4464 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4465 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4466 }
4467
4468 // Cap the cost at a value linear to the number of elements in the vector.
4469 // The default lowering is to use the stack. The vector store + scalar loads
4470 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4471 // being (at least) linear in LMUL. As a result, using the vslidedown
4472  // lowering for every element ends up being VL*LMUL.
4473 // TODO: Should we be directly costing the stack alternative? Doing so might
4474 // give us a more accurate upper bound.
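  // For illustration (example values only): an 8-element build_vector has a
  // budget of 16; at LMUL_4 each slide costs 4, so at most four slide steps
  // (defined elements plus undef runs) stay within budget before we bail out
  // to the default stack lowering.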
4475 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4476
4477 // TODO: unify with TTI getSlideCost.
4478 InstructionCost PerSlideCost = 1;
4479 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4480 default: break;
4481 case RISCVVType::LMUL_2:
4482 PerSlideCost = 2;
4483 break;
4484 case RISCVVType::LMUL_4:
4485 PerSlideCost = 4;
4486 break;
4487 case RISCVVType::LMUL_8:
4488 PerSlideCost = 8;
4489 break;
4490 }
4491
4492 // TODO: Should we be using the build instseq then cost + evaluate scheme
4493 // we use for integer constants here?
4494 unsigned UndefCount = 0;
4495 for (const SDValue &V : Op->ops()) {
4496 if (V.isUndef()) {
4497 UndefCount++;
4498 continue;
4499 }
4500 if (UndefCount) {
4501 LinearBudget -= PerSlideCost;
4502 UndefCount = 0;
4503 }
4504 LinearBudget -= PerSlideCost;
4505 }
4506 if (UndefCount) {
4507 LinearBudget -= PerSlideCost;
4508 }
4509
4510 if (LinearBudget < 0)
4511 return SDValue();
4512
4513 assert((!VT.isFloatingPoint() ||
4514 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4515 "Illegal type which will result in reserved encoding");
4516
4517 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4518
4519 // General case: splat the first operand and slide other operands down one
4520 // by one to form a vector. Alternatively, if every operand is an
4521 // extraction from element 0 of a vector, we use that vector from the last
4522  // extraction as the start value and slide up instead of down, so that (1) we
4523  // can avoid the initial splat, and (2) we can later turn those vslide1up ops
4524  // into a vslideup of 1 and eliminate the vector-to-scalar movement, which is
4525  // something we cannot do with vslide1down/vslidedown.
4526 // Of course, using vslide1up/vslideup might increase the register pressure,
4527 // and that's why we conservatively limit to cases where every operand is an
4528 // extraction from the first element.
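  // For illustration: in
  //   build_vector (extract_elt %v, 0), (extract_elt %w, 0), (extract_elt %x, 0)
  // every operand is an element-0 extraction, so we start from %x (the source
  // of the last extraction), reverse the operand order, and build the result
  // with vslide1up steps instead of splatting the first operand and sliding
  // down.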
4529 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4530 SDValue EVec;
4531 bool SlideUp = false;
4532 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4533 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4534 if (SlideUp)
4535 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4536 Mask, VL, Policy);
4537 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4538 Mask, VL, Policy);
4539 };
4540
4541 // The reason we don't use all_of here is because we're also capturing EVec
4542 // from the last non-undef operand. If the std::execution_policy of the
4543 // underlying std::all_of is anything but std::sequenced_policy we might
4544 // capture the wrong EVec.
4545 for (SDValue V : Operands) {
4546 using namespace SDPatternMatch;
4547 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4548 if (!SlideUp)
4549 break;
4550 }
4551
4552 if (SlideUp) {
4553 MVT EVecContainerVT = EVec.getSimpleValueType();
4554 // Make sure the original vector has scalable vector type.
4555 if (EVecContainerVT.isFixedLengthVector()) {
4556 EVecContainerVT =
4557 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4558 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4559 }
4560
4561 // Adapt EVec's type into ContainerVT.
4562 if (EVecContainerVT.getVectorMinNumElements() <
4563 ContainerVT.getVectorMinNumElements())
4564 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4565 else
4566 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4567
4568 // Reverse the elements as we're going to slide up from the last element.
4569 std::reverse(Operands.begin(), Operands.end());
4570 }
4571
4572 SDValue Vec;
4573 UndefCount = 0;
4574 for (SDValue V : Operands) {
4575 if (V.isUndef()) {
4576 UndefCount++;
4577 continue;
4578 }
4579
4580 // Start our sequence with either a TA splat or extract source in the
4581 // hopes that hardware is able to recognize there's no dependency on the
4582 // prior value of our temporary register.
4583 if (!Vec) {
4584 if (SlideUp) {
4585 Vec = EVec;
4586 } else {
4587 Vec = DAG.getSplatVector(VT, DL, V);
4588 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4589 }
4590
4591 UndefCount = 0;
4592 continue;
4593 }
4594
4595 if (UndefCount) {
4596 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4597 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4598 VL);
4599 UndefCount = 0;
4600 }
4601
4602 unsigned Opcode;
4603 if (VT.isFloatingPoint())
4604 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4605 else
4606 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4607
4608 if (!VT.isFloatingPoint())
4609 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4610 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4611 V, Mask, VL);
4612 }
4613 if (UndefCount) {
4614 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4615 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4616 VL);
4617 }
4618 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4619}
4620
4621static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4623 SelectionDAG &DAG) {
4624 if (!Passthru)
4625 Passthru = DAG.getUNDEF(VT);
4627 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4628 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4629 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4630 // node in order to try and match RVV vector/scalar instructions.
4631 if ((LoC >> 31) == HiC)
4632 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4633
4634 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4635    // VL. This can temporarily increase VL if VL is less than VLMAX.
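    // For illustration (example constant only): splatting the i64 value
    // 0x1234567812345678 on RV32 becomes one vmv.v.x of 0x12345678 into an
    // i32 vector with twice the element count (VL doubled, or VLMAX), followed
    // by a bitcast back to the i64 vector type.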
4636 if (LoC == HiC) {
4637 SDValue NewVL;
4638 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4639 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4640 else
4641 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4642 MVT InterVT =
4643 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4644 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4645 DAG.getUNDEF(InterVT), Lo, NewVL);
4646 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4647 }
4648 }
4649
4650 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4651 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4652 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4653 Hi.getConstantOperandVal(1) == 31)
4654 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4655
4656 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4657 // even if it might be sign extended.
4658 if (Hi.isUndef())
4659 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4660
4661 // Fall back to a stack store and stride x0 vector load.
4662 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4663 Hi, VL);
4664}
4665
4666// Called by type legalization to handle splat of i64 on RV32.
4667// FIXME: We can optimize this when the type has sign or zero bits in one
4668// of the halves.
4669static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4670 SDValue Scalar, SDValue VL,
4671 SelectionDAG &DAG) {
4672 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4673 SDValue Lo, Hi;
4674 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4675 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4676}
4677
4678// This function lowers a splat of a scalar operand Scalar with the vector
4679// length VL. It ensures the final sequence is type legal, which is useful when
4680// lowering a splat after type legalization.
4681static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4682 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4683 const RISCVSubtarget &Subtarget) {
4684 bool HasPassthru = Passthru && !Passthru.isUndef();
4685 if (!HasPassthru && !Passthru)
4686 Passthru = DAG.getUNDEF(VT);
4687
4688 MVT EltVT = VT.getVectorElementType();
4689 MVT XLenVT = Subtarget.getXLenVT();
4690
4691 if (VT.isFloatingPoint()) {
4692 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4693 EltVT == MVT::bf16) {
4694 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4695 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4696 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4697 else
4698 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4699 MVT IVT = VT.changeVectorElementType(MVT::i16);
4700 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4701 SDValue Splat =
4702 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4703 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4704 }
4705 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4706 }
4707
4708 // Simplest case is that the operand needs to be promoted to XLenVT.
4709 if (Scalar.getValueType().bitsLE(XLenVT)) {
4710 // If the operand is a constant, sign extend to increase our chances
4711 // of being able to use a .vi instruction. ANY_EXTEND would become a
4712    // zero extend and the simm5 check in isel would fail.
4713 // FIXME: Should we ignore the upper bits in isel instead?
4714 unsigned ExtOpc =
4715        isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4716    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4717 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4718 }
4719
4720 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4721 "Unexpected scalar for splat lowering!");
4722
4723 if (isOneConstant(VL) && isNullConstant(Scalar))
4724 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4725 DAG.getConstant(0, DL, XLenVT), VL);
4726
4727 // Otherwise use the more complicated splatting algorithm.
4728 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4729}
4730
4731// This function lowers an insert of a scalar operand Scalar into lane
4732// 0 of the vector regardless of the value of VL. The contents of the
4733// remaining lanes of the result vector are unspecified. VL is assumed
4734// to be non-zero.
4736 const SDLoc &DL, SelectionDAG &DAG,
4737 const RISCVSubtarget &Subtarget) {
4738 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4739
4740 const MVT XLenVT = Subtarget.getXLenVT();
4741 SDValue Passthru = DAG.getUNDEF(VT);
4742
4743 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4744 isNullConstant(Scalar.getOperand(1))) {
4745 SDValue ExtractedVal = Scalar.getOperand(0);
4746 // The element types must be the same.
4747 if (ExtractedVal.getValueType().getVectorElementType() ==
4748 VT.getVectorElementType()) {
4749 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4750 MVT ExtractedContainerVT = ExtractedVT;
4751 if (ExtractedContainerVT.isFixedLengthVector()) {
4752 ExtractedContainerVT = getContainerForFixedLengthVector(
4753 DAG, ExtractedContainerVT, Subtarget);
4754 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4755 ExtractedVal, DAG, Subtarget);
4756 }
4757 if (ExtractedContainerVT.bitsLE(VT))
4758 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4759 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4760 }
4761 }
4762
4763 if (VT.isFloatingPoint())
4764 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4765 VL);
4766
4767 // Avoid the tricky legalization cases by falling back to using the
4768 // splat code which already handles it gracefully.
4769 if (!Scalar.getValueType().bitsLE(XLenVT))
4770 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4771 DAG.getConstant(1, DL, XLenVT),
4772 VT, DL, DAG, Subtarget);
4773
4774 // If the operand is a constant, sign extend to increase our chances
4775 // of being able to use a .vi instruction. ANY_EXTEND would become a
4776  // zero extend and the simm5 check in isel would fail.
4777 // FIXME: Should we ignore the upper bits in isel instead?
4778 unsigned ExtOpc =
4779      isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4780  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4781 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4782 VL);
4783}
4784
4785/// If concat_vector(V1,V2) could be folded away to some existing
4786/// vector source, return it. Note that the source may be larger
4787/// than the requested concat_vector (i.e. an extract_subvector
4788/// might be required.)
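/// For illustration: with %src of type v8i32,
///   concat_vectors (extract_subvector %src, 0), (extract_subvector %src, 4)
/// folds back to %src itself.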
4789static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4790  EVT VT = V1.getValueType();
4791 assert(VT == V2.getValueType() && "argument types must match");
4792  // Both inputs must be extracts.
4793  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4794      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4795    return SDValue();
4796
4797 // Extracting from the same source.
4798 SDValue Src = V1.getOperand(0);
4799 if (Src != V2.getOperand(0) ||
4800 VT.isScalableVector() != Src.getValueType().isScalableVector())
4801 return SDValue();
4802
4803 // The extracts must extract the two halves of the source.
4804 if (V1.getConstantOperandVal(1) != 0 ||
4806 return SDValue();
4807
4808 return Src;
4809}
4810
4811// Can this shuffle be performed on exactly one (possibly larger) input?
4813
4814 if (V2.isUndef())
4815 return V1;
4816
4817 unsigned NumElts = VT.getVectorNumElements();
4818 // Src needs to have twice the number of elements.
4819 // TODO: Update shuffle lowering to add the extract subvector
4820 if (SDValue Src = foldConcatVector(V1, V2);
4821 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4822 return Src;
4823
4824 return SDValue();
4825}
4826
4827/// Is this shuffle interleaving contiguous elements from one vector into the
4828/// even elements and contiguous elements from another vector into the odd
4829/// elements. \p EvenSrc will contain the element that should be in the first
4830/// even element. \p OddSrc will contain the element that should be in the first
4831/// odd element. These can be the first element in a source or the element half
4832/// way through the source.
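/// For illustration (example masks only): with v8i32 operands, the mask
/// <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low halves of the two sources
/// (EvenSrc = 0, OddSrc = 8), while the single-source mask
/// <0, 4, 1, 5, 2, 6, 3, 7> interleaves the two halves of the first source
/// (EvenSrc = 0, OddSrc = 4).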
4833static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4834 int &OddSrc, const RISCVSubtarget &Subtarget) {
4835 // We need to be able to widen elements to the next larger integer type or
4836 // use the zip2a instruction at e64.
4837 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4838 !Subtarget.hasVendorXRivosVizip())
4839 return false;
4840
4841 int Size = Mask.size();
4842 int NumElts = VT.getVectorNumElements();
4843 assert(Size == (int)NumElts && "Unexpected mask size");
4844
4845 SmallVector<unsigned, 2> StartIndexes;
4846 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4847 return false;
4848
4849 EvenSrc = StartIndexes[0];
4850 OddSrc = StartIndexes[1];
4851
4852 // One source should be low half of first vector.
4853 if (EvenSrc != 0 && OddSrc != 0)
4854 return false;
4855
4856  // Subvectors will be extracted from either the start of the two input
4857  // vectors, or from the start and middle of the first vector if it's a unary
4858  // interleave.
4859 // In both cases, HalfNumElts will be extracted.
4860 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4861 // we'll create an illegal extract_subvector.
4862 // FIXME: We could support other values using a slidedown first.
4863 int HalfNumElts = NumElts / 2;
4864 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4865}
4866
4867/// Is this mask representing a masked combination of two slides?
4869 std::array<std::pair<int, int>, 2> &SrcInfo) {
4870 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4871 return false;
4872
4873 // Avoid matching vselect idioms
4874 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4875 return false;
4876 // Prefer vslideup as the second instruction, and identity
4877 // only as the initial instruction.
4878 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4879 SrcInfo[1].second == 0)
4880 std::swap(SrcInfo[0], SrcInfo[1]);
4881 assert(SrcInfo[0].first != -1 && "Must find one slide");
4882 return true;
4883}
4884
4885// Exactly matches the semantics of a previously existing custom matcher
4886// to allow migration to new matcher without changing output.
4887static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4888 unsigned NumElts) {
4889 if (SrcInfo[1].first == -1)
4890 return true;
4891 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4892 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4893}
4894
4895static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4896 ArrayRef<int> Mask, unsigned Factor,
4897 bool RequiredPolarity) {
4898 int NumElts = Mask.size();
4899 for (const auto &[Idx, M] : enumerate(Mask)) {
4900 if (M < 0)
4901 continue;
4902 int Src = M >= NumElts;
4903 int Diff = (int)Idx - (M % NumElts);
4904 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4905 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4906 "Must match exactly one of the two slides");
4907 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4908 return false;
4909 }
4910 return true;
4911}
4912
4913/// Given a shuffle which can be represented as a pair of two slides,
4914/// see if it is a zipeven idiom. Zipeven is:
4915/// vs2: a0 a1 a2 a3
4916/// vs1: b0 b1 b2 b3
4917/// vd: a0 b0 a2 b2
4918static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4919 ArrayRef<int> Mask, unsigned &Factor) {
4920 Factor = SrcInfo[1].second;
4921 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4922 Mask.size() % Factor == 0 &&
4923 isAlternating(SrcInfo, Mask, Factor, true);
4924}
4925
4926/// Given a shuffle which can be represented as a pair of two slides,
4927/// see if it is a zipodd idiom. Zipodd is:
4928/// vs2: a0 a1 a2 a3
4929/// vs1: b0 b1 b2 b3
4930/// vd: a1 b1 a3 b3
4931/// Note that the operand order is swapped due to the way we canonicalize
4932/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4933static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4934 ArrayRef<int> Mask, unsigned &Factor) {
4935 Factor = -SrcInfo[1].second;
4936 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4937 Mask.size() % Factor == 0 &&
4938 isAlternating(SrcInfo, Mask, Factor, false);
4939}
4940
4941// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4942// 2, 4, 8 and the integer type Factor-times larger than VT's
4943// element type must be a legal element type.
4944// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4945// -> [p, q, r, s] (Factor=2, Index=1)
4947 SDValue Src, unsigned Factor,
4948 unsigned Index, SelectionDAG &DAG) {
4949 unsigned EltBits = VT.getScalarSizeInBits();
4950 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4951 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4952 SrcEC.divideCoefficientBy(Factor));
4953 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4954 SrcEC.divideCoefficientBy(Factor));
4955 Src = DAG.getBitcast(WideSrcVT, Src);
4956
4957 unsigned Shift = Index * EltBits;
4958 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4959 DAG.getConstant(Shift, DL, WideSrcVT));
4960 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4961 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4962 Res = DAG.getBitcast(CastVT, Res);
4963 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4964}
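// Worked example (illustrative): with Src = v8i8 <a,p,b,q,c,r,d,s>, Factor = 2
// and Index = 1, Src is bitcast to v4i16, logically shifted right by 8, and
// truncated back to v4i8, producing <p,q,r,s>; the result is then inserted at
// index 0 of an undef vector of the requested result type.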
4965
4966/// Match a single source shuffle which is an identity except that some
4967/// particular element is repeated. This can be lowered as a masked
4968/// vrgather.vi/vx. Note that the two source form of this is handled
4969/// by the recursive splitting logic and doesn't need special handling.
4970 static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4971 const RISCVSubtarget &Subtarget,
4972 SelectionDAG &DAG) {
4973
4974 SDLoc DL(SVN);
4975 MVT VT = SVN->getSimpleValueType(0);
4976 SDValue V1 = SVN->getOperand(0);
4977 assert(SVN->getOperand(1).isUndef());
4978 ArrayRef<int> Mask = SVN->getMask();
4979 const unsigned NumElts = VT.getVectorNumElements();
4980 MVT XLenVT = Subtarget.getXLenVT();
4981
4982 std::optional<int> SplatIdx;
4983 for (auto [I, M] : enumerate(Mask)) {
4984 if (M == -1 || I == (unsigned)M)
4985 continue;
4986 if (SplatIdx && *SplatIdx != M)
4987 return SDValue();
4988 SplatIdx = M;
4989 }
4990
4991 if (!SplatIdx)
4992 return SDValue();
4993
4994 SmallVector<SDValue> MaskVals;
4995 for (int MaskIndex : Mask) {
4996 bool SelectMaskVal = MaskIndex == *SplatIdx;
4997 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4998 }
4999 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5000 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5001 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5002 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5003 SmallVector<int>(NumElts, *SplatIdx));
5004 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5005}
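// Worked example (illustrative): the single-source mask <0, 2, 2, 3> is an
// identity except that lane 1 repeats element 2.  SplatIdx becomes 2, the
// select mask is <0, 1, 1, 0>, and the resulting vselect of a splat of
// element 2 over V1 can then be lowered as a masked vrgather.vi.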
5006
5007// Lower the following shuffle to vslidedown.
5008// a)
5009// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5010// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5011// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5012// b)
5013// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5014// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5015// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5016// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5017// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5018// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5019 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5020 SDValue V1, SDValue V2,
5021 ArrayRef<int> Mask,
5022 const RISCVSubtarget &Subtarget,
5023 SelectionDAG &DAG) {
5024 auto findNonEXTRACT_SUBVECTORParent =
5025 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5026 uint64_t Offset = 0;
5027 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5028 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5029 // a scalable vector, but we don't want to match that case.
5030 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5031 Offset += Parent.getConstantOperandVal(1);
5032 Parent = Parent.getOperand(0);
5033 }
5034 return std::make_pair(Parent, Offset);
5035 };
5036
5037 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5038 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5039
5040 // Extracting from the same source.
5041 SDValue Src = V1Src;
5042 if (Src != V2Src)
5043 return SDValue();
5044
5045 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5046 SmallVector<int, 16> NewMask(Mask);
5047 for (size_t i = 0; i != NewMask.size(); ++i) {
5048 if (NewMask[i] == -1)
5049 continue;
5050
5051 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5052 NewMask[i] = NewMask[i] + V1IndexOffset;
5053 } else {
5054 // Subtracting NewMask.size() is needed; otherwise, case b) above would be
5055 // <5,6,7,12> instead of <5,6,7,8>.
5056 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5057 }
5058 }
5059
5060 // First index must be known and non-zero. It will be used as the slidedown
5061 // amount.
5062 if (NewMask[0] <= 0)
5063 return SDValue();
5064
5065 // NewMask must also be contiguous.
5066 for (unsigned i = 1; i != NewMask.size(); ++i)
5067 if (NewMask[i - 1] + 1 != NewMask[i])
5068 return SDValue();
5069
5070 MVT XLenVT = Subtarget.getXLenVT();
5071 MVT SrcVT = Src.getSimpleValueType();
5072 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5073 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5074 SDValue Slidedown =
5075 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5076 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5077 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5078 return DAG.getExtractSubvector(
5079 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5080}
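// Worked example (illustrative): in case a) above, both operands trace back to
// the same source t13, and the rebuilt mask is <1,2,3,4,5,6,7,8>, which is
// contiguous with a leading index of 1; the shuffle therefore becomes a single
// vslidedown of t13 by 1 followed by an extract of the low v8i8.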
5081
5082// Because vslideup leaves the destination elements at the start intact, we can
5083// use it to perform shuffles that insert subvectors:
5084//
5085// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5086// ->
5087// vsetvli zero, 8, e8, mf2, ta, ma
5088// vslideup.vi v8, v9, 4
5089//
5090// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5091// ->
5092// vsetvli zero, 5, e8, mf2, tu, ma
5093 // vslideup.vi v8, v9, 2
5094 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5095 SDValue V1, SDValue V2,
5096 ArrayRef<int> Mask,
5097 const RISCVSubtarget &Subtarget,
5098 SelectionDAG &DAG) {
5099 unsigned NumElts = VT.getVectorNumElements();
5100 int NumSubElts, Index;
5101 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5102 Index))
5103 return SDValue();
5104
5105 bool OpsSwapped = Mask[Index] < (int)NumElts;
5106 SDValue InPlace = OpsSwapped ? V2 : V1;
5107 SDValue ToInsert = OpsSwapped ? V1 : V2;
5108
5109 MVT XLenVT = Subtarget.getXLenVT();
5110 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5111 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5112 // We slide up by the index that the subvector is being inserted at, and set
5113 // VL to the index + the number of elements being inserted.
5114 unsigned Policy =
5116 // If we're adding a suffix to the in place vector, i.e. inserting right
5117 // up to the very end of it, then we don't actually care about the tail.
5118 if (NumSubElts + Index >= (int)NumElts)
5119 Policy |= RISCVVType::TAIL_AGNOSTIC;
5120
5121 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5122 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5123 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5124
5125 SDValue Res;
5126 // If we're inserting into the lowest elements, use a tail undisturbed
5127 // vmv.v.v.
5128 if (Index == 0)
5129 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5130 VL);
5131 else
5132 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5133 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5134 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5135}
5136
5137/// Match v(f)slide1up/down idioms. These operations involve sliding
5138/// N-1 elements to make room for an inserted scalar at one end.
5139 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5140 SDValue V1, SDValue V2,
5141 ArrayRef<int> Mask,
5142 const RISCVSubtarget &Subtarget,
5143 SelectionDAG &DAG) {
5144 bool OpsSwapped = false;
5145 if (!isa<BuildVectorSDNode>(V1)) {
5146 if (!isa<BuildVectorSDNode>(V2))
5147 return SDValue();
5148 std::swap(V1, V2);
5149 OpsSwapped = true;
5150 }
5151 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5152 if (!Splat)
5153 return SDValue();
5154
5155 // Return true if the mask could describe a slide of Mask.size() - 1
5156 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5157 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5158 const unsigned S = (Offset > 0) ? 0 : -Offset;
5159 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5160 for (unsigned i = S; i != E; ++i)
5161 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5162 return false;
5163 return true;
5164 };
5165
5166 const unsigned NumElts = VT.getVectorNumElements();
5167 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5168 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5169 return SDValue();
5170
5171 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5172 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5173 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5174 return SDValue();
5175
5176 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5177 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5178
5179 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5180 // vslide1{down,up}.vx instead.
5181 if (VT.getVectorElementType() == MVT::bf16 ||
5182 (VT.getVectorElementType() == MVT::f16 &&
5183 !Subtarget.hasVInstructionsF16())) {
5184 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5185 Splat =
5186 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5187 V2 = DAG.getBitcast(
5188 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5189 SDValue Vec = DAG.getNode(
5190 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5191 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5192 Vec = DAG.getBitcast(ContainerVT, Vec);
5193 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5194 }
5195
5196 auto OpCode = IsVSlidedown ?
5197 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5198 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5199 if (!VT.isFloatingPoint())
5200 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5201 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5202 DAG.getUNDEF(ContainerVT),
5203 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5204 Splat, TrueMask, VL);
5205 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5206}
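// Worked example (illustrative): shuffle(splat(x), B, <0, 4, 5, 6>) with four
// elements produces <x, b0, b1, b2>.  The mask matches the slide-up form with
// the inserted lane coming from the splat, so it lowers to a single
// v(f)slide1up of B with the scalar x.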
5207
5208/// Match a mask which "spreads" the leading elements of a vector evenly
5209/// across the result. Factor is the spread amount, and Index is the
5210/// offset applied. (on success, Index < Factor) This is the inverse
5211/// of a deinterleave with the same Factor and Index. This is analogous
5212/// to an interleave, except that all but one lane is undef.
5213 bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5214 unsigned &Index) {
5215 SmallVector<bool> LaneIsUndef(Factor, true);
5216 for (unsigned i = 0; i < Mask.size(); i++)
5217 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5218
5219 bool Found = false;
5220 for (unsigned i = 0; i < Factor; i++) {
5221 if (LaneIsUndef[i])
5222 continue;
5223 if (Found)
5224 return false;
5225 Index = i;
5226 Found = true;
5227 }
5228 if (!Found)
5229 return false;
5230
5231 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5232 unsigned j = i * Factor + Index;
5233 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5234 return false;
5235 }
5236 return true;
5237}
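// Worked example (illustrative): for Factor = 4, the mask
// <-1,-1,0,-1, -1,-1,1,-1> spreads elements 0 and 1 into every fourth lane,
// and the matcher returns true with Index = 2.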
5238
5239static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5240 const SDLoc &DL, SelectionDAG &DAG,
5241 const RISCVSubtarget &Subtarget) {
5242 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5243 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5244 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5246
5247 MVT VT = Op0.getSimpleValueType();
5248 MVT IntVT = VT.changeVectorElementTypeToInteger();
5249 Op0 = DAG.getBitcast(IntVT, Op0);
5250 Op1 = DAG.getBitcast(IntVT, Op1);
5251
5252 MVT ContainerVT = IntVT;
5253 if (VT.isFixedLengthVector()) {
5254 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5255 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5256 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5257 }
5258
5259 MVT InnerVT = ContainerVT;
5260 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5261 if (Op1.isUndef() &&
5262 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5263 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5264 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5265 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5266 Subtarget.getXLenVT());
5267 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5268 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5269 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5270 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5271 }
5272
5273 SDValue Passthru = DAG.getUNDEF(InnerVT);
5274 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5275 if (InnerVT.bitsLT(ContainerVT))
5276 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5277 if (IntVT.isFixedLengthVector())
5278 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5279 Res = DAG.getBitcast(VT, Res);
5280 return Res;
5281}
5282
5283// Given a vector a, b, c, d return a vector Factor times longer
5284// with Factor-1 undef's between elements. Ex:
5285// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5286// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5287static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5288 const SDLoc &DL, SelectionDAG &DAG) {
5289
5290 MVT VT = V.getSimpleValueType();
5291 unsigned EltBits = VT.getScalarSizeInBits();
5292 ElementCount EC = VT.getVectorElementCount();
5293 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5294
5295 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5296
5297 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5298 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5299 // allow the SHL to fold away if Index is 0.
5300 if (Index != 0)
5301 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5302 DAG.getConstant(EltBits * Index, DL, WideVT));
5303 // Make sure to use the original element type.
5304 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5305 EC.multiplyCoefficientBy(Factor));
5306 return DAG.getBitcast(ResultVT, Result);
5307}
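// Worked example (illustrative): for V = v4i8 <a,b,c,d>, Factor = 2 and
// Index = 1, V is zero-extended to v4i16 and shifted left by 8, so each i16
// lane holds its element in the high byte; bitcasting back to v8i8 gives
// <z,a,z,b,z,c,z,d>, where the z lanes are zeros standing in for the undef
// lanes in the spread.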
5308
5309 // Given two input vectors of <[vscale x] n x ty>, use vwaddu.vv and vwmaccu.vx
5310// to create an interleaved vector of <[vscale x] n*2 x ty>.
5311// This requires that the size of ty is less than the subtarget's maximum ELEN.
5312 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5313 const SDLoc &DL, SelectionDAG &DAG,
5314 const RISCVSubtarget &Subtarget) {
5315
5316 // FIXME: Not only does this optimize the code, it fixes some correctness
5317 // issues because MIR does not have freeze.
5318 if (EvenV.isUndef())
5319 return getWideningSpread(OddV, 2, 1, DL, DAG);
5320 if (OddV.isUndef())
5321 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5322
5323 MVT VecVT = EvenV.getSimpleValueType();
5324 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5325 // Convert fixed vectors to scalable if needed
5326 if (VecContainerVT.isFixedLengthVector()) {
5327 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5328 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5329 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5330 }
5331
5332 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5333
5334 // We're working with a vector of the same size as the resulting
5335 // interleaved vector, but with half the number of elements and
5336 // twice the SEW (hence the restriction on not using the maximum
5337 // ELEN).
5338 MVT WideVT =
5339 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5340 VecVT.getVectorElementCount());
5341 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5342 if (WideContainerVT.isFixedLengthVector())
5343 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5344
5345 // Bitcast the input vectors to integers in case they are FP
5346 VecContainerVT = VecContainerVT.changeTypeToInteger();
5347 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5348 OddV = DAG.getBitcast(VecContainerVT, OddV);
5349
5350 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5351 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5352
5353 SDValue Interleaved;
5354 if (Subtarget.hasStdExtZvbb()) {
5355 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5356 SDValue OffsetVec =
5357 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5358 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5359 OffsetVec, Passthru, Mask, VL);
5360 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5361 Interleaved, EvenV, Passthru, Mask, VL);
5362 } else {
5363 // FIXME: We should freeze the odd vector here. We already handled the case
5364 // of provably undef/poison above.
5365
5366 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5367 // vwaddu.vv
5368 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5369 OddV, Passthru, Mask, VL);
5370
5371 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
5372 SDValue AllOnesVec = DAG.getSplatVector(
5373 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5374 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5375 OddV, AllOnesVec, Passthru, Mask, VL);
5376
5377 // Add the two together so we get
5378 // (OddV * 0xff...ff) + (OddV + EvenV)
5379 // = (OddV * 0x100...00) + EvenV
5380 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5381 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5382 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5383 Interleaved, OddsMul, Passthru, Mask, VL);
5384 }
5385
5386 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
5387 MVT ResultContainerVT = MVT::getVectorVT(
5388 VecVT.getVectorElementType(), // Make sure to use original type
5389 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5390 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5391
5392 // Convert back to a fixed vector if needed
5393 MVT ResultVT =
5394 MVT::getVectorVT(VecVT.getVectorElementType(),
5395 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5396 if (ResultVT.isFixedLengthVector())
5397 Interleaved =
5398 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5399
5400 return Interleaved;
5401}
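// Worked example (illustrative, non-Zvbb path): with SEW=8, an EvenV lane
// e = 0x12 and an OddV lane o = 0x34, vwaddu.vv produces 0x0046 and the
// widening multiply of o by 0xff adds 0x33cc, for a total of 0x3412, i.e.
// (o << 8) | e: the two bytes interleaved within one 16-bit lane of the
// widened result.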
5402
5403// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5404// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5405 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5406 SelectionDAG &DAG,
5407 const RISCVSubtarget &Subtarget) {
5408 SDLoc DL(SVN);
5409 MVT VT = SVN->getSimpleValueType(0);
5410 SDValue V = SVN->getOperand(0);
5411 unsigned NumElts = VT.getVectorNumElements();
5412
5413 assert(VT.getVectorElementType() == MVT::i1);
5414
5415 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5416 SVN->getMask().size()) ||
5417 !SVN->getOperand(1).isUndef())
5418 return SDValue();
5419
5420 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5421 EVT ViaVT = EVT::getVectorVT(
5422 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5423 EVT ViaBitVT =
5424 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5425
5426 // If we don't have zvbb or the larger element type is > ELEN, the operation
5427 // will be illegal.
5428 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5429 ViaVT) ||
5430 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5431 return SDValue();
5432
5433 // If the bit vector doesn't fit exactly into the larger element type, we need
5434 // to insert it into the larger vector and then shift up the reversed bits
5435 // afterwards to get rid of the gap introduced.
5436 if (ViaEltSize > NumElts)
5437 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5438
5439 SDValue Res =
5440 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5441
5442 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5443 // element type.
5444 if (ViaEltSize > NumElts)
5445 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5446 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5447
5448 Res = DAG.getBitcast(ViaBitVT, Res);
5449
5450 if (ViaEltSize > NumElts)
5451 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5452 return Res;
5453}
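// Worked example (illustrative): reversing a v4i1 uses ViaVT = v1i8.  The four
// bits are inserted into a v8i1, the i8 is bit-reversed (placing the payload
// in the high nibble), shifted right by 8 - 4 = 4 to remove the gap, and the
// low v4i1 is extracted again.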
5454
5455 static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5456 const RISCVSubtarget &Subtarget,
5457 MVT &RotateVT, unsigned &RotateAmt) {
5458 unsigned NumElts = VT.getVectorNumElements();
5459 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5460 unsigned NumSubElts;
5461 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5462 NumElts, NumSubElts, RotateAmt))
5463 return false;
5464 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5465 NumElts / NumSubElts);
5466
5467 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5468 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5469}
5470
5471// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5472// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5473// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5474 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5475 SelectionDAG &DAG,
5476 const RISCVSubtarget &Subtarget) {
5477 SDLoc DL(SVN);
5478
5479 EVT VT = SVN->getValueType(0);
5480 unsigned RotateAmt;
5481 MVT RotateVT;
5482 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5483 return SDValue();
5484
5485 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5486
5487 SDValue Rotate;
5488 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5489 // so canonicalize to vrev8.
5490 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5491 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5492 else
5493 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5494 DAG.getConstant(RotateAmt, DL, RotateVT));
5495
5496 return DAG.getBitcast(VT, Rotate);
5497}
5498
5499// If compiling with an exactly known VLEN, see if we can split a
5500// shuffle on m2 or larger into a small number of m1 sized shuffles
5501 // which write each destination register exactly once.
5502 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5503 SelectionDAG &DAG,
5504 const RISCVSubtarget &Subtarget) {
5505 SDLoc DL(SVN);
5506 MVT VT = SVN->getSimpleValueType(0);
5507 SDValue V1 = SVN->getOperand(0);
5508 SDValue V2 = SVN->getOperand(1);
5509 ArrayRef<int> Mask = SVN->getMask();
5510
5511 // If we don't know exact data layout, not much we can do. If this
5512 // is already m1 or smaller, no point in splitting further.
5513 const auto VLen = Subtarget.getRealVLen();
5514 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5515 return SDValue();
5516
5517 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5518 // expansion for.
5519 unsigned RotateAmt;
5520 MVT RotateVT;
5521 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5522 return SDValue();
5523
5524 MVT ElemVT = VT.getVectorElementType();
5525 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5526
5527 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5528 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5529 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5530 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5531 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5532 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5533 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5534 unsigned NumOfDestRegs = NumElts / NumOpElts;
5535 // The following semantically builds up a fixed length concat_vector
5536 // of the component shuffle_vectors. We eagerly lower to scalable here
5537 // to avoid DAG combining it back to a large shuffle_vector again.
5538 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5539 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5540 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5541 Operands;
5542 processShuffleMasks(
5543 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5544 [&]() { Operands.emplace_back(); },
5545 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5546 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5547 SmallVector<int>(SrcSubMask));
5548 },
5549 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5550 if (NewReg)
5551 Operands.emplace_back();
5552 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5553 });
5554 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5555 // Note: check that we do not emit too many shuffles here to prevent code
5556 // size explosion.
5557 // TODO: investigate, if it can be improved by extra analysis of the masks to
5558 // check if the code is more profitable.
5559 unsigned NumShuffles = std::accumulate(
5560 Operands.begin(), Operands.end(), 0u,
5561 [&](unsigned N,
5562 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5563 if (Data.empty())
5564 return N;
5565 N += Data.size();
5566 for (const auto &P : Data) {
5567 unsigned Idx2 = std::get<1>(P);
5568 ArrayRef<int> Mask = std::get<2>(P);
5569 if (Idx2 != UINT_MAX)
5570 ++N;
5571 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5572 --N;
5573 }
5574 return N;
5575 });
5576 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5577 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5578 return SDValue();
5579 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5580 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5581 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5582 return SubVec;
5583 };
5584 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5585 ArrayRef<int> Mask) {
5586 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5587 return SubVec;
5588 };
5589 SDValue Vec = DAG.getUNDEF(ContainerVT);
5590 for (auto [I, Data] : enumerate(Operands)) {
5591 if (Data.empty())
5592 continue;
5593 SmallDenseMap<unsigned, SDValue, 4> Values;
5594 for (unsigned I : seq<unsigned>(Data.size())) {
5595 const auto &[Idx1, Idx2, _] = Data[I];
5596 // If the shuffle contains a permutation of an odd number of elements,
5597 // Idx1 might be used already in the first iteration.
5598 //
5599 // Idx1 = shuffle Idx1, Idx2
5600 // Idx1 = shuffle Idx1, Idx3
5601 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5602 if (!V)
5603 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5604 (Idx1 % NumOfSrcRegs) * NumOpElts);
5605 if (Idx2 != UINT_MAX) {
5606 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5607 if (!V)
5608 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5609 (Idx2 % NumOfSrcRegs) * NumOpElts);
5610 }
5611 }
5612 SDValue V;
5613 for (const auto &[Idx1, Idx2, Mask] : Data) {
5614 SDValue V1 = Values.at(Idx1);
5615 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5616 V = PerformShuffle(V1, V2, Mask);
5617 Values[Idx1] = V;
5618 }
5619
5620 unsigned InsertIdx = I * NumOpElts;
5621 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5622 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5623 }
5624 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5625}
5626
5627// Matches a subset of compress masks with a contiguous prefix of output
5628// elements. This could be extended to allow gaps by deciding which
5629// source elements to spuriously demand.
5630 static bool isCompressMask(ArrayRef<int> Mask) {
5631 int Last = -1;
5632 bool SawUndef = false;
5633 for (const auto &[Idx, M] : enumerate(Mask)) {
5634 if (M == -1) {
5635 SawUndef = true;
5636 continue;
5637 }
5638 if (SawUndef)
5639 return false;
5640 if (Idx > (unsigned)M)
5641 return false;
5642 if (M <= Last)
5643 return false;
5644 Last = M;
5645 }
5646 return true;
5647}
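// Worked example (illustrative): <0, 2, 3, -1> is accepted (indices strictly
// increase, each is >= its position, and undefs appear only at the tail), so
// the shuffle becomes a vcompress keeping source elements 0, 2 and 3;
// <2, 1, 3, -1> is rejected because the indices are not increasing.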
5648
5649/// Given a shuffle where the indices are disjoint between the two sources,
5650/// e.g.:
5651///
5652/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5653///
5654/// Merge the two sources into one and do a single source shuffle:
5655///
5656/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5657/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5658///
5659/// A vselect will either be merged into a masked instruction or be lowered as a
5660/// vmerge.vvm, which is cheaper than a vrgather.vv.
5661 static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5662 SelectionDAG &DAG,
5663 const RISCVSubtarget &Subtarget) {
5664 MVT VT = SVN->getSimpleValueType(0);
5665 MVT XLenVT = Subtarget.getXLenVT();
5666 SDLoc DL(SVN);
5667
5668 const ArrayRef<int> Mask = SVN->getMask();
5669
5670 // Work out which source each lane will come from.
5671 SmallVector<int, 16> Srcs(Mask.size(), -1);
5672
5673 for (int Idx : Mask) {
5674 if (Idx == -1)
5675 continue;
5676 unsigned SrcIdx = Idx % Mask.size();
5677 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5678 if (Srcs[SrcIdx] == -1)
5679 // Mark this source as using this lane.
5680 Srcs[SrcIdx] = Src;
5681 else if (Srcs[SrcIdx] != Src)
5682 // The other source is using this lane: not disjoint.
5683 return SDValue();
5684 }
5685
5686 SmallVector<SDValue> SelectMaskVals;
5687 for (int Lane : Srcs) {
5688 if (Lane == -1)
5689 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5690 else
5691 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5692 }
5693 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5694 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5695 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5696 SVN->getOperand(0), SVN->getOperand(1));
5697
5698 // Move all indices relative to the first source.
5699 SmallVector<int> NewMask(Mask.size());
5700 for (unsigned I = 0; I < Mask.size(); I++) {
5701 if (Mask[I] == -1)
5702 NewMask[I] = -1;
5703 else
5704 NewMask[I] = Mask[I] % Mask.size();
5705 }
5706
5707 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5708}
5709
5710/// Is this mask local (i.e. elements only move within their local span), and
5711/// repeating (that is, the same rearrangement is being done within each span)?
5712static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5713 // Require a prefix from the original mask until the consumer code
5714 // is adjusted to rewrite the mask instead of just taking a prefix.
5715 for (auto [I, M] : enumerate(Mask)) {
5716 if (M == -1)
5717 continue;
5718 if ((M / Span) != (int)(I / Span))
5719 return false;
5720 int SpanIdx = I % Span;
5721 int Expected = M % Span;
5722 if (Mask[SpanIdx] != Expected)
5723 return false;
5724 }
5725 return true;
5726}
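// Worked example (illustrative): with Span = 4, the mask <3,0,1,2, 7,4,5,6> is
// local and repeating: each span only uses indices from its own span, and the
// second span applies the same rotate-by-one that the first span does.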
5727
5728/// Is this mask only using elements from the first span of the input?
5729static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5730 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5731}
5732
5733/// Return true for a mask which performs an arbitrary shuffle within the first
5734/// span, and then repeats that same result across all remaining spans. Note
5735/// that this doesn't check if all the inputs come from a single span!
5736static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5737 // Require a prefix from the original mask until the consumer code
5738 // is adjusted to rewrite the mask instead of just taking a prefix.
5739 for (auto [I, M] : enumerate(Mask)) {
5740 if (M == -1)
5741 continue;
5742 int SpanIdx = I % Span;
5743 if (Mask[SpanIdx] != M)
5744 return false;
5745 }
5746 return true;
5747}
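// Worked example (illustrative): with Span = 4, <1,2,2,3, 1,2,2,3> is a span
// splat: every span reproduces the result of the first span.  Note that the
// caller separately checks (via isLowSourceShuffle) that all indices come from
// the first span.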
5748
5749/// Try to widen element type to get a new mask value for a better permutation
5750/// sequence. This doesn't try to inspect the widened mask for profitability;
5751/// we speculate the widened form is equal or better. This has the effect of
5752/// reducing mask constant sizes - allowing cheaper materialization sequences
5753/// - and index sequence sizes - reducing register pressure and materialization
5754/// cost, at the cost of (possibly) an extra VTYPE toggle.
5755 static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5756 SDLoc DL(Op);
5757 MVT VT = Op.getSimpleValueType();
5758 MVT ScalarVT = VT.getVectorElementType();
5759 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5760 SDValue V0 = Op.getOperand(0);
5761 SDValue V1 = Op.getOperand(1);
5762 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5763
5764 // Avoid wasted work leading to isTypeLegal check failing below
5765 if (ElementSize > 32)
5766 return SDValue();
5767
5768 SmallVector<int, 8> NewMask;
5769 if (!widenShuffleMaskElts(Mask, NewMask))
5770 return SDValue();
5771
5772 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5773 : MVT::getIntegerVT(ElementSize * 2);
5774 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5775 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5776 return SDValue();
5777 V0 = DAG.getBitcast(NewVT, V0);
5778 V1 = DAG.getBitcast(NewVT, V1);
5779 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5780}
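// Worked example (illustrative): the v8i16 mask <2,3,0,1,6,7,4,5> widens to
// the v4i32 mask <1,0,3,2>, halving the number of indices that have to be
// materialized, at the cost of a possible extra vtype toggle.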
5781
5782 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5783 const RISCVSubtarget &Subtarget) {
5784 SDValue V1 = Op.getOperand(0);
5785 SDValue V2 = Op.getOperand(1);
5786 SDLoc DL(Op);
5787 MVT XLenVT = Subtarget.getXLenVT();
5788 MVT VT = Op.getSimpleValueType();
5789 unsigned NumElts = VT.getVectorNumElements();
5790 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5791
5792 if (VT.getVectorElementType() == MVT::i1) {
5793 // Lower to a vror.vi of a larger element type if possible before we promote
5794 // i1s to i8s.
5795 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5796 return V;
5797 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5798 return V;
5799
5800 // Promote i1 shuffle to i8 shuffle.
5801 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5802 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5803 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5804 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5805 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5806 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5807 ISD::SETNE);
5808 }
5809
5810 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5811
5812 // Store the return value in a single variable instead of structured bindings
5813 // so that we can pass it to GetSlide below, which cannot capture structured
5814 // bindings until C++20.
5815 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5816 auto [TrueMask, VL] = TrueMaskVL;
5817
5818 if (SVN->isSplat()) {
5819 const int Lane = SVN->getSplatIndex();
5820 if (Lane >= 0) {
5821 MVT SVT = VT.getVectorElementType();
5822
5823 // Turn splatted vector load into a strided load with an X0 stride.
5824 SDValue V = V1;
5825 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5826 // with undef.
5827 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5828 int Offset = Lane;
5829 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5830 int OpElements =
5831 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5832 V = V.getOperand(Offset / OpElements);
5833 Offset %= OpElements;
5834 }
5835
5836 // We need to ensure the load isn't atomic or volatile.
5837 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5838 auto *Ld = cast<LoadSDNode>(V);
5839 Offset *= SVT.getStoreSize();
5840 SDValue NewAddr = DAG.getMemBasePlusOffset(
5841 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5842
5843 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5844 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5845 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5846 SDValue IntID =
5847 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5848 SDValue Ops[] = {Ld->getChain(),
5849 IntID,
5850 DAG.getUNDEF(ContainerVT),
5851 NewAddr,
5852 DAG.getRegister(RISCV::X0, XLenVT),
5853 VL};
5854 SDValue NewLoad = DAG.getMemIntrinsicNode(
5855 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5856 DAG.getMachineFunction().getMachineMemOperand(
5857 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5858 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5859 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5860 }
5861
5862 MVT SplatVT = ContainerVT;
5863
5864 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5865 if (SVT == MVT::bf16 ||
5866 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5867 SVT = MVT::i16;
5868 SplatVT = ContainerVT.changeVectorElementType(SVT);
5869 }
5870
5871 // Otherwise use a scalar load and splat. This will give the best
5872 // opportunity to fold a splat into the operation. ISel can turn it into
5873 // the x0 strided load if we aren't able to fold away the select.
5874 if (SVT.isFloatingPoint())
5875 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5876 Ld->getPointerInfo().getWithOffset(Offset),
5877 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5878 else
5879 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5880 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5881 Ld->getBaseAlign(),
5882 Ld->getMemOperand()->getFlags());
5883 DAG.makeEquivalentMemoryOrdering(Ld, V);
5884
5885 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5886 : RISCVISD::VMV_V_X_VL;
5887 SDValue Splat =
5888 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5889 Splat = DAG.getBitcast(ContainerVT, Splat);
5890 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5891 }
5892
5893 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5894 assert(Lane < (int)NumElts && "Unexpected lane!");
5895 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5896 V1, DAG.getConstant(Lane, DL, XLenVT),
5897 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5898 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5899 }
5900 }
5901
5902 // For exact VLEN m2 or greater, try to split to m1 operations if we
5903 // can split cleanly.
5904 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5905 return V;
5906
5907 ArrayRef<int> Mask = SVN->getMask();
5908
5909 if (SDValue V =
5910 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5911 return V;
5912
5913 if (SDValue V =
5914 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5915 return V;
5916
5917 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5918 // available.
5919 if (Subtarget.hasStdExtZvkb())
5920 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5921 return V;
5922
5923 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5924 NumElts != 2)
5925 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5926
5927 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5928 // use shift and truncate to perform the shuffle.
5929 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5930 // shift-and-trunc reducing total cost for everything except an mf8 result.
5931 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5932 // to do the entire operation.
5933 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5934 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5935 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5936 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5937 unsigned Index = 0;
5938 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5939 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5940 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5941 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5942 if (1 < count_if(Mask,
5943 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5944 1 < count_if(Mask, [&Mask](int Idx) {
5945 return Idx >= (int)Mask.size();
5946 })) {
5947 // Narrow each source and concatenate them.
5948 // FIXME: For small LMUL it is better to concatenate first.
5949 MVT EltVT = VT.getVectorElementType();
5950 auto EltCnt = VT.getVectorElementCount();
5951 MVT SubVT =
5952 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5953
5954 SDValue Lo =
5955 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5956 SDValue Hi =
5957 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5958
5959 SDValue Concat =
5960 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5961 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5962 if (Factor == 2)
5963 return Concat;
5964
5965 SDValue Vec = DAG.getUNDEF(VT);
5966 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5967 }
5968 }
5969 }
5970 }
5971
5972 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5973 // e64 which can't match above.
5974 unsigned Index = 0;
5975 if (Subtarget.hasVendorXRivosVizip() &&
5976 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
5977 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5978 unsigned Opc =
5979 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
5980 if (V2.isUndef())
5981 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5982 if (auto VLEN = Subtarget.getRealVLen();
5983 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5984 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5985 if (SDValue Src = foldConcatVector(V1, V2)) {
5986 EVT NewVT = VT.getDoubleNumVectorElementsVT();
5987 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
5988 SDValue Res =
5989 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
5990 return DAG.getExtractSubvector(DL, VT, Res, 0);
5991 }
5992 // Deinterleave each source and concatenate them, or concat first, then
5993 // deinterleave.
5994 if (1 < count_if(Mask,
5995 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5996 1 < count_if(Mask,
5997 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
5998
5999 const unsigned EltSize = VT.getScalarSizeInBits();
6000 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6001 if (NumElts < MinVLMAX) {
6002 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6003 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6004 SDValue Res =
6005 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6006 return DAG.getExtractSubvector(DL, VT, Res, 0);
6007 }
6008
6009 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6010 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6011
6012 MVT SubVT = VT.getHalfNumVectorElementsVT();
6013 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6014 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6015 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6016 }
6017 }
6018
6019 if (SDValue V =
6020 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6021 return V;
6022
6023 // Detect an interleave shuffle and lower to
6024 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6025 int EvenSrc, OddSrc;
6026 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6027 !(NumElts == 2 &&
6028 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6029 // Extract the halves of the vectors.
6030 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6031
6032 // Recognize if one half is actually undef; the matching above will
6033 // otherwise reuse the even stream for the undef one. This improves
6034 // spread(2) shuffles.
6035 bool LaneIsUndef[2] = { true, true};
6036 for (const auto &[Idx, M] : enumerate(Mask))
6037 LaneIsUndef[Idx % 2] &= (M == -1);
6038
6039 int Size = Mask.size();
6040 SDValue EvenV, OddV;
6041 if (LaneIsUndef[0]) {
6042 EvenV = DAG.getUNDEF(HalfVT);
6043 } else {
6044 assert(EvenSrc >= 0 && "Undef source?");
6045 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6046 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6047 }
6048
6049 if (LaneIsUndef[1]) {
6050 OddV = DAG.getUNDEF(HalfVT);
6051 } else {
6052 assert(OddSrc >= 0 && "Undef source?");
6053 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6054 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6055 }
6056
6057 // Prefer vzip2a if available.
6058 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6059 if (Subtarget.hasVendorXRivosVizip()) {
6060 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6061 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6062 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6063 }
6064 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6065 }
6066
6067 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6068 // instructions (in any combination) with masking on the second instruction.
6069 // Also handles masked slides into an identity source, and single slides
6070 // without masking. Avoid matching bit rotates (which are not also element
6071 // rotates) as slide pairs. This is a performance heuristic, not a
6072 // functional check.
6073 std::array<std::pair<int, int>, 2> SrcInfo;
6074 unsigned RotateAmt;
6075 MVT RotateVT;
6076 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6077 (isElementRotate(SrcInfo, NumElts) ||
6078 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6079 SDValue Sources[2];
6080 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6081 int SrcIdx = Info.first;
6082 assert(SrcIdx == 0 || SrcIdx == 1);
6083 SDValue &Src = Sources[SrcIdx];
6084 if (!Src) {
6085 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6086 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6087 }
6088 return Src;
6089 };
6090 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6091 SDValue Passthru) {
6092 auto [TrueMask, VL] = TrueMaskVL;
6093 SDValue SrcV = GetSourceFor(Src);
6094 int SlideAmt = Src.second;
6095 if (SlideAmt == 0) {
6096 // Should never be the second operation.
6097 assert(Mask == TrueMask);
6098 return SrcV;
6099 }
6100 if (SlideAmt < 0)
6101 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6102 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6103 RISCVVType::TAIL_AGNOSTIC);
6104 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6105 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6106 RISCVVType::TAIL_AGNOSTIC);
6107 };
6108
6109 if (SrcInfo[1].first == -1) {
6110 SDValue Res = DAG.getUNDEF(ContainerVT);
6111 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6112 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6113 }
6114
6115 if (Subtarget.hasVendorXRivosVizip()) {
6116 bool TryWiden = false;
6117 unsigned Factor;
6118 if (isZipEven(SrcInfo, Mask, Factor)) {
6119 if (Factor == 1) {
6120 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6121 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6122 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6123 Subtarget);
6124 }
6125 TryWiden = true;
6126 }
6127 if (isZipOdd(SrcInfo, Mask, Factor)) {
6128 if (Factor == 1) {
6129 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6130 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6131 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6132 Subtarget);
6133 }
6134 TryWiden = true;
6135 }
6136 // If we found a widening opportunity which would let us form a
6137 // zipeven or zipodd, use the generic code to widen the shuffle
6138 // and recurse through this logic.
6139 if (TryWiden)
6140 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6141 return V;
6142 }
6143
6144 // Build the mask. Note that vslideup unconditionally preserves elements
6145 // below the slide amount in the destination, and thus those elements are
6146 // undefined in the mask. If the mask ends up all true (or undef), it
6147 // will be folded away by general logic.
6148 SmallVector<SDValue> MaskVals;
6149 for (const auto &[Idx, M] : enumerate(Mask)) {
6150 if (M < 0 ||
6151 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6152 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6153 continue;
6154 }
6155 int Src = M >= (int)NumElts;
6156 int Diff = (int)Idx - (M % NumElts);
6157 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6158 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6159 "Must match exactly one of the two slides");
6160 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6161 }
6162 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6163 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6164 SDValue SelectMask = convertToScalableVector(
6165 ContainerVT.changeVectorElementType(MVT::i1),
6166 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6167
6168 SDValue Res = DAG.getUNDEF(ContainerVT);
6169 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6170 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6171 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6172 }
6173
6174 // Handle any remaining single source shuffles
6175 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6176 if (V2.isUndef()) {
6177 // We might be able to express the shuffle as a bitrotate. But even if we
6178 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6179 // shifts and a vor will have a higher throughput than a vrgather.
6180 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6181 return V;
6182
6183 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6184 return V;
6185
6186 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6187 // is fully covered in interleave(2) above, so it is ignored here.
6188 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6189 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6190 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6191 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6192 unsigned Index;
6193 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6194 MVT NarrowVT =
6195 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6196 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6197 return getWideningSpread(Src, Factor, Index, DL, DAG);
6198 }
6199 }
6200 }
6201
6202 // If only a prefix of the source elements influences a prefix of the
6203 // destination elements, try to see if we can reduce the required LMUL
6204 unsigned MinVLen = Subtarget.getRealMinVLen();
6205 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6206 if (NumElts > MinVLMAX) {
6207 unsigned MaxIdx = 0;
6208 for (auto [I, M] : enumerate(Mask)) {
6209 if (M == -1)
6210 continue;
6211 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6212 }
6213 unsigned NewNumElts =
6214 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6215 if (NewNumElts != NumElts) {
6216 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6217 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6218 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6219 Mask.take_front(NewNumElts));
6220 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6221 }
6222 }
6223
6224 // Before hitting generic lowering fallbacks, try to widen the mask
6225 // to a wider SEW.
6226 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6227 return V;
6228
6229 // Can we generate a vcompress instead of a vrgather? These scale better
6230 // at high LMUL, at the cost of not being able to fold a following select
6231 // into them. The mask constants are also smaller than the index vector
6232 // constants, and thus easier to materialize.
6233 if (isCompressMask(Mask)) {
6234 SmallVector<SDValue> MaskVals(NumElts,
6235 DAG.getConstant(false, DL, XLenVT));
6236 for (auto Idx : Mask) {
6237 if (Idx == -1)
6238 break;
6239 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6240 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6241 }
6242 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6243 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6244 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6245 DAG.getUNDEF(VT));
6246 }
6247
6248 if (VT.getScalarSizeInBits() == 8 &&
6249 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6250 // On such a vector we're unable to use i8 as the index type.
6251 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6252 // may involve vector splitting if we're already at LMUL=8, or our
6253 // user-supplied maximum fixed-length LMUL.
6254 return SDValue();
6255 }
6256
6257 // Base case for the two operand recursion below - handle the worst case
6258 // single source shuffle.
6259 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6260 MVT IndexVT = VT.changeTypeToInteger();
6261 // Since we can't introduce illegal index types at this stage, use i16 and
6262 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6263 // than XLenVT.
6264 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6265 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6266 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6267 }
6268
6269 // If the mask allows, we can do all the index computation in 16 bits. This
6270 // requires less work and less register pressure at high LMUL, and creates
6271 // smaller constants which may be cheaper to materialize.
6272 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6273 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6274 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6275 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6276 }
6277
6278 MVT IndexContainerVT =
6279 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6280
6281 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6282 SmallVector<SDValue> GatherIndicesLHS;
6283 for (int MaskIndex : Mask) {
6284 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6285 GatherIndicesLHS.push_back(IsLHSIndex
6286 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6287 : DAG.getUNDEF(XLenVT));
6288 }
6289 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6290 LHSIndices =
6291 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6292 // At m1 and less, there's no point trying any of the high LMUL splitting
6293 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6294 if (NumElts <= MinVLMAX) {
6295 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6296 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6297 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6298 }
6299
6300 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6301 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6302 auto [InnerTrueMask, InnerVL] =
6303 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6304 int N =
6305 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6306 assert(isPowerOf2_32(N) && N <= 8);
6307
6308 // If we have a locally repeating mask, then we can reuse the first
6309 // register in the index register group for all registers within the
6310 // source register group. TODO: This generalizes to m2, and m4.
6311 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6312 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6313 SDValue Gather = DAG.getUNDEF(ContainerVT);
6314 for (int i = 0; i < N; i++) {
6315 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6316 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6317 SDValue SubVec =
6318 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6319 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6320 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6321 }
6322 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6323 }
6324
6325 // If we have a shuffle which only uses the first register in our source
6326 // register group, and repeats the same index across all spans, we can
6327 // use a single vrgather (and possibly some register moves).
6328 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6329 // which we can do a linear number of shuffles to form an m1 which
6330 // contains all the output elements.
6331 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6332 isSpanSplatShuffle(Mask, MinVLMAX)) {
6333 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6334 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6335 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6336 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6337 SDValue Gather = DAG.getUNDEF(ContainerVT);
6338 for (int i = 0; i < N; i++)
6339 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6340 M1VT.getVectorMinNumElements() * i);
6341 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6342 }
6343
6344 // If we have a shuffle which only uses the first register in our
6345 // source register group, we can do a linear number of m1 vrgathers
6346 // reusing the same source register (but with different indices)
6347 // TODO: This can be generalized for m2 or m4, or for any shuffle
6348 // for which we can do a vslidedown followed by this expansion.
6349 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6350 SDValue SlideAmt =
6351 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6352 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6353 SDValue Gather = DAG.getUNDEF(ContainerVT);
6354 for (int i = 0; i < N; i++) {
6355 if (i != 0)
6356 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6357 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6358 SlideAmt, TrueMask, VL);
6359 SDValue SubIndex =
6360 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6361 SDValue SubVec =
6362 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6363 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6364 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6365 M1VT.getVectorMinNumElements() * i);
6366 }
6367 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6368 }
6369
6370 // Fallback to generic vrgather if we can't find anything better.
6371 // On many machines, this will be O(LMUL^2)
6372 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6373 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6374 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6375 }
6376
6377 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6378 // merged with a second vrgather.
6379 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6380
6381 // Now construct the mask that will be used by the blended vrgather operation.
6382 // Construct the appropriate indices into each vector.
6383 for (int MaskIndex : Mask) {
6384 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6385 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6386 ? MaskIndex : -1);
6387 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6388 }
6389
6390 // If the mask indices are disjoint between the two sources, we can lower it
6391 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6392 // operands may end up being lowered to something cheaper than a vrgather.vv.
6393 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6394 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6395 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6396 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6397 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6398 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6399 return V;
6400
6401 // Before hitting generic lowering fallbacks, try to widen the mask
6402 // to a wider SEW.
6403 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6404 return V;
6405
6406 // Try to pick a profitable operand order.
6407 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6408 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6409
6410 // Recursively invoke lowering for each operand if we had two
6411 // independent single source shuffles, and then combine the result via a
6412 // vselect. Note that the vselect will likely be folded back into the
6413 // second permute (vrgather, or other) by the post-isel combine.
6414 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6415 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6416
6417 SmallVector<SDValue> MaskVals;
6418 for (int MaskIndex : Mask) {
6419 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6420 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6421 }
6422
6423 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6424 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6425 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6426
6427 if (SwapOps)
6428 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6429 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6430}
6431
6432 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6433 // Only support legal VTs for other shuffles for now.
6434 if (!isTypeLegal(VT))
6435 return false;
6436
6437 // Support splats for any type. These should type legalize well.
6438 if (ShuffleVectorSDNode::isSplatMask(M))
6439 return true;
6440
6441 const unsigned NumElts = M.size();
6442 MVT SVT = VT.getSimpleVT();
6443
6444 // Not for i1 vectors.
6445 if (SVT.getScalarType() == MVT::i1)
6446 return false;
6447
6448 std::array<std::pair<int, int>, 2> SrcInfo;
6449 int Dummy1, Dummy2;
6450 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6451 (::isMaskedSlidePair(M, SrcInfo) &&
6452 isElementRotate(SrcInfo, NumElts)) ||
6453 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6454}
6455
6456// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6457// the exponent.
6458SDValue
6459RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6460 SelectionDAG &DAG) const {
6461 MVT VT = Op.getSimpleValueType();
6462 unsigned EltSize = VT.getScalarSizeInBits();
6463 SDValue Src = Op.getOperand(0);
6464 SDLoc DL(Op);
6465 MVT ContainerVT = VT;
6466
6467 SDValue Mask, VL;
6468 if (Op->isVPOpcode()) {
6469 Mask = Op.getOperand(1);
6470 if (VT.isFixedLengthVector())
6471 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6472 Subtarget);
6473 VL = Op.getOperand(2);
6474 }
6475
6476 // We choose an FP type that can represent the value exactly if possible.
6477 // Otherwise, we use a round-towards-zero conversion to keep the exponent of the result correct.
6478 // TODO: Use f16 for i8 when possible?
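// E.g. i32 elements prefer f64, where every i32 value is exactly
// representable; if that vector type is not legal we fall back to f32 with a
// round-towards-zero conversion so truncation cannot perturb the exponent.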
6479 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6480 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6481 FloatEltVT = MVT::f32;
6482 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6483
6484 // Legal types should have been checked in the RISCVTargetLowering
6485 // constructor.
6486 // TODO: Splitting may make sense in some cases.
6487 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6488 "Expected legal float type!");
6489
6490 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6491 // The trailing zero count is equal to log2 of this single bit value.
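// E.g. for x = 0b01101000, -x = 0b10011000 and x & -x = 0b00001000, whose
// log2 (3) equals cttz(x).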
6492 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6493 SDValue Neg = DAG.getNegative(Src, DL, VT);
6494 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6495 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6496 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6497 Src, Mask, VL);
6498 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6499 }
6500
6501 // We have a legal FP type, convert to it.
6502 SDValue FloatVal;
6503 if (FloatVT.bitsGT(VT)) {
6504 if (Op->isVPOpcode())
6505 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6506 else
6507 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6508 } else {
6509 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6510 if (VT.isFixedLengthVector()) {
6511 ContainerVT = getContainerForFixedLengthVector(VT);
6512 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6513 }
6514 if (!Op->isVPOpcode())
6515 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6516 SDValue RTZRM =
6517 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6518 MVT ContainerFloatVT =
6519 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6520 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6521 Src, Mask, RTZRM, VL);
6522 if (VT.isFixedLengthVector())
6523 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6524 }
6525 // Bitcast to integer and shift the exponent to the LSB.
6526 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6527 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6528 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6529
6530 SDValue Exp;
6531 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6532 if (Op->isVPOpcode()) {
6533 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6534 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6535 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6536 } else {
6537 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6538 DAG.getConstant(ShiftAmt, DL, IntVT));
6539 if (IntVT.bitsLT(VT))
6540 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6541 else if (IntVT.bitsGT(VT))
6542 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6543 }
6544
6545 // The exponent contains log2 of the value in biased form.
6546 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6547 // For trailing zeros, we just need to subtract the bias.
6548 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6549 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6550 DAG.getConstant(ExponentBias, DL, VT));
6551 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6552 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6553 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6554
6555 // For leading zeros, we need to remove the bias and convert from log2 to
6556 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
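// E.g. with i32 elements converted via f32 (Bias = 127), Adjust = 127 + 31 =
// 158; an input of 16 has a biased exponent of 131, and 158 - 131 = 27 =
// ctlz(16) for a 32-bit element.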
6557 unsigned Adjust = ExponentBias + (EltSize - 1);
6558 SDValue Res;
6559 if (Op->isVPOpcode())
6560 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6561 Mask, VL);
6562 else
6563 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6564
6565 // With a zero input, the result above equals Adjust, which is greater than
6566 // EltSize. Hence, we can clamp with min(Res, EltSize) for CTLZ.
6567 if (Op.getOpcode() == ISD::CTLZ)
6568 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6569 else if (Op.getOpcode() == ISD::VP_CTLZ)
6570 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6571 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6572 return Res;
6573}
6574
6575SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6576 SelectionDAG &DAG) const {
6577 SDLoc DL(Op);
6578 MVT XLenVT = Subtarget.getXLenVT();
6579 SDValue Source = Op->getOperand(0);
6580 MVT SrcVT = Source.getSimpleValueType();
6581 SDValue Mask = Op->getOperand(1);
6582 SDValue EVL = Op->getOperand(2);
6583
6584 if (SrcVT.isFixedLengthVector()) {
6585 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6586 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6587 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6588 Subtarget);
6589 SrcVT = ContainerVT;
6590 }
6591
6592 // Convert to boolean vector.
6593 if (SrcVT.getScalarType() != MVT::i1) {
6594 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6595 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6596 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6597 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6598 DAG.getUNDEF(SrcVT), Mask, EVL});
6599 }
6600
6601 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6602 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6603 // In this case, we can interpret poison as -1, so there is nothing further to do.
6604 return Res;
6605
6606 // Convert -1 to VL.
6607 SDValue SetCC =
6608 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6609 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6610 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6611}
6612
6613// While RVV has alignment restrictions, we should always be able to load as a
6614// legal equivalently-sized byte-typed vector instead. This method is
6615 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6616// the load is already correctly-aligned, it returns SDValue().
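// E.g. an underaligned load of <vscale x 2 x i32> is re-expressed as a load of
// <vscale x 8 x i8> followed by a bitcast back to the original type.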
6617SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6618 SelectionDAG &DAG) const {
6619 auto *Load = cast<LoadSDNode>(Op);
6620 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6621
6622 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6623 Load->getMemoryVT(),
6624 *Load->getMemOperand()))
6625 return SDValue();
6626
6627 SDLoc DL(Op);
6628 MVT VT = Op.getSimpleValueType();
6629 unsigned EltSizeBits = VT.getScalarSizeInBits();
6630 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6631 "Unexpected unaligned RVV load type");
6632 MVT NewVT =
6633 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6634 assert(NewVT.isValid() &&
6635 "Expecting equally-sized RVV vector types to be legal");
6636 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6637 Load->getPointerInfo(), Load->getBaseAlign(),
6638 Load->getMemOperand()->getFlags());
6639 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6640}
6641
6642// While RVV has alignment restrictions, we should always be able to store as a
6643// legal equivalently-sized byte-typed vector instead. This method is
6644 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6645// returns SDValue() if the store is already correctly aligned.
6646SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6647 SelectionDAG &DAG) const {
6648 auto *Store = cast<StoreSDNode>(Op);
6649 assert(Store && Store->getValue().getValueType().isVector() &&
6650 "Expected vector store");
6651
6652 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6653 Store->getMemoryVT(),
6654 *Store->getMemOperand()))
6655 return SDValue();
6656
6657 SDLoc DL(Op);
6658 SDValue StoredVal = Store->getValue();
6659 MVT VT = StoredVal.getSimpleValueType();
6660 unsigned EltSizeBits = VT.getScalarSizeInBits();
6661 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6662 "Unexpected unaligned RVV store type");
6663 MVT NewVT =
6664 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6665 assert(NewVT.isValid() &&
6666 "Expecting equally-sized RVV vector types to be legal");
6667 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6668 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6669 Store->getPointerInfo(), Store->getBaseAlign(),
6670 Store->getMemOperand()->getFlags());
6671}
6672
6673 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6674 const RISCVSubtarget &Subtarget) {
6675 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6676
6677 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6678
6679 // All simm32 constants should be handled by isel.
6680 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6681 // this check redundant, but small immediates are common so this check
6682 // should have better compile time.
6683 if (isInt<32>(Imm))
6684 return Op;
6685
6686 // We only need to cost the immediate, if constant pool lowering is enabled.
6687 if (!Subtarget.useConstantPoolForLargeInts())
6688 return Op;
6689
6691 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6692 return Op;
6693
6694 // Optimizations below are disabled for opt size. If we're optimizing for
6695 // size, use a constant pool.
6696 if (DAG.shouldOptForSize())
6697 return SDValue();
6698
6699 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
6700 // do that if it will avoid a constant pool. It will require an extra
6701 // temporary register though.
6702 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6703 // low and high 32 bits are the same and bit 31 and 63 are set.
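// E.g. 0x0000123400001234 can be built as X + (X << 32) with X = 0x1234, which
// may be cheaper than a constant pool access.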
6704 unsigned ShiftAmt, AddOpc;
6705 RISCVMatInt::InstSeq SeqLo =
6706 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6707 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6708 return Op;
6709
6710 return SDValue();
6711}
6712
6713SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6714 SelectionDAG &DAG) const {
6715 MVT VT = Op.getSimpleValueType();
6716 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6717
6718 // Can this constant be selected by a Zfa FLI instruction?
6719 bool Negate = false;
6720 int Index = getLegalZfaFPImm(Imm, VT);
6721
6722 // If the constant is negative, try negating.
6723 if (Index < 0 && Imm.isNegative()) {
6724 Index = getLegalZfaFPImm(-Imm, VT);
6725 Negate = true;
6726 }
6727
6728 // If we couldn't find a FLI lowering, fall back to generic code.
6729 if (Index < 0)
6730 return SDValue();
6731
6732 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6733 SDLoc DL(Op);
6734 SDValue Const =
6735 DAG.getNode(RISCVISD::FLI, DL, VT,
6736 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6737 if (!Negate)
6738 return Const;
6739
6740 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6741}
6742
6743 static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6744 SelectionDAG &DAG) {
6745
6746 unsigned IsData = Op.getConstantOperandVal(4);
6747
6748 // mips-p8700 only supports data prefetches for now; drop instruction prefetches.
6749 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6750 return Op.getOperand(0);
6751 return Op;
6752}
6753
6754 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6755 const RISCVSubtarget &Subtarget) {
6756 SDLoc dl(Op);
6757 AtomicOrdering FenceOrdering =
6758 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6759 SyncScope::ID FenceSSID =
6760 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6761
6762 if (Subtarget.hasStdExtZtso()) {
6763 // The only fence that needs an instruction is a sequentially-consistent
6764 // cross-thread fence.
6765 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6766 FenceSSID == SyncScope::System)
6767 return Op;
6768
6769 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6770 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6771 }
6772
6773 // singlethread fences only synchronize with signal handlers on the same
6774 // thread and thus only need to preserve instruction order, not actually
6775 // enforce memory ordering.
6776 if (FenceSSID == SyncScope::SingleThread)
6777 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6778 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6779
6780 return Op;
6781}
6782
6783SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6784 SelectionDAG &DAG) const {
6785 SDLoc DL(Op);
6786 MVT VT = Op.getSimpleValueType();
6787 MVT XLenVT = Subtarget.getXLenVT();
6788 unsigned Check = Op.getConstantOperandVal(1);
6789 unsigned TDCMask = 0;
6790 if (Check & fcSNan)
6791 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6792 if (Check & fcQNan)
6793 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6794 if (Check & fcPosInf)
6795 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6796 if (Check & fcNegInf)
6797 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6798 if (Check & fcPosNormal)
6799 TDCMask |= RISCV::FPMASK_Positive_Normal;
6800 if (Check & fcNegNormal)
6801 TDCMask |= RISCV::FPMASK_Negative_Normal;
6802 if (Check & fcPosSubnormal)
6803 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6804 if (Check & fcNegSubnormal)
6805 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6806 if (Check & fcPosZero)
6807 TDCMask |= RISCV::FPMASK_Positive_Zero;
6808 if (Check & fcNegZero)
6809 TDCMask |= RISCV::FPMASK_Negative_Zero;
6810
6811 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6812
6813 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6814
6815 if (VT.isVector()) {
6816 SDValue Op0 = Op.getOperand(0);
6817 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6818
6819 if (VT.isScalableVector()) {
6820 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6821 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6822 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6823 Mask = Op.getOperand(2);
6824 VL = Op.getOperand(3);
6825 }
6826 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6827 VL, Op->getFlags());
6828 if (IsOneBitMask)
6829 return DAG.getSetCC(DL, VT, FPCLASS,
6830 DAG.getConstant(TDCMask, DL, DstVT),
6831 ISD::SETEQ);
6832 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6833 DAG.getConstant(TDCMask, DL, DstVT));
6834 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6835 ISD::SETNE);
6836 }
6837
6838 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6839 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6840 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6841 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6842 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6843 Mask = Op.getOperand(2);
6844 MVT MaskContainerVT =
6845 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6846 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6847 VL = Op.getOperand(3);
6848 }
6849 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6850
6851 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6852 Mask, VL, Op->getFlags());
6853
6854 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6855 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6856 if (IsOneBitMask) {
6857 SDValue VMSEQ =
6858 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6859 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6860 DAG.getUNDEF(ContainerVT), Mask, VL});
6861 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6862 }
6863 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6864 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6865
6866 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6867 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6868 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6869
6870 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6871 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6872 DAG.getUNDEF(ContainerVT), Mask, VL});
6873 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6874 }
6875
6876 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6877 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6878 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6879 ISD::SETNE);
6880 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6881}
6882
6883// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6884// operations propagate nans.
6885 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6886 const RISCVSubtarget &Subtarget) {
6887 SDLoc DL(Op);
6888 MVT VT = Op.getSimpleValueType();
6889
6890 SDValue X = Op.getOperand(0);
6891 SDValue Y = Op.getOperand(1);
6892
6893 if (!VT.isVector()) {
6894 MVT XLenVT = Subtarget.getXLenVT();
6895
6896 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6897 // ensures that when one input is a nan, the other will also be a nan
6898 // allowing the nan to propagate. If both inputs are nan, this will swap the
6899 // inputs which is harmless.
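// E.g. for fmaximum(NaN, 3.0): X == X is false, so NewY becomes X (NaN), both
// operands of the fmax below are then NaN, and a NaN is returned as required.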
6900
6901 SDValue NewY = Y;
6902 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6903 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6904 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6905 }
6906
6907 SDValue NewX = X;
6908 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6909 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6910 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6911 }
6912
6913 unsigned Opc =
6914 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6915 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6916 }
6917
6918 // Check for no NaNs before converting the fixed-length vectors to scalable.
6919 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6920 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6921
6922 MVT ContainerVT = VT;
6923 if (VT.isFixedLengthVector()) {
6924 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6925 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6926 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6927 }
6928
6929 SDValue Mask, VL;
6930 if (Op->isVPOpcode()) {
6931 Mask = Op.getOperand(2);
6932 if (VT.isFixedLengthVector())
6933 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6934 Subtarget);
6935 VL = Op.getOperand(3);
6936 } else {
6937 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6938 }
6939
6940 SDValue NewY = Y;
6941 if (!XIsNeverNan) {
6942 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6943 {X, X, DAG.getCondCode(ISD::SETOEQ),
6944 DAG.getUNDEF(ContainerVT), Mask, VL});
6945 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6946 DAG.getUNDEF(ContainerVT), VL);
6947 }
6948
6949 SDValue NewX = X;
6950 if (!YIsNeverNan) {
6951 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6952 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6953 DAG.getUNDEF(ContainerVT), Mask, VL});
6954 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6955 DAG.getUNDEF(ContainerVT), VL);
6956 }
6957
6958 unsigned Opc =
6959 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6960 ? RISCVISD::VFMAX_VL
6961 : RISCVISD::VFMIN_VL;
6962 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6963 DAG.getUNDEF(ContainerVT), Mask, VL);
6964 if (VT.isFixedLengthVector())
6965 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6966 return Res;
6967}
6968
6969 static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6970 const RISCVSubtarget &Subtarget) {
6971 bool IsFABS = Op.getOpcode() == ISD::FABS;
6972 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6973 "Wrong opcode for lowering FABS or FNEG.");
6974
6975 MVT XLenVT = Subtarget.getXLenVT();
6976 MVT VT = Op.getSimpleValueType();
6977 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6978
6979 SDLoc DL(Op);
6980 SDValue Fmv =
6981 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6982
6983 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6984 Mask = Mask.sext(Subtarget.getXLen());
6985
6986 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6987 SDValue Logic =
6988 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6989 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6990}
6991
6992 static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6993 const RISCVSubtarget &Subtarget) {
6994 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6995
6996 MVT XLenVT = Subtarget.getXLenVT();
6997 MVT VT = Op.getSimpleValueType();
6998 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6999
7000 SDValue Mag = Op.getOperand(0);
7001 SDValue Sign = Op.getOperand(1);
7002
7003 SDLoc DL(Op);
7004
7005 // Get sign bit into an integer value.
7006 unsigned SignSize = Sign.getValueSizeInBits();
7007 SDValue SignAsInt = [&]() {
7008 if (SignSize == Subtarget.getXLen())
7009 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7010 switch (SignSize) {
7011 case 16:
7012 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7013 case 32:
7014 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7015 case 64: {
7016 assert(XLenVT == MVT::i32 && "Unexpected type");
7017 // Copy the upper word to integer.
7018 SignSize = 32;
7019 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7020 .getValue(1);
7021 }
7022 default:
7023 llvm_unreachable("Unexpected sign size");
7024 }
7025 }();
7026
7027 // Get the signbit at the right position for MagAsInt.
7028 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7029 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7030 SignAsInt,
7031 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7032
7033 // Mask the sign bit and any bits above it. The extra bits will be dropped
7034 // when we convert back to FP.
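// E.g. for an f16 magnitude of 1.0 (0x3C00) and a negative sign operand, the
// masked sign ends up with bit 15 (0x8000) set, the cleared magnitude below
// stays 0x3C00, and their OR yields 0xBC00, i.e. -1.0, once converted back.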
7035 SDValue SignMask = DAG.getConstant(
7036 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7037 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7038
7039 // Transform Mag value to integer, and clear the sign bit.
7040 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7041 SDValue ClearSignMask = DAG.getConstant(
7042 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7043 SDValue ClearedSign =
7044 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7045
7046 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7047 SDNodeFlags::Disjoint);
7048
7049 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7050}
7051
7052/// Get the RISC-V target-specific VL op for a given SDNode.
7053static unsigned getRISCVVLOp(SDValue Op) {
7054#define OP_CASE(NODE) \
7055 case ISD::NODE: \
7056 return RISCVISD::NODE##_VL;
7057#define VP_CASE(NODE) \
7058 case ISD::VP_##NODE: \
7059 return RISCVISD::NODE##_VL;
7060 // clang-format off
7061 switch (Op.getOpcode()) {
7062 default:
7063 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7064 OP_CASE(ADD)
7065 OP_CASE(SUB)
7066 OP_CASE(MUL)
7067 OP_CASE(MULHS)
7068 OP_CASE(MULHU)
7069 OP_CASE(SDIV)
7070 OP_CASE(SREM)
7071 OP_CASE(UDIV)
7072 OP_CASE(UREM)
7073 OP_CASE(SHL)
7074 OP_CASE(SRA)
7075 OP_CASE(SRL)
7076 OP_CASE(ROTL)
7077 OP_CASE(ROTR)
7078 OP_CASE(BSWAP)
7079 OP_CASE(CTTZ)
7080 OP_CASE(CTLZ)
7081 OP_CASE(CTPOP)
7082 OP_CASE(BITREVERSE)
7083 OP_CASE(SADDSAT)
7084 OP_CASE(UADDSAT)
7085 OP_CASE(SSUBSAT)
7086 OP_CASE(USUBSAT)
7087 OP_CASE(AVGFLOORS)
7088 OP_CASE(AVGFLOORU)
7089 OP_CASE(AVGCEILS)
7090 OP_CASE(AVGCEILU)
7091 OP_CASE(FADD)
7092 OP_CASE(FSUB)
7093 OP_CASE(FMUL)
7094 OP_CASE(FDIV)
7095 OP_CASE(FNEG)
7096 OP_CASE(FABS)
7097 OP_CASE(FCOPYSIGN)
7098 OP_CASE(FSQRT)
7099 OP_CASE(SMIN)
7100 OP_CASE(SMAX)
7101 OP_CASE(UMIN)
7102 OP_CASE(UMAX)
7103 OP_CASE(STRICT_FADD)
7104 OP_CASE(STRICT_FSUB)
7105 OP_CASE(STRICT_FMUL)
7106 OP_CASE(STRICT_FDIV)
7107 OP_CASE(STRICT_FSQRT)
7108 VP_CASE(ADD) // VP_ADD
7109 VP_CASE(SUB) // VP_SUB
7110 VP_CASE(MUL) // VP_MUL
7111 VP_CASE(SDIV) // VP_SDIV
7112 VP_CASE(SREM) // VP_SREM
7113 VP_CASE(UDIV) // VP_UDIV
7114 VP_CASE(UREM) // VP_UREM
7115 VP_CASE(SHL) // VP_SHL
7116 VP_CASE(FADD) // VP_FADD
7117 VP_CASE(FSUB) // VP_FSUB
7118 VP_CASE(FMUL) // VP_FMUL
7119 VP_CASE(FDIV) // VP_FDIV
7120 VP_CASE(FNEG) // VP_FNEG
7121 VP_CASE(FABS) // VP_FABS
7122 VP_CASE(SMIN) // VP_SMIN
7123 VP_CASE(SMAX) // VP_SMAX
7124 VP_CASE(UMIN) // VP_UMIN
7125 VP_CASE(UMAX) // VP_UMAX
7126 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7127 VP_CASE(SETCC) // VP_SETCC
7128 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7129 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7130 VP_CASE(BITREVERSE) // VP_BITREVERSE
7131 VP_CASE(SADDSAT) // VP_SADDSAT
7132 VP_CASE(UADDSAT) // VP_UADDSAT
7133 VP_CASE(SSUBSAT) // VP_SSUBSAT
7134 VP_CASE(USUBSAT) // VP_USUBSAT
7135 VP_CASE(BSWAP) // VP_BSWAP
7136 VP_CASE(CTLZ) // VP_CTLZ
7137 VP_CASE(CTTZ) // VP_CTTZ
7138 VP_CASE(CTPOP) // VP_CTPOP
7139 case ISD::CTLZ_ZERO_UNDEF:
7140 case ISD::VP_CTLZ_ZERO_UNDEF:
7141 return RISCVISD::CTLZ_VL;
7142 case ISD::CTTZ_ZERO_UNDEF:
7143 case ISD::VP_CTTZ_ZERO_UNDEF:
7144 return RISCVISD::CTTZ_VL;
7145 case ISD::FMA:
7146 case ISD::VP_FMA:
7147 return RISCVISD::VFMADD_VL;
7148 case ISD::STRICT_FMA:
7149 return RISCVISD::STRICT_VFMADD_VL;
7150 case ISD::AND:
7151 case ISD::VP_AND:
7152 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7153 return RISCVISD::VMAND_VL;
7154 return RISCVISD::AND_VL;
7155 case ISD::OR:
7156 case ISD::VP_OR:
7157 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7158 return RISCVISD::VMOR_VL;
7159 return RISCVISD::OR_VL;
7160 case ISD::XOR:
7161 case ISD::VP_XOR:
7162 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7163 return RISCVISD::VMXOR_VL;
7164 return RISCVISD::XOR_VL;
7165 case ISD::ANY_EXTEND:
7166 case ISD::ZERO_EXTEND:
7167 return RISCVISD::VZEXT_VL;
7168 case ISD::SIGN_EXTEND:
7169 return RISCVISD::VSEXT_VL;
7170 case ISD::SETCC:
7171 return RISCVISD::SETCC_VL;
7172 case ISD::VSELECT:
7173 return RISCVISD::VMERGE_VL;
7174 case ISD::VP_SELECT:
7175 case ISD::VP_MERGE:
7176 return RISCVISD::VMERGE_VL;
7177 case ISD::VP_SRA:
7178 return RISCVISD::SRA_VL;
7179 case ISD::VP_SRL:
7180 return RISCVISD::SRL_VL;
7181 case ISD::VP_SQRT:
7182 return RISCVISD::FSQRT_VL;
7183 case ISD::VP_SIGN_EXTEND:
7184 return RISCVISD::VSEXT_VL;
7185 case ISD::VP_ZERO_EXTEND:
7186 return RISCVISD::VZEXT_VL;
7187 case ISD::VP_FP_TO_SINT:
7188 return RISCVISD::VFCVT_RTZ_X_F_VL;
7189 case ISD::VP_FP_TO_UINT:
7190 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7191 case ISD::FMINNUM:
7192 case ISD::FMINIMUMNUM:
7193 case ISD::VP_FMINNUM:
7194 return RISCVISD::VFMIN_VL;
7195 case ISD::FMAXNUM:
7196 case ISD::FMAXIMUMNUM:
7197 case ISD::VP_FMAXNUM:
7198 return RISCVISD::VFMAX_VL;
7199 case ISD::LRINT:
7200 case ISD::VP_LRINT:
7201 case ISD::LLRINT:
7202 case ISD::VP_LLRINT:
7203 return RISCVISD::VFCVT_RM_X_F_VL;
7204 }
7205 // clang-format on
7206#undef OP_CASE
7207#undef VP_CASE
7208}
7209
7210 static bool isPromotedOpNeedingSplit(SDValue Op,
7211 const RISCVSubtarget &Subtarget) {
7212 return (Op.getValueType() == MVT::nxv32f16 &&
7213 (Subtarget.hasVInstructionsF16Minimal() &&
7214 !Subtarget.hasVInstructionsF16())) ||
7215 Op.getValueType() == MVT::nxv32bf16;
7216}
7217
7218 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7219 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7220 SDLoc DL(Op);
7221
7222 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7223 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7224
7225 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7226 if (!Op.getOperand(j).getValueType().isVector()) {
7227 LoOperands[j] = Op.getOperand(j);
7228 HiOperands[j] = Op.getOperand(j);
7229 continue;
7230 }
7231 std::tie(LoOperands[j], HiOperands[j]) =
7232 DAG.SplitVector(Op.getOperand(j), DL);
7233 }
7234
7235 SDValue LoRes =
7236 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7237 SDValue HiRes =
7238 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7239
7240 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7241}
7242
7243 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7244 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7245 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7246 SDLoc DL(Op);
7247
7248 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7249 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7250
7251 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7252 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7253 std::tie(LoOperands[j], HiOperands[j]) =
7254 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7255 continue;
7256 }
7257 if (!Op.getOperand(j).getValueType().isVector()) {
7258 LoOperands[j] = Op.getOperand(j);
7259 HiOperands[j] = Op.getOperand(j);
7260 continue;
7261 }
7262 std::tie(LoOperands[j], HiOperands[j]) =
7263 DAG.SplitVector(Op.getOperand(j), DL);
7264 }
7265
7266 SDValue LoRes =
7267 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7268 SDValue HiRes =
7269 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7270
7271 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7272}
7273
7274 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7275 SDLoc DL(Op);
7276
7277 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7278 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7279 auto [EVLLo, EVLHi] =
7280 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7281
7282 SDValue ResLo =
7283 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7284 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7285 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7286 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7287}
7288
7289 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7290
7291 assert(Op->isStrictFPOpcode());
7292
7293 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7294
7295 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7296 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7297
7298 SDLoc DL(Op);
7299
7300 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7301 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7302
7303 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7304 if (!Op.getOperand(j).getValueType().isVector()) {
7305 LoOperands[j] = Op.getOperand(j);
7306 HiOperands[j] = Op.getOperand(j);
7307 continue;
7308 }
7309 std::tie(LoOperands[j], HiOperands[j]) =
7310 DAG.SplitVector(Op.getOperand(j), DL);
7311 }
7312
7313 SDValue LoRes =
7314 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7315 HiOperands[0] = LoRes.getValue(1);
7316 SDValue HiRes =
7317 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7318
7319 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7320 LoRes.getValue(0), HiRes.getValue(0));
7321 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7322}
7323
7324SDValue
7325RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7326 SelectionDAG &DAG) const {
7327 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7328 "Unexpected bfloat16 load lowering");
7329
7330 SDLoc DL(Op);
7331 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7332 EVT MemVT = LD->getMemoryVT();
7333 SDValue Load = DAG.getExtLoad(
7334 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7335 LD->getBasePtr(),
7336 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7337 LD->getMemOperand());
7338 // Use a mask to make the bf16 value NaN-boxed when we don't have the flh
7339 // instruction. -65536 (0xFFFF0000 sign-extended) has zero low 12 bits, so it
7340 // can be materialized directly with a single lui.
7341 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7342 SDValue OrSixteenOne =
7343 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7344 SDValue ConvertedResult =
7345 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7346 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7347}
7348
7349SDValue
7350RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7351 SelectionDAG &DAG) const {
7352 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7353 "Unexpected bfloat16 store lowering");
7354
7355 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7356 SDLoc DL(Op);
7357 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7358 Subtarget.getXLenVT(), ST->getValue());
7359 return DAG.getTruncStore(
7360 ST->getChain(), DL, FMV, ST->getBasePtr(),
7361 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7362 ST->getMemOperand());
7363}
7364
7365 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7366 SelectionDAG &DAG) const {
7367 switch (Op.getOpcode()) {
7368 default:
7370 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7371 case ISD::PREFETCH:
7372 return LowerPREFETCH(Op, Subtarget, DAG);
7373 case ISD::ATOMIC_FENCE:
7374 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7375 case ISD::GlobalAddress:
7376 return lowerGlobalAddress(Op, DAG);
7377 case ISD::BlockAddress:
7378 return lowerBlockAddress(Op, DAG);
7379 case ISD::ConstantPool:
7380 return lowerConstantPool(Op, DAG);
7381 case ISD::JumpTable:
7382 return lowerJumpTable(Op, DAG);
7383 case ISD::GlobalTLSAddress:
7384 return lowerGlobalTLSAddress(Op, DAG);
7385 case ISD::Constant:
7386 return lowerConstant(Op, DAG, Subtarget);
7387 case ISD::ConstantFP:
7388 return lowerConstantFP(Op, DAG);
7389 case ISD::SELECT:
7390 return lowerSELECT(Op, DAG);
7391 case ISD::BRCOND:
7392 return lowerBRCOND(Op, DAG);
7393 case ISD::VASTART:
7394 return lowerVASTART(Op, DAG);
7395 case ISD::FRAMEADDR:
7396 return lowerFRAMEADDR(Op, DAG);
7397 case ISD::RETURNADDR:
7398 return lowerRETURNADDR(Op, DAG);
7399 case ISD::SHL_PARTS:
7400 return lowerShiftLeftParts(Op, DAG);
7401 case ISD::SRA_PARTS:
7402 return lowerShiftRightParts(Op, DAG, true);
7403 case ISD::SRL_PARTS:
7404 return lowerShiftRightParts(Op, DAG, false);
7405 case ISD::ROTL:
7406 case ISD::ROTR:
7407 if (Op.getValueType().isFixedLengthVector()) {
7408 assert(Subtarget.hasStdExtZvkb());
7409 return lowerToScalableOp(Op, DAG);
7410 }
7411 assert(Subtarget.hasVendorXTHeadBb() &&
7412 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7413 "Unexpected custom legalization");
7414 // XTHeadBb only supports rotate by constant.
7415 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7416 return SDValue();
7417 return Op;
7418 case ISD::BITCAST: {
7419 SDLoc DL(Op);
7420 EVT VT = Op.getValueType();
7421 SDValue Op0 = Op.getOperand(0);
7422 EVT Op0VT = Op0.getValueType();
7423 MVT XLenVT = Subtarget.getXLenVT();
7424 if (Op0VT == MVT::i16 &&
7425 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7426 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7427 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7428 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7429 }
7430 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7431 Subtarget.hasStdExtFOrZfinx()) {
7432 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7433 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7434 }
7435 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7436 Subtarget.hasStdExtDOrZdinx()) {
7437 SDValue Lo, Hi;
7438 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7439 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7440 }
7441
7442 // Consider other scalar<->scalar casts as legal if the types are legal.
7443 // Otherwise expand them.
7444 if (!VT.isVector() && !Op0VT.isVector()) {
7445 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7446 return Op;
7447 return SDValue();
7448 }
7449
7450 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7451 "Unexpected types");
7452
7453 if (VT.isFixedLengthVector()) {
7454 // We can handle fixed length vector bitcasts with a simple replacement
7455 // in isel.
7456 if (Op0VT.isFixedLengthVector())
7457 return Op;
7458 // When bitcasting from scalar to fixed-length vector, insert the scalar
7459 // into a one-element vector of the result type, and perform a vector
7460 // bitcast.
7461 if (!Op0VT.isVector()) {
7462 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7463 if (!isTypeLegal(BVT))
7464 return SDValue();
7465 return DAG.getBitcast(
7466 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7467 }
7468 return SDValue();
7469 }
7470 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7471 // thus: bitcast the vector to a one-element vector type whose element type
7472 // is the same as the result type, and extract the first element.
7473 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7474 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7475 if (!isTypeLegal(BVT))
7476 return SDValue();
7477 SDValue BVec = DAG.getBitcast(BVT, Op0);
7478 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7479 }
7480 return SDValue();
7481 }
7482 case ISD::INTRINSIC_WO_CHAIN:
7483 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7484 case ISD::INTRINSIC_W_CHAIN:
7485 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7486 case ISD::INTRINSIC_VOID:
7487 return LowerINTRINSIC_VOID(Op, DAG);
7488 case ISD::IS_FPCLASS:
7489 return LowerIS_FPCLASS(Op, DAG);
7490 case ISD::BITREVERSE: {
7491 MVT VT = Op.getSimpleValueType();
7492 if (VT.isFixedLengthVector()) {
7493 assert(Subtarget.hasStdExtZvbb());
7494 return lowerToScalableOp(Op, DAG);
7495 }
7496 SDLoc DL(Op);
7497 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7498 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7499 // Expand bitreverse to a bswap(rev8) followed by brev8.
7500 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7501 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7502 }
7503 case ISD::TRUNCATE:
7504 case ISD::TRUNCATE_SSAT_S:
7505 case ISD::TRUNCATE_USAT_U:
7506 // Only custom-lower vector truncates
7507 if (!Op.getSimpleValueType().isVector())
7508 return Op;
7509 return lowerVectorTruncLike(Op, DAG);
7510 case ISD::ANY_EXTEND:
7511 case ISD::ZERO_EXTEND:
7512 if (Op.getOperand(0).getValueType().isVector() &&
7513 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7514 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7515 if (Op.getValueType().isScalableVector())
7516 return Op;
7517 return lowerToScalableOp(Op, DAG);
7518 case ISD::SIGN_EXTEND:
7519 if (Op.getOperand(0).getValueType().isVector() &&
7520 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7521 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7522 if (Op.getValueType().isScalableVector())
7523 return Op;
7524 return lowerToScalableOp(Op, DAG);
7525 case ISD::SPLAT_VECTOR_PARTS:
7526 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7527 case ISD::INSERT_VECTOR_ELT:
7528 return lowerINSERT_VECTOR_ELT(Op, DAG);
7529 case ISD::EXTRACT_VECTOR_ELT:
7530 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7531 case ISD::SCALAR_TO_VECTOR: {
7532 MVT VT = Op.getSimpleValueType();
7533 SDLoc DL(Op);
7534 SDValue Scalar = Op.getOperand(0);
7535 if (VT.getVectorElementType() == MVT::i1) {
7536 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7537 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7538 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7539 }
7540 MVT ContainerVT = VT;
7541 if (VT.isFixedLengthVector())
7542 ContainerVT = getContainerForFixedLengthVector(VT);
7543 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7544
7545 SDValue V;
7546 if (VT.isFloatingPoint()) {
7547 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7548 DAG.getUNDEF(ContainerVT), Scalar, VL);
7549 } else {
7550 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7551 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7552 DAG.getUNDEF(ContainerVT), Scalar, VL);
7553 }
7554 if (VT.isFixedLengthVector())
7555 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7556 return V;
7557 }
7558 case ISD::VSCALE: {
7559 MVT XLenVT = Subtarget.getXLenVT();
7560 MVT VT = Op.getSimpleValueType();
7561 SDLoc DL(Op);
7562 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7563 // We define our scalable vector types for lmul=1 to use a 64 bit known
7564 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7565 // vscale as VLENB / 8.
7566 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7567 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7568 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7569 // We assume VLENB is a multiple of 8. We manually choose the best shift
7570 // here because SimplifyDemandedBits isn't always able to simplify it.
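// E.g. for vscale * 4 (Val = 4, Log2 = 2), vscale * 4 == VLENB / 2, so a
// single SRL of VLENB by 1 suffices.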
7571 uint64_t Val = Op.getConstantOperandVal(0);
7572 if (isPowerOf2_64(Val)) {
7573 uint64_t Log2 = Log2_64(Val);
7574 if (Log2 < 3) {
7575 SDNodeFlags Flags;
7576 Flags.setExact(true);
7577 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7578 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7579 } else if (Log2 > 3) {
7580 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7581 DAG.getConstant(Log2 - 3, DL, XLenVT));
7582 }
7583 } else if ((Val % 8) == 0) {
7584 // If the multiplier is a multiple of 8, scale it down to avoid needing
7585 // to shift the VLENB value.
7586 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7587 DAG.getConstant(Val / 8, DL, XLenVT));
7588 } else {
7589 SDNodeFlags Flags;
7590 Flags.setExact(true);
7591 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7592 DAG.getConstant(3, DL, XLenVT), Flags);
7593 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7594 DAG.getConstant(Val, DL, XLenVT));
7595 }
7596 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7597 }
7598 case ISD::FPOWI: {
7599 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7600 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7601 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7602 Op.getOperand(1).getValueType() == MVT::i32) {
7603 SDLoc DL(Op);
7604 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7605 SDValue Powi =
7606 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7607 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7608 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7609 }
7610 return SDValue();
7611 }
7612 case ISD::FMAXIMUM:
7613 case ISD::FMINIMUM:
7614 if (isPromotedOpNeedingSplit(Op, Subtarget))
7615 return SplitVectorOp(Op, DAG);
7616 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7617 case ISD::FP_EXTEND:
7618 case ISD::FP_ROUND:
7619 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7620 case ISD::STRICT_FP_ROUND:
7621 case ISD::STRICT_FP_EXTEND:
7622 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7623 case ISD::SINT_TO_FP:
7624 case ISD::UINT_TO_FP:
7625 if (Op.getValueType().isVector() &&
7626 ((Op.getValueType().getScalarType() == MVT::f16 &&
7627 (Subtarget.hasVInstructionsF16Minimal() &&
7628 !Subtarget.hasVInstructionsF16())) ||
7629 Op.getValueType().getScalarType() == MVT::bf16)) {
7630 if (isPromotedOpNeedingSplit(Op, Subtarget))
7631 return SplitVectorOp(Op, DAG);
7632 // int -> f32
7633 SDLoc DL(Op);
7634 MVT NVT =
7635 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7636 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7637 // f32 -> [b]f16
7638 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7639 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7640 }
7641 [[fallthrough]];
7642 case ISD::FP_TO_SINT:
7643 case ISD::FP_TO_UINT:
7644 if (SDValue Op1 = Op.getOperand(0);
7645 Op1.getValueType().isVector() &&
7646 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7647 (Subtarget.hasVInstructionsF16Minimal() &&
7648 !Subtarget.hasVInstructionsF16())) ||
7649 Op1.getValueType().getScalarType() == MVT::bf16)) {
7650 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7651 return SplitVectorOp(Op, DAG);
7652 // [b]f16 -> f32
7653 SDLoc DL(Op);
7654 MVT NVT = MVT::getVectorVT(MVT::f32,
7655 Op1.getValueType().getVectorElementCount());
7656 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7657 // f32 -> int
7658 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7659 }
7660 [[fallthrough]];
7661 case ISD::STRICT_FP_TO_SINT:
7662 case ISD::STRICT_FP_TO_UINT:
7663 case ISD::STRICT_SINT_TO_FP:
7664 case ISD::STRICT_UINT_TO_FP: {
7665 // RVV can only do fp<->int conversions to types half/double the size as
7666 // the source. We custom-lower any conversions that do two hops into
7667 // sequences.
7668 MVT VT = Op.getSimpleValueType();
7669 if (VT.isScalarInteger())
7670 return lowerFP_TO_INT(Op, DAG, Subtarget);
7671 bool IsStrict = Op->isStrictFPOpcode();
7672 SDValue Src = Op.getOperand(0 + IsStrict);
7673 MVT SrcVT = Src.getSimpleValueType();
7674 if (SrcVT.isScalarInteger())
7675 return lowerINT_TO_FP(Op, DAG, Subtarget);
7676 if (!VT.isVector())
7677 return Op;
7678 SDLoc DL(Op);
7679 MVT EltVT = VT.getVectorElementType();
7680 MVT SrcEltVT = SrcVT.getVectorElementType();
7681 unsigned EltSize = EltVT.getSizeInBits();
7682 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7683 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7684 "Unexpected vector element types");
7685
7686 bool IsInt2FP = SrcEltVT.isInteger();
7687 // Widening conversions
7688 if (EltSize > (2 * SrcEltSize)) {
7689 if (IsInt2FP) {
7690 // Do a regular integer sign/zero extension then convert to float.
7691 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7692 VT.getVectorElementCount());
7693 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7694 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7695 ? ISD::ZERO_EXTEND
7696 : ISD::SIGN_EXTEND;
7697 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7698 if (IsStrict)
7699 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7700 Op.getOperand(0), Ext);
7701 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7702 }
7703 // FP2Int
7704 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7705 // Do one doubling fp_extend then complete the operation by converting
7706 // to int.
7707 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7708 if (IsStrict) {
7709 auto [FExt, Chain] =
7710 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7711 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7712 }
7713 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7714 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7715 }
7716
7717 // Narrowing conversions
7718 if (SrcEltSize > (2 * EltSize)) {
7719 if (IsInt2FP) {
7720 // One narrowing int_to_fp, then an fp_round.
7721 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7722 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7723 if (IsStrict) {
7724 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7725 DAG.getVTList(InterimFVT, MVT::Other),
7726 Op.getOperand(0), Src);
7727 SDValue Chain = Int2FP.getValue(1);
7728 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7729 }
7730 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7731 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7732 }
7733 // FP2Int
7734 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7735 // representable by the integer, the result is poison.
7736 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7737 VT.getVectorElementCount());
7738 if (IsStrict) {
7739 SDValue FP2Int =
7740 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7741 Op.getOperand(0), Src);
7742 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7743 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7744 }
7745 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7746 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7747 }
7748
7749 // Scalable vectors can exit here. Patterns will handle equally-sized
7750 // conversions halving/doubling ones.
7751 if (!VT.isFixedLengthVector())
7752 return Op;
7753
7754 // For fixed-length vectors we lower to a custom "VL" node.
7755 unsigned RVVOpc = 0;
7756 switch (Op.getOpcode()) {
7757 default:
7758 llvm_unreachable("Impossible opcode");
7759 case ISD::FP_TO_SINT:
7760 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7761 break;
7762 case ISD::FP_TO_UINT:
7763 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7764 break;
7765 case ISD::SINT_TO_FP:
7766 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7767 break;
7768 case ISD::UINT_TO_FP:
7769 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7770 break;
7771 case ISD::STRICT_FP_TO_SINT:
7772 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7773 break;
7774 case ISD::STRICT_FP_TO_UINT:
7775 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7776 break;
7777 case ISD::STRICT_SINT_TO_FP:
7778 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7779 break;
7780 case ISD::STRICT_UINT_TO_FP:
7781 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7782 break;
7783 }
7784
7785 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7786 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7787 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7788 "Expected same element count");
7789
7790 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7791
7792 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7793 if (IsStrict) {
7794 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7795 Op.getOperand(0), Src, Mask, VL);
7796 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7797 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7798 }
7799 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7800 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7801 }
7802 case ISD::FP_TO_SINT_SAT:
7803 case ISD::FP_TO_UINT_SAT:
7804 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7805 case ISD::FP_TO_BF16: {
7806 // Custom lower to ensure the libcall return is passed in an FPR on hard
7807 // float ABIs.
7808 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7809 SDLoc DL(Op);
7810 MakeLibCallOptions CallOptions;
7811 RTLIB::Libcall LC =
7812 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7813 SDValue Res =
7814 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7815 if (Subtarget.is64Bit())
7816 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7817 return DAG.getBitcast(MVT::i32, Res);
7818 }
7819 case ISD::BF16_TO_FP: {
7820 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7821 MVT VT = Op.getSimpleValueType();
7822 SDLoc DL(Op);
7823 Op = DAG.getNode(
7824 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7825 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7826 SDValue Res = Subtarget.is64Bit()
7827 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7828 : DAG.getBitcast(MVT::f32, Op);
7829 // fp_extend if the target VT is bigger than f32.
7830 if (VT != MVT::f32)
7831 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7832 return Res;
7833 }
7834 case ISD::STRICT_FP_TO_FP16:
7835 case ISD::FP_TO_FP16: {
7836 // Custom lower to ensure the libcall return is passed in an FPR on hard
7837 // float ABIs.
7838 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7839 SDLoc DL(Op);
7840 MakeLibCallOptions CallOptions;
7841 bool IsStrict = Op->isStrictFPOpcode();
7842 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7843 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7844 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7845 SDValue Res;
7846 std::tie(Res, Chain) =
7847 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7848 if (Subtarget.is64Bit())
7849 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7850 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7851 if (IsStrict)
7852 return DAG.getMergeValues({Result, Chain}, DL);
7853 return Result;
7854 }
7855 case ISD::STRICT_FP16_TO_FP:
7856 case ISD::FP16_TO_FP: {
7857 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7858 // float ABIs.
7859 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7860 SDLoc DL(Op);
7861 MakeLibCallOptions CallOptions;
7862 bool IsStrict = Op->isStrictFPOpcode();
7863 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7864 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7865 SDValue Arg = Subtarget.is64Bit()
7866 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7867 : DAG.getBitcast(MVT::f32, Op0);
7868 SDValue Res;
7869 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7870 CallOptions, DL, Chain);
7871 if (IsStrict)
7872 return DAG.getMergeValues({Res, Chain}, DL);
7873 return Res;
7874 }
7875 case ISD::FTRUNC:
7876 case ISD::FCEIL:
7877 case ISD::FFLOOR:
7878 case ISD::FNEARBYINT:
7879 case ISD::FRINT:
7880 case ISD::FROUND:
7881 case ISD::FROUNDEVEN:
7882 if (isPromotedOpNeedingSplit(Op, Subtarget))
7883 return SplitVectorOp(Op, DAG);
7884 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7885 case ISD::LRINT:
7886 case ISD::LLRINT:
7887 case ISD::LROUND:
7888 case ISD::LLROUND: {
7889 if (Op.getValueType().isVector())
7890 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7891 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7892 "Unexpected custom legalisation");
7893 SDLoc DL(Op);
7894 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7895 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7896 }
7897 case ISD::STRICT_LRINT:
7898 case ISD::STRICT_LLRINT:
7899 case ISD::STRICT_LROUND:
7900 case ISD::STRICT_LLROUND: {
7901 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7902 "Unexpected custom legalisation");
7903 SDLoc DL(Op);
7904 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7905 {Op.getOperand(0), Op.getOperand(1)});
7906 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7907 {Ext.getValue(1), Ext.getValue(0)});
7908 }
7909 case ISD::VECREDUCE_ADD:
7910 case ISD::VECREDUCE_UMAX:
7911 case ISD::VECREDUCE_SMAX:
7912 case ISD::VECREDUCE_UMIN:
7913 case ISD::VECREDUCE_SMIN:
7914 return lowerVECREDUCE(Op, DAG);
7915 case ISD::VECREDUCE_AND:
7916 case ISD::VECREDUCE_OR:
7917 case ISD::VECREDUCE_XOR:
7918 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7919 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7920 return lowerVECREDUCE(Op, DAG);
7921 case ISD::VECREDUCE_FADD:
7922 case ISD::VECREDUCE_SEQ_FADD:
7923 case ISD::VECREDUCE_FMIN:
7924 case ISD::VECREDUCE_FMAX:
7925 case ISD::VECREDUCE_FMAXIMUM:
7926 case ISD::VECREDUCE_FMINIMUM:
7927 return lowerFPVECREDUCE(Op, DAG);
7928 case ISD::VP_REDUCE_ADD:
7929 case ISD::VP_REDUCE_UMAX:
7930 case ISD::VP_REDUCE_SMAX:
7931 case ISD::VP_REDUCE_UMIN:
7932 case ISD::VP_REDUCE_SMIN:
7933 case ISD::VP_REDUCE_FADD:
7934 case ISD::VP_REDUCE_SEQ_FADD:
7935 case ISD::VP_REDUCE_FMIN:
7936 case ISD::VP_REDUCE_FMAX:
7937 case ISD::VP_REDUCE_FMINIMUM:
7938 case ISD::VP_REDUCE_FMAXIMUM:
7939 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7940 return SplitVectorReductionOp(Op, DAG);
7941 return lowerVPREDUCE(Op, DAG);
7942 case ISD::VP_REDUCE_AND:
7943 case ISD::VP_REDUCE_OR:
7944 case ISD::VP_REDUCE_XOR:
7945 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7946 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7947 return lowerVPREDUCE(Op, DAG);
7948 case ISD::VP_CTTZ_ELTS:
7949 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7950 return lowerVPCttzElements(Op, DAG);
7951 case ISD::UNDEF: {
7952 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7953 return convertFromScalableVector(Op.getSimpleValueType(),
7954 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7955 }
7956 case ISD::INSERT_SUBVECTOR:
7957 return lowerINSERT_SUBVECTOR(Op, DAG);
7958 case ISD::EXTRACT_SUBVECTOR:
7959 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7960 case ISD::VECTOR_DEINTERLEAVE:
7961 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7962 case ISD::VECTOR_INTERLEAVE:
7963 return lowerVECTOR_INTERLEAVE(Op, DAG);
7964 case ISD::STEP_VECTOR:
7965 return lowerSTEP_VECTOR(Op, DAG);
7966 case ISD::VECTOR_REVERSE:
7967 return lowerVECTOR_REVERSE(Op, DAG);
7968 case ISD::VECTOR_SPLICE:
7969 return lowerVECTOR_SPLICE(Op, DAG);
7970 case ISD::BUILD_VECTOR: {
7971 MVT VT = Op.getSimpleValueType();
7972 MVT EltVT = VT.getVectorElementType();
7973 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7974 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7975 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7976 }
7977 case ISD::SPLAT_VECTOR: {
7978 MVT VT = Op.getSimpleValueType();
7979 MVT EltVT = VT.getVectorElementType();
7980 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7981 EltVT == MVT::bf16) {
7982 SDLoc DL(Op);
7983 SDValue Elt;
7984 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7985 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7986 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7987 Op.getOperand(0));
7988 else
7989 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7990 MVT IVT = VT.changeVectorElementType(MVT::i16);
7991 return DAG.getNode(ISD::BITCAST, DL, VT,
7992 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7993 }
7994
7995 if (EltVT == MVT::i1)
7996 return lowerVectorMaskSplat(Op, DAG);
7997 return SDValue();
7998 }
7999 case ISD::VECTOR_SHUFFLE:
8000 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8001 case ISD::CONCAT_VECTORS: {
8002 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8003 // better than going through the stack, as the default expansion does.
8004 SDLoc DL(Op);
8005 MVT VT = Op.getSimpleValueType();
8006 MVT ContainerVT = VT;
8007 if (VT.isFixedLengthVector())
8008 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8009
8010 // Recursively split concat_vectors with more than 2 operands:
8011 //
8012 // concat_vector op1, op2, op3, op4
8013 // ->
8014 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8015 //
8016 // This reduces the length of the chain of vslideups and allows us to
8017 // perform the vslideups at a smaller LMUL, limited to MF2.
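// Illustrative example: a concat_vectors of four operands that produces an
// LMUL=8 result is first split into two LMUL=4 halves, so the inner
// insert_subvector/vslideup steps run on narrower register groups than the
// final full-width result.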
8018 if (Op.getNumOperands() > 2 &&
8019 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8020 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8021 assert(isPowerOf2_32(Op.getNumOperands()));
8022 size_t HalfNumOps = Op.getNumOperands() / 2;
8023 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8024 Op->ops().take_front(HalfNumOps));
8025 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8026 Op->ops().drop_front(HalfNumOps));
8027 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8028 }
8029
8030 unsigned NumOpElts =
8031 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8032 SDValue Vec = DAG.getUNDEF(VT);
8033 for (const auto &OpIdx : enumerate(Op->ops())) {
8034 SDValue SubVec = OpIdx.value();
8035 // Don't insert undef subvectors.
8036 if (SubVec.isUndef())
8037 continue;
8038 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8039 }
8040 return Vec;
8041 }
8042 case ISD::LOAD: {
8043 auto *Load = cast<LoadSDNode>(Op);
8044 EVT VT = Load->getValueType(0);
8045 if (VT == MVT::f64) {
8046 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8047 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8048
8049 // Replace a double precision load with two i32 loads and a BuildPairF64.
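// Illustrative note: with Zdinx on RV32 the f64 lives in a pair of 32-bit
// GPRs, so the value is assembled from two word loads at offsets 0 and 4
// instead of a single 64-bit access.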
8050 SDLoc DL(Op);
8051 SDValue BasePtr = Load->getBasePtr();
8052 SDValue Chain = Load->getChain();
8053
8054 SDValue Lo =
8055 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8056 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8057 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8058 SDValue Hi = DAG.getLoad(
8059 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8060 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8061 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8062 Hi.getValue(1));
8063
8064 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8065 return DAG.getMergeValues({Pair, Chain}, DL);
8066 }
8067
8068 if (VT == MVT::bf16)
8069 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8070
8071 // Handle normal vector tuple load.
8072 if (VT.isRISCVVectorTuple()) {
8073 SDLoc DL(Op);
8074 MVT XLenVT = Subtarget.getXLenVT();
8075 unsigned NF = VT.getRISCVVectorTupleNumFields();
8076 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8077 unsigned NumElts = Sz / (NF * 8);
8078 int Log2LMUL = Log2_64(NumElts) - 3;
8079
8080 auto Flag = SDNodeFlags();
8081 Flag.setNoUnsignedWrap(true);
8082 SDValue Ret = DAG.getUNDEF(VT);
8083 SDValue BasePtr = Load->getBasePtr();
8084 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8085 VROffset =
8086 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8087 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8088 SmallVector<SDValue, 8> OutChains;
8089
8090 // Load NF vector registers and combine them to a vector tuple.
8091 for (unsigned i = 0; i < NF; ++i) {
8092 SDValue LoadVal = DAG.getLoad(
8093 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8094 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8095 OutChains.push_back(LoadVal.getValue(1));
8096 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8097 DAG.getTargetConstant(i, DL, MVT::i32));
8098 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8099 }
8100 return DAG.getMergeValues(
8101 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8102 }
8103
8104 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8105 return V;
8106 if (Op.getValueType().isFixedLengthVector())
8107 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8108 return Op;
8109 }
8110 case ISD::STORE: {
8111 auto *Store = cast<StoreSDNode>(Op);
8112 SDValue StoredVal = Store->getValue();
8113 EVT VT = StoredVal.getValueType();
8114 if (VT == MVT::f64) {
8115 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8116 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8117
8118 // Replace a double precision store with a SplitF64 and i32 stores.
8119 SDLoc DL(Op);
8120 SDValue BasePtr = Store->getBasePtr();
8121 SDValue Chain = Store->getChain();
8122 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8123 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8124
8125 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8126 Store->getPointerInfo(), Store->getBaseAlign(),
8127 Store->getMemOperand()->getFlags());
8128 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8129 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8130 Store->getPointerInfo().getWithOffset(4),
8131 Store->getBaseAlign(),
8132 Store->getMemOperand()->getFlags());
8133 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8134 }
8135 if (VT == MVT::i64) {
8136 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8137 "Unexpected custom legalisation");
8138 if (Store->isTruncatingStore())
8139 return SDValue();
8140
8141 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8142 return SDValue();
8143
8144 SDLoc DL(Op);
8145 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8146 DAG.getTargetConstant(0, DL, MVT::i32));
8147 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8148 DAG.getTargetConstant(1, DL, MVT::i32));
8149
8150 return DAG.getMemIntrinsicNode(
8151 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8152 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8153 Store->getMemOperand());
8154 }
8155
8156 if (VT == MVT::bf16)
8157 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8158
8159 // Handle normal vector tuple store.
8160 if (VT.isRISCVVectorTuple()) {
8161 SDLoc DL(Op);
8162 MVT XLenVT = Subtarget.getXLenVT();
8163 unsigned NF = VT.getRISCVVectorTupleNumFields();
8164 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8165 unsigned NumElts = Sz / (NF * 8);
8166 int Log2LMUL = Log2_64(NumElts) - 3;
8167
8168 auto Flag = SDNodeFlags();
8169 Flag.setNoUnsignedWrap(true);
8170 SDValue Ret;
8171 SDValue Chain = Store->getChain();
8172 SDValue BasePtr = Store->getBasePtr();
8173 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8174 VROffset =
8175 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8176 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8177
8178 // Extract subregisters in a vector tuple and store them individually.
8179 for (unsigned i = 0; i < NF; ++i) {
8180 auto Extract =
8181 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8182 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8183 DAG.getTargetConstant(i, DL, MVT::i32));
8184 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8185 MachinePointerInfo(Store->getAddressSpace()),
8186 Store->getBaseAlign(),
8187 Store->getMemOperand()->getFlags());
8188 Chain = Ret.getValue(0);
8189 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8190 }
8191 return Ret;
8192 }
8193
8194 if (auto V = expandUnalignedRVVStore(Op, DAG))
8195 return V;
8196 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8197 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8198 return Op;
8199 }
8200 case ISD::MLOAD:
8201 case ISD::VP_LOAD:
8202 return lowerMaskedLoad(Op, DAG);
8203 case ISD::VP_LOAD_FF:
8204 return lowerLoadFF(Op, DAG);
8205 case ISD::MSTORE:
8206 case ISD::VP_STORE:
8207 return lowerMaskedStore(Op, DAG);
8208 case ISD::VECTOR_COMPRESS:
8209 return lowerVectorCompress(Op, DAG);
8210 case ISD::SELECT_CC: {
8211 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8212 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8213 // into separate SETCC+SELECT just like LegalizeDAG.
8214 SDValue Tmp1 = Op.getOperand(0);
8215 SDValue Tmp2 = Op.getOperand(1);
8216 SDValue True = Op.getOperand(2);
8217 SDValue False = Op.getOperand(3);
8218 EVT VT = Op.getValueType();
8219 SDValue CC = Op.getOperand(4);
8220 EVT CmpVT = Tmp1.getValueType();
8221 EVT CCVT =
8222 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8223 SDLoc DL(Op);
8224 SDValue Cond =
8225 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8226 return DAG.getSelect(DL, VT, Cond, True, False);
8227 }
8228 case ISD::SETCC: {
8229 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8230 if (OpVT.isScalarInteger()) {
8231 MVT VT = Op.getSimpleValueType();
8232 SDValue LHS = Op.getOperand(0);
8233 SDValue RHS = Op.getOperand(1);
8234 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8235 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8236 "Unexpected CondCode");
8237
8238 SDLoc DL(Op);
8239
8240 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8241 // convert this to the equivalent of (set(u)ge X, C+1) by using
8242 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8243 // in a register.
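// Worked example (illustrative): (setgt X, 5) becomes (xori (slti X, 6), 1),
// i.e. X >= 6, with the constant 6 folded into the slti immediate rather than
// materialized in a register.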
8244 if (isa<ConstantSDNode>(RHS)) {
8245 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8246 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8247 // If this is an unsigned compare and the constant is -1, incrementing
8248 // the constant would change behavior. The result should be false.
8249 if (CCVal == ISD::SETUGT && Imm == -1)
8250 return DAG.getConstant(0, DL, VT);
8251 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8252 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8253 SDValue SetCC = DAG.getSetCC(
8254 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8255 return DAG.getLogicalNOT(DL, SetCC, VT);
8256 }
8257 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
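// (Illustrative: X >u 2047 holds exactly when X >= 2048, i.e. when some bit at
// position 11 or above is set, so the shifted value is nonzero.)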
8258 if (CCVal == ISD::SETUGT && Imm == 2047) {
8259 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8260 DAG.getShiftAmountConstant(11, OpVT, DL));
8261 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8262 ISD::SETNE);
8263 }
8264 }
8265
8266 // Not a constant we could handle, swap the operands and condition code to
8267 // SETLT/SETULT.
8268 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8269 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8270 }
8271
8272 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8273 return SplitVectorOp(Op, DAG);
8274
8275 return lowerToScalableOp(Op, DAG);
8276 }
8277 case ISD::ADD:
8278 case ISD::SUB:
8279 case ISD::MUL:
8280 case ISD::MULHS:
8281 case ISD::MULHU:
8282 case ISD::AND:
8283 case ISD::OR:
8284 case ISD::XOR:
8285 case ISD::SDIV:
8286 case ISD::SREM:
8287 case ISD::UDIV:
8288 case ISD::UREM:
8289 case ISD::BSWAP:
8290 case ISD::CTPOP:
8291 case ISD::VSELECT:
8292 return lowerToScalableOp(Op, DAG);
8293 case ISD::SHL:
8294 case ISD::SRA:
8295 case ISD::SRL:
8296 if (Op.getSimpleValueType().isFixedLengthVector())
8297 return lowerToScalableOp(Op, DAG);
8298 // This can be called for an i32 shift amount that needs to be promoted.
8299 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8300 "Unexpected custom legalisation");
8301 return SDValue();
8302 case ISD::FABS:
8303 case ISD::FNEG:
8304 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8305 return lowerFABSorFNEG(Op, DAG, Subtarget);
8306 [[fallthrough]];
8307 case ISD::FADD:
8308 case ISD::FSUB:
8309 case ISD::FMUL:
8310 case ISD::FDIV:
8311 case ISD::FSQRT:
8312 case ISD::FMA:
8313 case ISD::FMINNUM:
8314 case ISD::FMAXNUM:
8315 case ISD::FMINIMUMNUM:
8316 case ISD::FMAXIMUMNUM:
8317 if (isPromotedOpNeedingSplit(Op, Subtarget))
8318 return SplitVectorOp(Op, DAG);
8319 [[fallthrough]];
8320 case ISD::AVGFLOORS:
8321 case ISD::AVGFLOORU:
8322 case ISD::AVGCEILS:
8323 case ISD::AVGCEILU:
8324 case ISD::SMIN:
8325 case ISD::SMAX:
8326 case ISD::UMIN:
8327 case ISD::UMAX:
8328 case ISD::UADDSAT:
8329 case ISD::USUBSAT:
8330 case ISD::SADDSAT:
8331 case ISD::SSUBSAT:
8332 return lowerToScalableOp(Op, DAG);
8333 case ISD::ABDS:
8334 case ISD::ABDU: {
8335 SDLoc dl(Op);
8336 EVT VT = Op->getValueType(0);
8337 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8338 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8339 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8340
8341 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8342 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8343 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8344 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8345 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8346 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8347 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8348 }
8349 case ISD::ABS:
8350 case ISD::VP_ABS:
8351 return lowerABS(Op, DAG);
8352 case ISD::CTLZ:
8353 case ISD::CTLZ_ZERO_UNDEF:
8354 case ISD::CTTZ:
8355 case ISD::CTTZ_ZERO_UNDEF:
8356 if (Subtarget.hasStdExtZvbb())
8357 return lowerToScalableOp(Op, DAG);
8358 assert(Op.getOpcode() != ISD::CTTZ);
8359 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8360 case ISD::FCOPYSIGN:
8361 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8362 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8363 if (isPromotedOpNeedingSplit(Op, Subtarget))
8364 return SplitVectorOp(Op, DAG);
8365 return lowerToScalableOp(Op, DAG);
8366 case ISD::STRICT_FADD:
8367 case ISD::STRICT_FSUB:
8368 case ISD::STRICT_FMUL:
8369 case ISD::STRICT_FDIV:
8370 case ISD::STRICT_FSQRT:
8371 case ISD::STRICT_FMA:
8372 if (isPromotedOpNeedingSplit(Op, Subtarget))
8373 return SplitStrictFPVectorOp(Op, DAG);
8374 return lowerToScalableOp(Op, DAG);
8375 case ISD::STRICT_FSETCC:
8376 case ISD::STRICT_FSETCCS:
8377 return lowerVectorStrictFSetcc(Op, DAG);
8378 case ISD::STRICT_FCEIL:
8379 case ISD::STRICT_FRINT:
8380 case ISD::STRICT_FFLOOR:
8381 case ISD::STRICT_FTRUNC:
8382 case ISD::STRICT_FNEARBYINT:
8383 case ISD::STRICT_FROUND:
8384 case ISD::STRICT_FROUNDEVEN:
8385 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8386 case ISD::MGATHER:
8387 case ISD::VP_GATHER:
8388 return lowerMaskedGather(Op, DAG);
8389 case ISD::MSCATTER:
8390 case ISD::VP_SCATTER:
8391 return lowerMaskedScatter(Op, DAG);
8392 case ISD::GET_ROUNDING:
8393 return lowerGET_ROUNDING(Op, DAG);
8394 case ISD::SET_ROUNDING:
8395 return lowerSET_ROUNDING(Op, DAG);
8396 case ISD::GET_FPENV:
8397 return lowerGET_FPENV(Op, DAG);
8398 case ISD::SET_FPENV:
8399 return lowerSET_FPENV(Op, DAG);
8400 case ISD::RESET_FPENV:
8401 return lowerRESET_FPENV(Op, DAG);
8402 case ISD::GET_FPMODE:
8403 return lowerGET_FPMODE(Op, DAG);
8404 case ISD::SET_FPMODE:
8405 return lowerSET_FPMODE(Op, DAG);
8406 case ISD::RESET_FPMODE:
8407 return lowerRESET_FPMODE(Op, DAG);
8408 case ISD::EH_DWARF_CFA:
8409 return lowerEH_DWARF_CFA(Op, DAG);
8410 case ISD::VP_MERGE:
8411 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8412 return lowerVPMergeMask(Op, DAG);
8413 [[fallthrough]];
8414 case ISD::VP_SELECT:
8415 case ISD::VP_ADD:
8416 case ISD::VP_SUB:
8417 case ISD::VP_MUL:
8418 case ISD::VP_SDIV:
8419 case ISD::VP_UDIV:
8420 case ISD::VP_SREM:
8421 case ISD::VP_UREM:
8422 case ISD::VP_UADDSAT:
8423 case ISD::VP_USUBSAT:
8424 case ISD::VP_SADDSAT:
8425 case ISD::VP_SSUBSAT:
8426 case ISD::VP_LRINT:
8427 case ISD::VP_LLRINT:
8428 return lowerVPOp(Op, DAG);
8429 case ISD::VP_AND:
8430 case ISD::VP_OR:
8431 case ISD::VP_XOR:
8432 return lowerLogicVPOp(Op, DAG);
8433 case ISD::VP_FADD:
8434 case ISD::VP_FSUB:
8435 case ISD::VP_FMUL:
8436 case ISD::VP_FDIV:
8437 case ISD::VP_FNEG:
8438 case ISD::VP_FABS:
8439 case ISD::VP_SQRT:
8440 case ISD::VP_FMA:
8441 case ISD::VP_FMINNUM:
8442 case ISD::VP_FMAXNUM:
8443 case ISD::VP_FCOPYSIGN:
8444 if (isPromotedOpNeedingSplit(Op, Subtarget))
8445 return SplitVPOp(Op, DAG);
8446 [[fallthrough]];
8447 case ISD::VP_SRA:
8448 case ISD::VP_SRL:
8449 case ISD::VP_SHL:
8450 return lowerVPOp(Op, DAG);
8451 case ISD::VP_IS_FPCLASS:
8452 return LowerIS_FPCLASS(Op, DAG);
8453 case ISD::VP_SIGN_EXTEND:
8454 case ISD::VP_ZERO_EXTEND:
8455 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8456 return lowerVPExtMaskOp(Op, DAG);
8457 return lowerVPOp(Op, DAG);
8458 case ISD::VP_TRUNCATE:
8459 return lowerVectorTruncLike(Op, DAG);
8460 case ISD::VP_FP_EXTEND:
8461 case ISD::VP_FP_ROUND:
8462 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8463 case ISD::VP_SINT_TO_FP:
8464 case ISD::VP_UINT_TO_FP:
8465 if (Op.getValueType().isVector() &&
8466 ((Op.getValueType().getScalarType() == MVT::f16 &&
8467 (Subtarget.hasVInstructionsF16Minimal() &&
8468 !Subtarget.hasVInstructionsF16())) ||
8469 Op.getValueType().getScalarType() == MVT::bf16)) {
8470 if (isPromotedOpNeedingSplit(Op, Subtarget))
8471 return SplitVectorOp(Op, DAG);
8472 // int -> f32
8473 SDLoc DL(Op);
8474 MVT NVT =
8475 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8476 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8477 // f32 -> [b]f16
8478 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8479 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8480 }
8481 [[fallthrough]];
8482 case ISD::VP_FP_TO_SINT:
8483 case ISD::VP_FP_TO_UINT:
8484 if (SDValue Op1 = Op.getOperand(0);
8485 Op1.getValueType().isVector() &&
8486 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8487 (Subtarget.hasVInstructionsF16Minimal() &&
8488 !Subtarget.hasVInstructionsF16())) ||
8489 Op1.getValueType().getScalarType() == MVT::bf16)) {
8490 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8491 return SplitVectorOp(Op, DAG);
8492 // [b]f16 -> f32
8493 SDLoc DL(Op);
8494 MVT NVT = MVT::getVectorVT(MVT::f32,
8495 Op1.getValueType().getVectorElementCount());
8496 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8497 // f32 -> int
8498 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8499 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8500 }
8501 return lowerVPFPIntConvOp(Op, DAG);
8502 case ISD::VP_SETCC:
8503 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8504 return SplitVPOp(Op, DAG);
8505 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8506 return lowerVPSetCCMaskOp(Op, DAG);
8507 [[fallthrough]];
8508 case ISD::VP_SMIN:
8509 case ISD::VP_SMAX:
8510 case ISD::VP_UMIN:
8511 case ISD::VP_UMAX:
8512 case ISD::VP_BITREVERSE:
8513 case ISD::VP_BSWAP:
8514 return lowerVPOp(Op, DAG);
8515 case ISD::VP_CTLZ:
8516 case ISD::VP_CTLZ_ZERO_UNDEF:
8517 if (Subtarget.hasStdExtZvbb())
8518 return lowerVPOp(Op, DAG);
8519 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8520 case ISD::VP_CTTZ:
8521 case ISD::VP_CTTZ_ZERO_UNDEF:
8522 if (Subtarget.hasStdExtZvbb())
8523 return lowerVPOp(Op, DAG);
8524 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8525 case ISD::VP_CTPOP:
8526 return lowerVPOp(Op, DAG);
8527 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8528 return lowerVPStridedLoad(Op, DAG);
8529 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8530 return lowerVPStridedStore(Op, DAG);
8531 case ISD::VP_FCEIL:
8532 case ISD::VP_FFLOOR:
8533 case ISD::VP_FRINT:
8534 case ISD::VP_FNEARBYINT:
8535 case ISD::VP_FROUND:
8536 case ISD::VP_FROUNDEVEN:
8537 case ISD::VP_FROUNDTOZERO:
8538 if (isPromotedOpNeedingSplit(Op, Subtarget))
8539 return SplitVPOp(Op, DAG);
8540 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8541 case ISD::VP_FMAXIMUM:
8542 case ISD::VP_FMINIMUM:
8543 if (isPromotedOpNeedingSplit(Op, Subtarget))
8544 return SplitVPOp(Op, DAG);
8545 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8546 case ISD::EXPERIMENTAL_VP_SPLICE:
8547 return lowerVPSpliceExperimental(Op, DAG);
8548 case ISD::EXPERIMENTAL_VP_REVERSE:
8549 return lowerVPReverseExperimental(Op, DAG);
8550 case ISD::EXPERIMENTAL_VP_SPLAT:
8551 return lowerVPSplatExperimental(Op, DAG);
8552 case ISD::CLEAR_CACHE: {
8553 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8554 "llvm.clear_cache only needs custom lower on Linux targets");
8555 SDLoc DL(Op);
8556 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8557 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8558 Op.getOperand(2), Flags, DL);
8559 }
8560 case ISD::DYNAMIC_STACKALLOC:
8561 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8562 case ISD::INIT_TRAMPOLINE:
8563 return lowerINIT_TRAMPOLINE(Op, DAG);
8564 case ISD::ADJUST_TRAMPOLINE:
8565 return lowerADJUST_TRAMPOLINE(Op, DAG);
8566 case ISD::PARTIAL_REDUCE_UMLA:
8567 case ISD::PARTIAL_REDUCE_SMLA:
8568 case ISD::PARTIAL_REDUCE_SUMLA:
8569 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8570 }
8571}
8572
8573SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8574 SDValue Start, SDValue End,
8575 SDValue Flags, SDLoc DL) const {
8576 MakeLibCallOptions CallOptions;
8577 std::pair<SDValue, SDValue> CallResult =
8578 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8579 {Start, End, Flags}, CallOptions, DL, InChain);
8580
8581 // This function returns void so only the out chain matters.
8582 return CallResult.second;
8583}
8584
8585SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8586 SelectionDAG &DAG) const {
8587 if (!Subtarget.is64Bit())
8588 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8589
8590 // Create an MCCodeEmitter to encode instructions.
8591 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8592 assert(TLO);
8593 MCContext &MCCtx = TLO->getContext();
8594
8595 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8596 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8597
8598 SDValue Root = Op.getOperand(0);
8599 SDValue Trmp = Op.getOperand(1); // trampoline
8600 SDLoc dl(Op);
8601
8602 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8603
8604 // We store in the trampoline buffer the following instructions and data.
8605 // Offset:
8606 // 0: auipc t2, 0
8607 // 4: ld t0, 24(t2)
8608 // 8: ld t2, 16(t2)
8609 // 12: jalr t0
8610 // 16: <StaticChainOffset>
8611 // 24: <FunctionAddressOffset>
8612 // 32:
8613 // Offset with branch control flow protection enabled:
8614 // 0: lpad <imm20>
8615 // 4: auipc t3, 0
8616 // 8: ld t2, 28(t3)
8617 // 12: ld t3, 20(t3)
8618 // 16: jalr t2
8619 // 20: <StaticChainOffset>
8620 // 28: <FunctionAddressOffset>
8621 // 36:
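// Illustrative note: the leading 32-bit slots hold the encoded instructions,
// and the two 8-byte slots that follow hold the static chain value and the
// target function address, which the ld instructions above fetch PC-relative.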
8622
8623 const bool HasCFBranch =
8624 Subtarget.hasStdExtZicfilp() &&
8625 DAG.getMachineFunction().getFunction().getParent()->getModuleFlag(
8626 "cf-protection-branch");
8627 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8628 const unsigned StaticChainOffset = StaticChainIdx * 4;
8629 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8630
8631 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8632 assert(STI);
8633 auto GetEncoding = [&](const MCInst &MC) {
8634 SmallVector<char, 32> CB;
8635 SmallVector<MCFixup> Fixups;
8636 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8637 uint32_t Encoding = support::endian::read32le(CB.data());
8638 return Encoding;
8639 };
8640
8641 SmallVector<SDValue> OutChains;
8642
8643 SmallVector<uint32_t> Encodings;
8644 if (!HasCFBranch) {
8645 Encodings.append(
8646 {// auipc t2, 0
8647 // Loads the current PC into t2.
8648 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8649 // ld t0, 24(t2)
8650 // Loads the function address into t0. Note that we are using offsets
8651 // pc-relative to the first instruction of the trampoline.
8652 GetEncoding(MCInstBuilder(RISCV::LD)
8653 .addReg(RISCV::X5)
8654 .addReg(RISCV::X7)
8655 .addImm(FunctionAddressOffset)),
8656 // ld t2, 16(t2)
8657 // Load the value of the static chain.
8658 GetEncoding(MCInstBuilder(RISCV::LD)
8659 .addReg(RISCV::X7)
8660 .addReg(RISCV::X7)
8661 .addImm(StaticChainOffset)),
8662 // jalr t0
8663 // Jump to the function.
8664 GetEncoding(MCInstBuilder(RISCV::JALR)
8665 .addReg(RISCV::X0)
8666 .addReg(RISCV::X5)
8667 .addImm(0))});
8668 } else {
8669 Encodings.append(
8670 {// auipc x0, <imm20> (lpad <imm20>)
8671 // Landing pad.
8672 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8673 // auipc t3, 0
8674 // Loads the current PC into t3.
8675 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8676 // ld t2, (FunctionAddressOffset - 4)(t3)
8677 // Loads the function address into t2. Note that we are using offsets
8678 // pc-relative to the SECOND instruction of the trampoline.
8679 GetEncoding(MCInstBuilder(RISCV::LD)
8680 .addReg(RISCV::X7)
8681 .addReg(RISCV::X28)
8682 .addImm(FunctionAddressOffset - 4)),
8683 // ld t3, (StaticChainOffset - 4)(t3)
8684 // Load the value of the static chain.
8685 GetEncoding(MCInstBuilder(RISCV::LD)
8686 .addReg(RISCV::X28)
8687 .addReg(RISCV::X28)
8688 .addImm(StaticChainOffset - 4)),
8689 // jalr t2
8690 // Software-guarded jump to the function.
8691 GetEncoding(MCInstBuilder(RISCV::JALR)
8692 .addReg(RISCV::X0)
8693 .addReg(RISCV::X7)
8694 .addImm(0))});
8695 }
8696
8697 // Store encoded instructions.
8698 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8699 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8700 DAG.getConstant(Idx * 4, dl, MVT::i64))
8701 : Trmp;
8702 OutChains.push_back(DAG.getTruncStore(
8703 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8704 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8705 }
8706
8707 // Now store the variable part of the trampoline.
8708 SDValue FunctionAddress = Op.getOperand(2);
8709 SDValue StaticChain = Op.getOperand(3);
8710
8711 // Store the given static chain and function pointer in the trampoline buffer.
8712 struct OffsetValuePair {
8713 const unsigned Offset;
8714 const SDValue Value;
8715 SDValue Addr = SDValue(); // Used to cache the address.
8716 } OffsetValues[] = {
8717 {StaticChainOffset, StaticChain},
8718 {FunctionAddressOffset, FunctionAddress},
8719 };
8720 for (auto &OffsetValue : OffsetValues) {
8721 SDValue Addr =
8722 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8723 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8724 OffsetValue.Addr = Addr;
8725 OutChains.push_back(
8726 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8727 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8728 }
8729
8730 assert(OutChains.size() == StaticChainIdx + 2 &&
8731 "Size of OutChains mismatch");
8732 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8733
8734 // The end of the trampoline's instructions is the same as the static chain
8735 // address that we computed earlier.
8736 SDValue EndOfTrmp = OffsetValues[0].Addr;
8737
8738 // Call clear cache on the trampoline instructions.
8739 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8740 Trmp, EndOfTrmp);
8741
8742 return Chain;
8743}
8744
8745SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8746 SelectionDAG &DAG) const {
8747 if (!Subtarget.is64Bit())
8748 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8749
8750 return Op.getOperand(0);
8751}
8752
8753SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8754 SelectionDAG &DAG) const {
8755 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8756 // TODO: There are many other sub-cases we could potentially lower, are
8757 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
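// Illustrative note (assuming the usual zvqdotq semantics): each 32-bit result
// element accumulates the dot product of the four corresponding 8-bit lanes of
// the two sources into the accumulator element.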
8758 SDLoc DL(Op);
8759 MVT VT = Op.getSimpleValueType();
8760 SDValue Accum = Op.getOperand(0);
8761 assert(Accum.getSimpleValueType() == VT &&
8762 VT.getVectorElementType() == MVT::i32);
8763 SDValue A = Op.getOperand(1);
8764 SDValue B = Op.getOperand(2);
8765 MVT ArgVT = A.getSimpleValueType();
8766 assert(ArgVT == B.getSimpleValueType() &&
8767 ArgVT.getVectorElementType() == MVT::i8);
8768 (void)ArgVT;
8769
8770 // The zvqdotq pseudos are defined with sources and destination both
8771 // being i32. This cast is needed for correctness to avoid incorrect
8772 // .vx matching of i8 splats.
8773 A = DAG.getBitcast(VT, A);
8774 B = DAG.getBitcast(VT, B);
8775
8776 MVT ContainerVT = VT;
8777 if (VT.isFixedLengthVector()) {
8778 ContainerVT = getContainerForFixedLengthVector(VT);
8779 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8780 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8781 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8782 }
8783
8784 unsigned Opc;
8785 switch (Op.getOpcode()) {
8786 case ISD::PARTIAL_REDUCE_SMLA:
8787 Opc = RISCVISD::VQDOT_VL;
8788 break;
8789 case ISD::PARTIAL_REDUCE_UMLA:
8790 Opc = RISCVISD::VQDOTU_VL;
8791 break;
8792 case ISD::PARTIAL_REDUCE_SUMLA:
8793 Opc = RISCVISD::VQDOTSU_VL;
8794 break;
8795 default:
8796 llvm_unreachable("Unexpected opcode");
8797 }
8798 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8799 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8800 if (VT.isFixedLengthVector())
8801 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8802 return Res;
8803}
8804
8805static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8806 SelectionDAG &DAG, unsigned Flags) {
8807 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8808}
8809
8810static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8811 SelectionDAG &DAG, unsigned Flags) {
8812 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8813 Flags);
8814}
8815
8816static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8817 SelectionDAG &DAG, unsigned Flags) {
8818 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8819 N->getOffset(), Flags);
8820}
8821
8822static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8823 SelectionDAG &DAG, unsigned Flags) {
8824 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8825}
8826
8827static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8828 EVT Ty, SelectionDAG &DAG) {
8829 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8830 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8831 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8832 return DAG.getLoad(
8833 Ty, DL, DAG.getEntryNode(), LC,
8834 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8835}
8836
8837static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8838 EVT Ty, SelectionDAG &DAG) {
8839 RISCVConstantPoolValue *CPV =
8840 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8841 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8842 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8843 return DAG.getLoad(
8844 Ty, DL, DAG.getEntryNode(), LC,
8845 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8846}
8847
8848template <class NodeTy>
8849SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8850 bool IsLocal, bool IsExternWeak) const {
8851 SDLoc DL(N);
8852 EVT Ty = getPointerTy(DAG.getDataLayout());
8853
8854 // When HWASAN is used and tagging of global variables is enabled
8855 // they should be accessed via the GOT, since the tagged address of a global
8856 // is incompatible with existing code models. This also applies to non-pic
8857 // mode.
8858 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8859 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8860 if (IsLocal && !Subtarget.allowTaggedGlobals())
8861 // Use PC-relative addressing to access the symbol. This generates the
8862 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8863 // %pcrel_lo(auipc)).
8864 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8865
8866 // Use PC-relative addressing to access the GOT for this symbol, then load
8867 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8868 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8869 SDValue Load =
8870 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8871 MachineFunction &MF = DAG.getMachineFunction();
8872 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8873 MachinePointerInfo::getGOT(MF),
8874 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8875 MachineMemOperand::MOInvariant,
8876 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8877 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8878 return Load;
8879 }
8880
8881 switch (getTargetMachine().getCodeModel()) {
8882 default:
8883 reportFatalUsageError("Unsupported code model for lowering");
8884 case CodeModel::Small: {
8885 // Generate a sequence for accessing addresses within the first 2 GiB of
8886 // address space.
8887 if (Subtarget.hasVendorXqcili()) {
8888 // Use QC.E.LI to generate the address, as this is easier to relax than
8889 // LUI/ADDI.
8890 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8891 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8892 }
8893
8894 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8895 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8896 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8897 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8898 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8899 }
8900 case CodeModel::Medium: {
8901 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8902 if (IsExternWeak) {
8903 // An extern weak symbol may be undefined, i.e. have value 0, which may
8904 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8905 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8906 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8907 SDValue Load =
8908 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8909 MachineFunction &MF = DAG.getMachineFunction();
8910 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8911 MachinePointerInfo::getGOT(MF),
8912 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8913 MachineMemOperand::MOInvariant,
8914 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8915 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8916 return Load;
8917 }
8918
8919 // Generate a sequence for accessing addresses within any 2GiB range within
8920 // the address space. This generates the pattern (PseudoLLA sym), which
8921 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8922 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8923 }
8924 case CodeModel::Large: {
8925 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8926 return getLargeGlobalAddress(G, DL, Ty, DAG);
8927
8928 // Use PC-relative addressing for other node types.
8929 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8930 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8931 }
8932 }
8933}
8934
8935SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8936 SelectionDAG &DAG) const {
8937 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8938 assert(N->getOffset() == 0 && "unexpected offset in global node");
8939 const GlobalValue *GV = N->getGlobal();
8940 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8941}
8942
8943SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8944 SelectionDAG &DAG) const {
8945 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8946
8947 return getAddr(N, DAG);
8948}
8949
8950SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8951 SelectionDAG &DAG) const {
8952 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8953
8954 return getAddr(N, DAG);
8955}
8956
8957SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8958 SelectionDAG &DAG) const {
8959 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8960
8961 return getAddr(N, DAG);
8962}
8963
8964SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8965 SelectionDAG &DAG,
8966 bool UseGOT) const {
8967 SDLoc DL(N);
8968 EVT Ty = getPointerTy(DAG.getDataLayout());
8969 const GlobalValue *GV = N->getGlobal();
8970 MVT XLenVT = Subtarget.getXLenVT();
8971
8972 if (UseGOT) {
8973 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8974 // load the address from the GOT and add the thread pointer. This generates
8975 // the pattern (PseudoLA_TLS_IE sym), which expands to
8976 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8977 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8978 SDValue Load =
8979 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8980 MachineFunction &MF = DAG.getMachineFunction();
8981 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8982 MachinePointerInfo::getGOT(MF),
8983 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8984 MachineMemOperand::MOInvariant,
8985 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8986 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8987
8988 // Add the thread pointer.
8989 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8990 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8991 }
8992
8993 // Generate a sequence for accessing the address relative to the thread
8994 // pointer, with the appropriate adjustment for the thread pointer offset.
8995 // This generates the pattern
8996 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8997 SDValue AddrHi =
8998 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8999 SDValue AddrAdd =
9000 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9001 SDValue AddrLo =
9002 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9003
9004 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9005 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9006 SDValue MNAdd =
9007 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9008 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9009}
9010
9011SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9012 SelectionDAG &DAG) const {
9013 SDLoc DL(N);
9014 EVT Ty = getPointerTy(DAG.getDataLayout());
9015 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9016 const GlobalValue *GV = N->getGlobal();
9017
9018 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9019 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9020 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9021 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9022 SDValue Load =
9023 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9024
9025 // Prepare argument list to generate call.
9026 TargetLowering::ArgListTy Args;
9027 Args.emplace_back(Load, CallTy);
9028
9029 // Setup call to __tls_get_addr.
9030 TargetLowering::CallLoweringInfo CLI(DAG);
9031 CLI.setDebugLoc(DL)
9032 .setChain(DAG.getEntryNode())
9033 .setLibCallee(CallingConv::C, CallTy,
9034 DAG.getExternalSymbol("__tls_get_addr", Ty),
9035 std::move(Args));
9036
9037 return LowerCallTo(CLI).first;
9038}
9039
9040SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9041 SelectionDAG &DAG) const {
9042 SDLoc DL(N);
9043 EVT Ty = getPointerTy(DAG.getDataLayout());
9044 const GlobalValue *GV = N->getGlobal();
9045
9046 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9047 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9048 //
9049 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9050 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9051 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9052 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9053 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9054 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9055}
9056
9057SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9058 SelectionDAG &DAG) const {
9059 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9060 assert(N->getOffset() == 0 && "unexpected offset in global node");
9061
9062 if (DAG.getTarget().useEmulatedTLS())
9063 return LowerToTLSEmulatedModel(N, DAG);
9064
9065 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9066
9067 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9068 CallingConv::GHC)
9069 reportFatalUsageError("In GHC calling convention TLS is not supported");
9070
9071 SDValue Addr;
9072 switch (Model) {
9073 case TLSModel::LocalExec:
9074 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9075 break;
9076 case TLSModel::InitialExec:
9077 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9078 break;
9079 case TLSModel::LocalDynamic:
9080 case TLSModel::GeneralDynamic:
9081 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9082 : getDynamicTLSAddr(N, DAG);
9083 break;
9084 }
9085
9086 return Addr;
9087}
9088
9089// Return true if Val is equal to (setcc LHS, RHS, CC).
9090// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9091// Otherwise, return std::nullopt.
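// Worked example (illustrative): with Val = (setcc a, b, setlt),
// matchSetCC(a, b, setlt, Val) returns true, matchSetCC(a, b, setge, Val)
// returns false, and unrelated operands yield std::nullopt.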
9092static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9093 ISD::CondCode CC, SDValue Val) {
9094 assert(Val->getOpcode() == ISD::SETCC);
9095 SDValue LHS2 = Val.getOperand(0);
9096 SDValue RHS2 = Val.getOperand(1);
9097 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9098
9099 if (LHS == LHS2 && RHS == RHS2) {
9100 if (CC == CC2)
9101 return true;
9102 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9103 return false;
9104 } else if (LHS == RHS2 && RHS == LHS2) {
9105 CC2 = ISD::getSetCCSwappedOperands(CC2);
9106 if (CC == CC2)
9107 return true;
9108 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9109 return false;
9110 }
9111
9112 return std::nullopt;
9113}
9114
9115static bool isSimm12Constant(SDValue V) {
9116 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9117}
9118
9119static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9120 const RISCVSubtarget &Subtarget) {
9121 SDValue CondV = N->getOperand(0);
9122 SDValue TrueV = N->getOperand(1);
9123 SDValue FalseV = N->getOperand(2);
9124 MVT VT = N->getSimpleValueType(0);
9125 SDLoc DL(N);
9126
9127 if (!Subtarget.hasConditionalMoveFusion()) {
9128 // (select c, -1, y) -> -c | y
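// (Illustrative: with a boolean c, -c is 0 or all-ones, so the OR yields y or
// -1 respectively.)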
9129 if (isAllOnesConstant(TrueV)) {
9130 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9131 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9132 }
9133 // (select c, y, -1) -> (c-1) | y
9134 if (isAllOnesConstant(FalseV)) {
9135 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9136 DAG.getAllOnesConstant(DL, VT));
9137 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9138 }
9139
9140 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9141
9142 // (select c, 0, y) -> (c-1) & y
9143 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9144 SDValue Neg =
9145 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9146 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9147 }
9148 if (isNullConstant(FalseV)) {
9149 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9150 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9151 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9152 if (isPowerOf2_64(TrueM1)) {
9153 unsigned ShAmount = Log2_64(TrueM1);
9154 if (Subtarget.hasShlAdd(ShAmount))
9155 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9156 DAG.getConstant(ShAmount, DL, VT), CondV);
9157 }
9158 }
9159 // (select c, y, 0) -> -c & y
9160 if (!HasCZero || isSimm12Constant(TrueV)) {
9161 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9162 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9163 }
9164 }
9165 }
9166
9167 // select c, ~x, x --> xor -c, x
9168 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9169 const APInt &TrueVal = TrueV->getAsAPIntVal();
9170 const APInt &FalseVal = FalseV->getAsAPIntVal();
9171 if (~TrueVal == FalseVal) {
9172 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9173 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9174 }
9175 }
9176
9177 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9178 // when both truev and falsev are also setcc.
9179 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9180 FalseV.getOpcode() == ISD::SETCC) {
9181 SDValue LHS = CondV.getOperand(0);
9182 SDValue RHS = CondV.getOperand(1);
9183 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9184
9185 // (select x, x, y) -> x | y
9186 // (select !x, x, y) -> x & y
9187 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9188 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9189 DAG.getFreeze(FalseV));
9190 }
9191 // (select x, y, x) -> x & y
9192 // (select !x, y, x) -> x | y
9193 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9194 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9195 DAG.getFreeze(TrueV), FalseV);
9196 }
9197 }
9198
9199 return SDValue();
9200}
9201
9202// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9203// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9204// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
9205// being `0` or `-1`. In such cases we can replace `select` with `and`.
9206// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9207// than `c0`?
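// Worked example (illustrative): (and (select cond, x, 0), c1)
// -> (select cond, (and x, c1), 0), since binOp(c0, c1) = (and 0, c1) is the
// free constant 0.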
9208static SDValue
9209foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9210 const RISCVSubtarget &Subtarget) {
9211 if (Subtarget.hasShortForwardBranchOpt())
9212 return SDValue();
9213
9214 unsigned SelOpNo = 0;
9215 SDValue Sel = BO->getOperand(0);
9216 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9217 SelOpNo = 1;
9218 Sel = BO->getOperand(1);
9219 }
9220
9221 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9222 return SDValue();
9223
9224 unsigned ConstSelOpNo = 1;
9225 unsigned OtherSelOpNo = 2;
9226 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9227 ConstSelOpNo = 2;
9228 OtherSelOpNo = 1;
9229 }
9230 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9231 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9232 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9233 return SDValue();
9234
9235 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9236 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9237 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9238 return SDValue();
9239
9240 SDLoc DL(Sel);
9241 EVT VT = BO->getValueType(0);
9242
9243 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9244 if (SelOpNo == 1)
9245 std::swap(NewConstOps[0], NewConstOps[1]);
9246
9247 SDValue NewConstOp =
9248 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9249 if (!NewConstOp)
9250 return SDValue();
9251
9252 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9253 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9254 return SDValue();
9255
9256 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9257 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9258 if (SelOpNo == 1)
9259 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9260 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9261
9262 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9263 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9264 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9265}
9266
9267SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9268 SDValue CondV = Op.getOperand(0);
9269 SDValue TrueV = Op.getOperand(1);
9270 SDValue FalseV = Op.getOperand(2);
9271 SDLoc DL(Op);
9272 MVT VT = Op.getSimpleValueType();
9273 MVT XLenVT = Subtarget.getXLenVT();
9274
9275 // Lower vector SELECTs to VSELECTs by splatting the condition.
9276 if (VT.isVector()) {
9277 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9278 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9279 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9280 }
9281
9282 // Try some other optimizations before falling back to generic lowering.
9283 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9284 return V;
9285
9286 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9287 // nodes to implement the SELECT. Performing the lowering here allows for
9288 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9289 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9290 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9291
9292 // (select c, t, 0) -> (czero_eqz t, c)
9293 if (isNullConstant(FalseV))
9294 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9295 // (select c, 0, f) -> (czero_nez f, c)
9296 if (isNullConstant(TrueV))
9297 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9298
9299 // Check to see if a given operation is a 'NOT'; if so, return the negated
9300 // operand.
9301 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9302 using namespace llvm::SDPatternMatch;
9303 SDValue Xor;
9304 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9305 return Xor;
9306 }
9307 return std::nullopt;
9308 };
9309 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9310 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9311 if (TrueV.getOpcode() == ISD::AND &&
9312 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9313 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9314 ? getNotOperand(TrueV.getOperand(1))
9315 : getNotOperand(TrueV.getOperand(0));
9316 if (NotOperand) {
9317 SDValue CMOV =
9318 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9319 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9320 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9321 }
9322 return DAG.getNode(
9323 ISD::OR, DL, VT, TrueV,
9324 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9325 }
9326
9327 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9328 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9329 if (FalseV.getOpcode() == ISD::AND &&
9330 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9331 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9332 ? getNotOperand(FalseV.getOperand(1))
9333 : getNotOperand(FalseV.getOperand(0));
9334 if (NotOperand) {
9335 SDValue CMOV =
9336 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9337 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9338 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9339 }
9340 return DAG.getNode(
9341 ISD::OR, DL, VT, FalseV,
9342 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9343 }
9344
9345 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9346 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9347 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9348 const APInt &TrueVal = TrueV->getAsAPIntVal();
9349 const APInt &FalseVal = FalseV->getAsAPIntVal();
9350
9351 // Prefer these over Zicond to avoid materializing an immediate:
9352 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9353 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
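// (Illustrative: the arithmetic shift x >> (XLEN - 1) is all-ones when x < 0
// and zero otherwise, so the AND keeps (y - z) or 0 and the ADD then yields y
// or z.)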
9354 if (CondV.getOpcode() == ISD::SETCC &&
9355 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9356 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9357 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9358 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9359 int64_t TrueImm = TrueVal.getSExtValue();
9360 int64_t FalseImm = FalseVal.getSExtValue();
9361 if (CCVal == ISD::SETGT)
9362 std::swap(TrueImm, FalseImm);
9363 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9364 isInt<12>(TrueImm - FalseImm)) {
9365 SDValue SRA =
9366 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9367 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9368 SDValue AND =
9369 DAG.getNode(ISD::AND, DL, VT, SRA,
9370 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9371 return DAG.getNode(ISD::ADD, DL, VT, AND,
9372 DAG.getSignedConstant(FalseImm, DL, VT));
9373 }
9374 }
9375 }
9376
9377 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9378 // a constant in a register.
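// Worked example (illustrative): (select c, 17, 1) has TrueVal - FalseVal =
// 16 = 1 << 4, so it lowers to (add 1, (shl c, 4)) without materializing a
// separate constant.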
9379 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9380 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9381 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9382 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9383 }
9384 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9385 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9386 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9387 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9388 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9389 }
9390
9391 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9392 const int DeltaCost = RISCVMatInt::getIntMatCost(
9393 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9394 // Does the addend fold into an ADDI
9395 if (Addend.isSignedIntN(12))
9396 return DeltaCost;
9397 const int AddendCost = RISCVMatInt::getIntMatCost(
9398 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9399 return AddendCost + DeltaCost;
9400 };
9401 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9402 getCost(TrueVal - FalseVal, FalseVal);
9403 SDValue LHSVal = DAG.getConstant(
9404 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9405 SDValue CMOV =
9406 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9407 DL, VT, LHSVal, CondV);
9408 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9409 }
9410
9411 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9412 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9413 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9414 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9415 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9416 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9417 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9418 // Fall back to XORI if Const == -0x800
9419 if (RawConstVal == -0x800) {
9420 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9421 SDValue CMOV =
9422 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9423 DL, VT, XorOp, CondV);
9424 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9425 }
9426 // Efficient only if the constant and its negation fit into `ADDI`
9427 // Prefer Add/Sub over Xor since can be compressed for small immediates
9428 if (isInt<12>(RawConstVal)) {
9429 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9430 SDValue CMOV =
9431 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9432 DL, VT, SubOp, CondV);
9433 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9434 }
9435 }
9436
9437 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9438 // Unless we have the short forward branch optimization.
9439 if (!Subtarget.hasConditionalMoveFusion())
9440 return DAG.getNode(
9441 ISD::OR, DL, VT,
9442 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9443 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9445 }
9446
9447 if (Op.hasOneUse()) {
9448 unsigned UseOpc = Op->user_begin()->getOpcode();
9449 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9450 SDNode *BinOp = *Op->user_begin();
9451 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9452 DAG, Subtarget)) {
9453 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9454 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9455 // may return a constant node and cause crash in lowerSELECT.
9456 if (NewSel.getOpcode() == ISD::SELECT)
9457 return lowerSELECT(NewSel, DAG);
9458 return NewSel;
9459 }
9460 }
9461 }
9462
9463 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9464 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9465 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9466 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9467 if (FPTV && FPFV) {
9468 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9469 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9470 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9471 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9472 DAG.getConstant(1, DL, XLenVT));
9473 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9474 }
9475 }
9476
9477 // If the condition is not an integer SETCC which operates on XLenVT, we need
9478 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9479 // (select condv, truev, falsev)
9480 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9481 if (CondV.getOpcode() != ISD::SETCC ||
9482 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9483 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9484 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9485
9486 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9487
9488 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9489 }
9490
9491 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9492 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9493 // advantage of the integer compare+branch instructions. i.e.:
9494 // (select (setcc lhs, rhs, cc), truev, falsev)
9495 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9496 SDValue LHS = CondV.getOperand(0);
9497 SDValue RHS = CondV.getOperand(1);
9498 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9499
9500 // Special case for a select of 2 constants that have a difference of 1.
9501 // Normally this is done by DAGCombine, but if the select is introduced by
9502 // type legalization or op legalization, we miss it. Restricting to SETLT
9503 // case for now because that is what signed saturating add/sub need.
9504 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9505 // but we would probably want to swap the true/false values if the condition
9506 // is SETGE/SETLE to avoid an XORI.
9507 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9508 CCVal == ISD::SETLT) {
9509 const APInt &TrueVal = TrueV->getAsAPIntVal();
9510 const APInt &FalseVal = FalseV->getAsAPIntVal();
9511 if (TrueVal - 1 == FalseVal)
9512 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9513 if (TrueVal + 1 == FalseVal)
9514 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9515 }
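  // For example, (select (setlt x, y), 4, 3) becomes (add (setlt x, y), 3) and
  // (select (setlt x, y), 3, 4) becomes (sub 4, (setlt x, y)), so no branch or
  // conditional move is needed.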
9516
9517 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9518 // 1 < x ? x : 1 -> 0 < x ? x : 1
9519 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9520 RHS == TrueV && LHS == FalseV) {
9521 LHS = DAG.getConstant(0, DL, VT);
9522 // 0 <u x is the same as x != 0.
9523 if (CCVal == ISD::SETULT) {
9524 std::swap(LHS, RHS);
9525 CCVal = ISD::SETNE;
9526 }
9527 }
9528
9529 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9530 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9531 RHS == FalseV) {
9532 RHS = DAG.getConstant(0, DL, VT);
9533 }
9534
9535 SDValue TargetCC = DAG.getCondCode(CCVal);
9536
9537 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9538 // (select (setcc lhs, rhs, CC), constant, falsev)
9539 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9540 std::swap(TrueV, FalseV);
9541 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9542 }
9543
9544 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9545 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9546}
9547
9548SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9549 SDValue CondV = Op.getOperand(1);
9550 SDLoc DL(Op);
9551 MVT XLenVT = Subtarget.getXLenVT();
9552
9553 if (CondV.getOpcode() == ISD::SETCC &&
9554 CondV.getOperand(0).getValueType() == XLenVT) {
9555 SDValue LHS = CondV.getOperand(0);
9556 SDValue RHS = CondV.getOperand(1);
9557 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9558
9559 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9560
9561 SDValue TargetCC = DAG.getCondCode(CCVal);
9562 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9563 LHS, RHS, TargetCC, Op.getOperand(2));
9564 }
9565
9566 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9567 CondV, DAG.getConstant(0, DL, XLenVT),
9568 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9569}
9570
9571SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9572 MachineFunction &MF = DAG.getMachineFunction();
9573 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9574
9575 SDLoc DL(Op);
9576 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9577                                  getPointerTy(MF.getDataLayout()));
9578
9579 // vastart just stores the address of the VarArgsFrameIndex slot into the
9580 // memory location argument.
9581 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9582 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9583 MachinePointerInfo(SV));
9584}
9585
9586SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9587 SelectionDAG &DAG) const {
9588 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9589 MachineFunction &MF = DAG.getMachineFunction();
9590 MachineFrameInfo &MFI = MF.getFrameInfo();
9591 MFI.setFrameAddressIsTaken(true);
9592 Register FrameReg = RI.getFrameRegister(MF);
9593 int XLenInBytes = Subtarget.getXLen() / 8;
9594
9595 EVT VT = Op.getValueType();
9596 SDLoc DL(Op);
9597 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9598 unsigned Depth = Op.getConstantOperandVal(0);
9599 while (Depth--) {
9600 int Offset = -(XLenInBytes * 2);
9601 SDValue Ptr = DAG.getNode(
9602 ISD::ADD, DL, VT, FrameAddr,
9603         DAG.getSignedConstant(Offset, DL, VT));
9604 FrameAddr =
9605 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9606 }
9607 return FrameAddr;
9608}
9609
9610SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9611 SelectionDAG &DAG) const {
9612 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9613 MachineFunction &MF = DAG.getMachineFunction();
9614 MachineFrameInfo &MFI = MF.getFrameInfo();
9615 MFI.setReturnAddressIsTaken(true);
9616 MVT XLenVT = Subtarget.getXLenVT();
9617 int XLenInBytes = Subtarget.getXLen() / 8;
9618
9619 EVT VT = Op.getValueType();
9620 SDLoc DL(Op);
9621 unsigned Depth = Op.getConstantOperandVal(0);
9622 if (Depth) {
9623 int Off = -XLenInBytes;
9624 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9625 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9626 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9627 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9628 MachinePointerInfo());
9629 }
9630
9631 // Return the value of the return address register, marking it an implicit
9632 // live-in.
9633 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9634 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9635}
9636
9637SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9638 SelectionDAG &DAG) const {
9639 SDLoc DL(Op);
9640 SDValue Lo = Op.getOperand(0);
9641 SDValue Hi = Op.getOperand(1);
9642 SDValue Shamt = Op.getOperand(2);
9643 EVT VT = Lo.getValueType();
9644
9645 // if Shamt-XLEN < 0: // Shamt < XLEN
9646 // Lo = Lo << Shamt
9647 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9648 // else:
9649 // Lo = 0
9650 // Hi = Lo << (Shamt-XLEN)
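  // For example, on RV32 with Shamt = 40 the else branch applies:
  //   Lo = 0 and Hi = Lo << 8, i.e. the original low word shifted left by
  //   (40 - 32) bits into the high word.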
9651
9652 SDValue Zero = DAG.getConstant(0, DL, VT);
9653 SDValue One = DAG.getConstant(1, DL, VT);
9654 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9655 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9656 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9657 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9658
9659 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9660 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9661 SDValue ShiftRightLo =
9662 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9663 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9664 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9665 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9666
9667 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9668
9669 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9670 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9671
9672 SDValue Parts[2] = {Lo, Hi};
9673 return DAG.getMergeValues(Parts, DL);
9674}
9675
9676SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9677 bool IsSRA) const {
9678 SDLoc DL(Op);
9679 SDValue Lo = Op.getOperand(0);
9680 SDValue Hi = Op.getOperand(1);
9681 SDValue Shamt = Op.getOperand(2);
9682 EVT VT = Lo.getValueType();
9683
9684 // SRA expansion:
9685 // if Shamt-XLEN < 0: // Shamt < XLEN
9686 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9687 // Hi = Hi >>s Shamt
9688 // else:
9689 // Lo = Hi >>s (Shamt-XLEN);
9690 // Hi = Hi >>s (XLEN-1)
9691 //
9692 // SRL expansion:
9693 // if Shamt-XLEN < 0: // Shamt < XLEN
9694 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9695 // Hi = Hi >>u Shamt
9696 // else:
9697 // Lo = Hi >>u (Shamt-XLEN);
9698 // Hi = 0;
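  // For example, an SRL on RV32 with Shamt = 40 takes the else branch:
  //   Lo = Hi >>u 8 and Hi = 0; for SRA, Hi is instead filled with the sign
  //   bit via Hi >>s 31.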
9699
9700 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9701
9702 SDValue Zero = DAG.getConstant(0, DL, VT);
9703 SDValue One = DAG.getConstant(1, DL, VT);
9704 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9705 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9706 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9707 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9708
9709 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9710 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9711 SDValue ShiftLeftHi =
9712 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9713 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9714 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9715 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9716 SDValue HiFalse =
9717 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9718
9719 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9720
9721 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9722 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9723
9724 SDValue Parts[2] = {Lo, Hi};
9725 return DAG.getMergeValues(Parts, DL);
9726}
9727
9728// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9729// legal equivalently-sized i8 type, so we can use that as a go-between.
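// For example, splatting a non-constant i1 %c masks %c with 1, splats the
// result into the equivalently-sized i8 vector (vmv.v.x), and compares it
// against zero (vmsne.vi) to form the mask.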
9730SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9731 SelectionDAG &DAG) const {
9732 SDLoc DL(Op);
9733 MVT VT = Op.getSimpleValueType();
9734 SDValue SplatVal = Op.getOperand(0);
9735 // All-zeros or all-ones splats are handled specially.
9736 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9737 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9738 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9739 }
9740 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9741 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9742 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9743 }
9744 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9745 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9746 DAG.getConstant(1, DL, SplatVal.getValueType()));
9747 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9748 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9749 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9750}
9751
9752// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9753// illegal (currently only vXi64 RV32).
9754// FIXME: We could also catch non-constant sign-extended i32 values and lower
9755// them to VMV_V_X_VL.
9756SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9757 SelectionDAG &DAG) const {
9758 SDLoc DL(Op);
9759 MVT VecVT = Op.getSimpleValueType();
9760 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9761 "Unexpected SPLAT_VECTOR_PARTS lowering");
9762
9763 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9764 SDValue Lo = Op.getOperand(0);
9765 SDValue Hi = Op.getOperand(1);
9766
9767 MVT ContainerVT = VecVT;
9768 if (VecVT.isFixedLengthVector())
9769 ContainerVT = getContainerForFixedLengthVector(VecVT);
9770
9771 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9772
9773 SDValue Res =
9774 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9775
9776 if (VecVT.isFixedLengthVector())
9777 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9778
9779 return Res;
9780}
9781
9782// Custom-lower extensions from mask vectors by using a vselect either with 1
9783// for zero/any-extension or -1 for sign-extension:
9784// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9785// Note that any-extension is lowered identically to zero-extension.
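// For example, (sext nxv2i1 %m to nxv2i32) becomes
// (vselect %m, splat(-1), splat(0)), which is typically selected to a
// vmv.v.i of 0 followed by a vmerge.vim with immediate -1.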
9786SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9787 int64_t ExtTrueVal) const {
9788 SDLoc DL(Op);
9789 MVT VecVT = Op.getSimpleValueType();
9790 SDValue Src = Op.getOperand(0);
9791 // Only custom-lower extensions from mask types
9792 assert(Src.getValueType().isVector() &&
9793 Src.getValueType().getVectorElementType() == MVT::i1);
9794
9795 if (VecVT.isScalableVector()) {
9796 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9797 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9798 if (Src.getOpcode() == ISD::XOR &&
9799 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9800 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9801 SplatTrueVal);
9802 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9803 }
9804
9805 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9806 MVT I1ContainerVT =
9807 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9808
9809 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9810
9811 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9812
9813 MVT XLenVT = Subtarget.getXLenVT();
9814 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9815 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9816
9817 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9818 SDValue Xor = Src.getOperand(0);
9819 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9820 SDValue ScalableOnes = Xor.getOperand(1);
9821 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9822 ScalableOnes.getOperand(0).isUndef() &&
9823             ISD::isConstantSplatVectorAllOnes(
9824 ScalableOnes.getOperand(1).getNode())) {
9825 CC = Xor.getOperand(0);
9826 std::swap(SplatZero, SplatTrueVal);
9827 }
9828 }
9829 }
9830
9831 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9832 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9833 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9834 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9835 SDValue Select =
9836 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9837 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9838
9839 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9840}
9841
9842// Custom-lower truncations from vectors to mask vectors by using a mask and a
9843// setcc operation:
9844// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
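// For example, truncating vXi8 to vXi1 ands every element with 1 (vand.vi)
// and then compares the result against zero (vmsne.vi) to produce the mask.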
9845SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9846 SelectionDAG &DAG) const {
9847 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9848 SDLoc DL(Op);
9849 EVT MaskVT = Op.getValueType();
9850 // Only expect to custom-lower truncations to mask types
9851 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9852 "Unexpected type for vector mask lowering");
9853 SDValue Src = Op.getOperand(0);
9854 MVT VecVT = Src.getSimpleValueType();
9855 SDValue Mask, VL;
9856 if (IsVPTrunc) {
9857 Mask = Op.getOperand(1);
9858 VL = Op.getOperand(2);
9859 }
9860 // If this is a fixed vector, we need to convert it to a scalable vector.
9861 MVT ContainerVT = VecVT;
9862
9863 if (VecVT.isFixedLengthVector()) {
9864 ContainerVT = getContainerForFixedLengthVector(VecVT);
9865 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9866 if (IsVPTrunc) {
9867 MVT MaskContainerVT =
9868 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9869 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9870 }
9871 }
9872
9873 if (!IsVPTrunc) {
9874 std::tie(Mask, VL) =
9875 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9876 }
9877
9878 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9879 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9880
9881 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9882 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9883 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9884 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9885
9886 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9887 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9888 DAG.getUNDEF(ContainerVT), Mask, VL);
9889 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9890 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9891 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9892 if (MaskVT.isFixedLengthVector())
9893 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9894 return Trunc;
9895}
9896
9897SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9898 SelectionDAG &DAG) const {
9899 unsigned Opc = Op.getOpcode();
9900 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9901 SDLoc DL(Op);
9902
9903 MVT VT = Op.getSimpleValueType();
9904 // Only custom-lower vector truncates
9905 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9906
9907 // Truncates to mask types are handled differently
9908 if (VT.getVectorElementType() == MVT::i1)
9909 return lowerVectorMaskTruncLike(Op, DAG);
9910
9911 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9912 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9913 // truncate by one power of two at a time.
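  // For example, an i64-to-i8 element truncate is emitted as three narrowing
  // steps (i64->i32->i16->i8), each of which is typically selected to a
  // vnsrl.wi with a shift amount of zero.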
9914 MVT DstEltVT = VT.getVectorElementType();
9915
9916 SDValue Src = Op.getOperand(0);
9917 MVT SrcVT = Src.getSimpleValueType();
9918 MVT SrcEltVT = SrcVT.getVectorElementType();
9919
9920 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9921 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9922 "Unexpected vector truncate lowering");
9923
9924 MVT ContainerVT = SrcVT;
9925 SDValue Mask, VL;
9926 if (IsVPTrunc) {
9927 Mask = Op.getOperand(1);
9928 VL = Op.getOperand(2);
9929 }
9930 if (SrcVT.isFixedLengthVector()) {
9931 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9932 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9933 if (IsVPTrunc) {
9934 MVT MaskVT = getMaskTypeFor(ContainerVT);
9935 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9936 }
9937 }
9938
9939 SDValue Result = Src;
9940 if (!IsVPTrunc) {
9941 std::tie(Mask, VL) =
9942 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9943 }
9944
9945 unsigned NewOpc;
9946   if (Opc == ISD::TRUNCATE_SSAT_S)
9947 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9948 else if (Opc == ISD::TRUNCATE_USAT_U)
9949 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9950 else
9951 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9952
9953 do {
9954 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9955 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9956 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9957 } while (SrcEltVT != DstEltVT);
9958
9959 if (SrcVT.isFixedLengthVector())
9960 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9961
9962 return Result;
9963}
9964
9965SDValue
9966RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9967 SelectionDAG &DAG) const {
9968 SDLoc DL(Op);
9969 SDValue Chain = Op.getOperand(0);
9970 SDValue Src = Op.getOperand(1);
9971 MVT VT = Op.getSimpleValueType();
9972 MVT SrcVT = Src.getSimpleValueType();
9973 MVT ContainerVT = VT;
9974 if (VT.isFixedLengthVector()) {
9975 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9976 ContainerVT =
9977 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9978 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9979 }
9980
9981 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9982
9983   // RVV can only widen/truncate fp to types double/half the size of the source.
9984 if ((VT.getVectorElementType() == MVT::f64 &&
9985 (SrcVT.getVectorElementType() == MVT::f16 ||
9986 SrcVT.getVectorElementType() == MVT::bf16)) ||
9987 ((VT.getVectorElementType() == MVT::f16 ||
9988 VT.getVectorElementType() == MVT::bf16) &&
9989 SrcVT.getVectorElementType() == MVT::f64)) {
9990 // For double rounding, the intermediate rounding should be round-to-odd.
9991 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
9992 ? RISCVISD::STRICT_FP_EXTEND_VL
9993 : RISCVISD::STRICT_VFNCVT_ROD_VL;
9994 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9995 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
9996 Chain, Src, Mask, VL);
9997 Chain = Src.getValue(1);
9998 }
9999
10000 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10001 ? RISCVISD::STRICT_FP_EXTEND_VL
10002 : RISCVISD::STRICT_FP_ROUND_VL;
10003 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10004 Chain, Src, Mask, VL);
10005 if (VT.isFixedLengthVector()) {
10006 // StrictFP operations have two result values. Their lowered result should
10007     // have the same result count.
10008 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10009 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10010 }
10011 return Res;
10012}
10013
10014SDValue
10015RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10016 SelectionDAG &DAG) const {
10017 bool IsVP =
10018 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10019 bool IsExtend =
10020 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10021   // RVV can only truncate fp to types half the size of the source. We
10022   // custom-lower f64->f16 rounds via RVV's round-to-odd float
10023   // conversion instruction.
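  // For example, an f64->f16 round first narrows f64->f32 with
  // vfncvt.rod.f.f.w (round-to-odd) and then f32->f16 with vfncvt.f.f.w, so
  // only the final step rounds according to the current rounding mode.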
10024 SDLoc DL(Op);
10025 MVT VT = Op.getSimpleValueType();
10026
10027 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10028
10029 SDValue Src = Op.getOperand(0);
10030 MVT SrcVT = Src.getSimpleValueType();
10031
10032 bool IsDirectExtend =
10033 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10034 (SrcVT.getVectorElementType() != MVT::f16 &&
10035 SrcVT.getVectorElementType() != MVT::bf16));
10036 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10037 VT.getVectorElementType() != MVT::bf16) ||
10038 SrcVT.getVectorElementType() != MVT::f64);
10039
10040 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10041
10042 // We have regular SD node patterns for direct non-VL extends.
10043 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10044 return Op;
10045
10046 // Prepare any fixed-length vector operands.
10047 MVT ContainerVT = VT;
10048 SDValue Mask, VL;
10049 if (IsVP) {
10050 Mask = Op.getOperand(1);
10051 VL = Op.getOperand(2);
10052 }
10053 if (VT.isFixedLengthVector()) {
10054 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10055 ContainerVT =
10056 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10057 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10058 if (IsVP) {
10059 MVT MaskVT = getMaskTypeFor(ContainerVT);
10060 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10061 }
10062 }
10063
10064 if (!IsVP)
10065 std::tie(Mask, VL) =
10066 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10067
10068 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10069
10070 if (IsDirectConv) {
10071 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10072 if (VT.isFixedLengthVector())
10073 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10074 return Src;
10075 }
10076
10077 unsigned InterConvOpc =
10078 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10079
10080 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10081 SDValue IntermediateConv =
10082 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10083 SDValue Result =
10084 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10085 if (VT.isFixedLengthVector())
10086 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10087 return Result;
10088}
10089
10090// Given a scalable vector type and an index into it, returns the type for the
10091// smallest subvector that the index fits in. This can be used to reduce LMUL
10092// for operations like vslidedown.
10093//
10094// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10095static std::optional<MVT>
10096getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10097 const RISCVSubtarget &Subtarget) {
10098 assert(VecVT.isScalableVector());
10099 const unsigned EltSize = VecVT.getScalarSizeInBits();
10100 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10101 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10102 MVT SmallerVT;
10103 if (MaxIdx < MinVLMAX)
10104 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10105 else if (MaxIdx < MinVLMAX * 2)
10106 SmallerVT =
10107         RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10108 else if (MaxIdx < MinVLMAX * 4)
10109 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10110                     .getDoubleNumVectorElementsVT()
10111                     .getDoubleNumVectorElementsVT();
10112 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10113 return std::nullopt;
10114 return SmallerVT;
10115}
10116
10118 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10119 if (!IdxC || isNullConstant(Idx))
10120 return false;
10121 return isUInt<5>(IdxC->getZExtValue());
10122}
10123
10124// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10125// first position of a vector, and that vector is slid up to the insert index.
10126// By limiting the active vector length to index+1 and merging with the
10127// original vector (with an undisturbed tail policy for elements >= VL), we
10128// achieve the desired result of leaving all elements untouched except the one
10129// at VL-1, which is replaced with the desired value.
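// For example, inserting a scalar into lane 2 of a v4i32 limits VL to 3,
// moves the scalar into element 0 of a temporary (vmv.s.x / vfmv.s.f), and
// slides it up by 2 with a tail-undisturbed vslideup, leaving all other
// lanes of the original vector unchanged.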
10130SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10131 SelectionDAG &DAG) const {
10132 SDLoc DL(Op);
10133 MVT VecVT = Op.getSimpleValueType();
10134 MVT XLenVT = Subtarget.getXLenVT();
10135 SDValue Vec = Op.getOperand(0);
10136 SDValue Val = Op.getOperand(1);
10137 MVT ValVT = Val.getSimpleValueType();
10138 SDValue Idx = Op.getOperand(2);
10139
10140 if (VecVT.getVectorElementType() == MVT::i1) {
10141 // FIXME: For now we just promote to an i8 vector and insert into that,
10142 // but this is probably not optimal.
10143 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10144 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10145 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10146 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10147 }
10148
10149 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10150 ValVT == MVT::bf16) {
10151 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10152 MVT IntVT = VecVT.changeTypeToInteger();
10153 SDValue IntInsert = DAG.getNode(
10154 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10155 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10156 return DAG.getBitcast(VecVT, IntInsert);
10157 }
10158
10159 MVT ContainerVT = VecVT;
10160 // If the operand is a fixed-length vector, convert to a scalable one.
10161 if (VecVT.isFixedLengthVector()) {
10162 ContainerVT = getContainerForFixedLengthVector(VecVT);
10163 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10164 }
10165
10166 // If we know the index we're going to insert at, we can shrink Vec so that
10167 // we're performing the scalar inserts and slideup on a smaller LMUL.
10168 SDValue OrigVec = Vec;
10169 std::optional<unsigned> AlignedIdx;
10170 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10171 const unsigned OrigIdx = IdxC->getZExtValue();
10172 // Do we know an upper bound on LMUL?
10173 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10174 DL, DAG, Subtarget)) {
10175 ContainerVT = *ShrunkVT;
10176 AlignedIdx = 0;
10177 }
10178
10179 // If we're compiling for an exact VLEN value, we can always perform
10180 // the insert in m1 as we can determine the register corresponding to
10181 // the index in the register group.
10182 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10183 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10184 EVT ElemVT = VecVT.getVectorElementType();
10185 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10186 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10187 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10188 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10189 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10190 ContainerVT = M1VT;
10191 }
10192
10193 if (AlignedIdx)
10194 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10195 }
10196
10197 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10198 // Even i64-element vectors on RV32 can be lowered without scalar
10199 // legalization if the most-significant 32 bits of the value are not affected
10200 // by the sign-extension of the lower 32 bits.
10201 // TODO: We could also catch sign extensions of a 32-bit value.
10202 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10203 const auto *CVal = cast<ConstantSDNode>(Val);
10204 if (isInt<32>(CVal->getSExtValue())) {
10205 IsLegalInsert = true;
10206 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10207 }
10208 }
10209
10210 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10211
10212 SDValue ValInVec;
10213
10214 if (IsLegalInsert) {
10215 unsigned Opc =
10216 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10217 if (isNullConstant(Idx)) {
10218 if (!VecVT.isFloatingPoint())
10219 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10220 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10221
10222 if (AlignedIdx)
10223 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10224 if (!VecVT.isFixedLengthVector())
10225 return Vec;
10226 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10227 }
10228
10229 // Use ri.vinsert.v.x if available.
10230 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10232 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10233 SDValue PolicyOp =
10234         DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10235 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10236 VL, PolicyOp);
10237 if (AlignedIdx)
10238 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10239 if (!VecVT.isFixedLengthVector())
10240 return Vec;
10241 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10242 }
10243
10244 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10245 } else {
10246 // On RV32, i64-element vectors must be specially handled to place the
10247 // value at element 0, by using two vslide1down instructions in sequence on
10248 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10249 // this.
10250 SDValue ValLo, ValHi;
10251 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10252 MVT I32ContainerVT =
10253 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10254 SDValue I32Mask =
10255 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10256 // Limit the active VL to two.
10257 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10258 // If the Idx is 0 we can insert directly into the vector.
10259 if (isNullConstant(Idx)) {
10260 // First slide in the lo value, then the hi in above it. We use slide1down
10261 // to avoid the register group overlap constraint of vslide1up.
10262 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10263 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10264 // If the source vector is undef don't pass along the tail elements from
10265 // the previous slide1down.
10266 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10267 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10268 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10269 // Bitcast back to the right container type.
10270 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10271
10272 if (AlignedIdx)
10273 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10274 if (!VecVT.isFixedLengthVector())
10275 return ValInVec;
10276 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10277 }
10278
10279 // First slide in the lo value, then the hi in above it. We use slide1down
10280 // to avoid the register group overlap constraint of vslide1up.
10281 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10282 DAG.getUNDEF(I32ContainerVT),
10283 DAG.getUNDEF(I32ContainerVT), ValLo,
10284 I32Mask, InsertI64VL);
10285 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10286 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10287 I32Mask, InsertI64VL);
10288 // Bitcast back to the right container type.
10289 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10290 }
10291
10292 // Now that the value is in a vector, slide it into position.
10293 SDValue InsertVL =
10294 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10295
10296 // Use tail agnostic policy if Idx is the last index of Vec.
10297   unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10298 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10299 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10300     Policy = RISCVVType::TAIL_AGNOSTIC;
10301 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10302 Idx, Mask, InsertVL, Policy);
10303
10304 if (AlignedIdx)
10305 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10306 if (!VecVT.isFixedLengthVector())
10307 return Slideup;
10308 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10309}
10310
10311// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10312// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10313// types this is done using VMV_X_S to allow us to glean information about the
10314// sign bits of the result.
10315SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10316 SelectionDAG &DAG) const {
10317 SDLoc DL(Op);
10318 SDValue Idx = Op.getOperand(1);
10319 SDValue Vec = Op.getOperand(0);
10320 EVT EltVT = Op.getValueType();
10321 MVT VecVT = Vec.getSimpleValueType();
10322 MVT XLenVT = Subtarget.getXLenVT();
10323
10324 if (VecVT.getVectorElementType() == MVT::i1) {
10325 // Use vfirst.m to extract the first bit.
10326 if (isNullConstant(Idx)) {
10327 MVT ContainerVT = VecVT;
10328 if (VecVT.isFixedLengthVector()) {
10329 ContainerVT = getContainerForFixedLengthVector(VecVT);
10330 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10331 }
10332 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10333 SDValue Vfirst =
10334 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10335 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10336 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10337 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10338 }
10339 if (VecVT.isFixedLengthVector()) {
10340 unsigned NumElts = VecVT.getVectorNumElements();
10341 if (NumElts >= 8) {
10342 MVT WideEltVT;
10343 unsigned WidenVecLen;
10344 SDValue ExtractElementIdx;
10345 SDValue ExtractBitIdx;
10346 unsigned MaxEEW = Subtarget.getELen();
10347 MVT LargestEltVT = MVT::getIntegerVT(
10348 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10349 if (NumElts <= LargestEltVT.getSizeInBits()) {
10350 assert(isPowerOf2_32(NumElts) &&
10351 "the number of elements should be power of 2");
10352 WideEltVT = MVT::getIntegerVT(NumElts);
10353 WidenVecLen = 1;
10354 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10355 ExtractBitIdx = Idx;
10356 } else {
10357 WideEltVT = LargestEltVT;
10358 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10359 // extract element index = index / element width
10360 ExtractElementIdx = DAG.getNode(
10361 ISD::SRL, DL, XLenVT, Idx,
10362 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10363 // mask bit index = index % element width
10364 ExtractBitIdx = DAG.getNode(
10365 ISD::AND, DL, XLenVT, Idx,
10366 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10367 }
10368 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10369 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10370 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10371 Vec, ExtractElementIdx);
10372 // Extract the bit from GPR.
10373 SDValue ShiftRight =
10374 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10375 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10376 DAG.getConstant(1, DL, XLenVT));
10377 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10378 }
10379 }
10380 // Otherwise, promote to an i8 vector and extract from that.
10381 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10382 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10383 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10384 }
10385
10386 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10387 EltVT == MVT::bf16) {
10388 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10389 MVT IntVT = VecVT.changeTypeToInteger();
10390 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10391 SDValue IntExtract =
10392 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10393 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10394 }
10395
10396 // If this is a fixed vector, we need to convert it to a scalable vector.
10397 MVT ContainerVT = VecVT;
10398 if (VecVT.isFixedLengthVector()) {
10399 ContainerVT = getContainerForFixedLengthVector(VecVT);
10400 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10401 }
10402
10403 // If we're compiling for an exact VLEN value and we have a known
10404 // constant index, we can always perform the extract in m1 (or
10405 // smaller) as we can determine the register corresponding to
10406 // the index in the register group.
10407 const auto VLen = Subtarget.getRealVLen();
10408 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10409 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10410 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10411 unsigned OrigIdx = IdxC->getZExtValue();
10412 EVT ElemVT = VecVT.getVectorElementType();
10413 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10414 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10415 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10416 unsigned ExtractIdx =
10417 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10418 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10419 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10420 ContainerVT = M1VT;
10421 }
10422
10423 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10424 // contains our index.
10425 std::optional<uint64_t> MaxIdx;
10426 if (VecVT.isFixedLengthVector())
10427 MaxIdx = VecVT.getVectorNumElements() - 1;
10428 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10429 MaxIdx = IdxC->getZExtValue();
10430 if (MaxIdx) {
10431 if (auto SmallerVT =
10432 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10433 ContainerVT = *SmallerVT;
10434 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10435 }
10436 }
10437
10438 // Use ri.vextract.x.v if available.
10439 // TODO: Avoid index 0 and just use the vmv.x.s
10440 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10442 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10443 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10444 }
10445
10446 // If after narrowing, the required slide is still greater than LMUL2,
10447 // fallback to generic expansion and go through the stack. This is done
10448 // for a subtle reason: extracting *all* elements out of a vector is
10449 // widely expected to be linear in vector size, but because vslidedown
10450 // is linear in LMUL, performing N extracts using vslidedown becomes
10451 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10452 // seems to have the same problem (the store is linear in LMUL), but the
10453 // generic expansion *memoizes* the store, and thus for many extracts of
10454 // the same vector we end up with one store and a bunch of loads.
10455 // TODO: We don't have the same code for insert_vector_elt because we
10456 // have BUILD_VECTOR and handle the degenerate case there. Should we
10457 // consider adding an inverse BUILD_VECTOR node?
10458 MVT LMUL2VT =
10459       RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10460 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10461 return SDValue();
10462
10463 // If the index is 0, the vector is already in the right position.
10464 if (!isNullConstant(Idx)) {
10465 // Use a VL of 1 to avoid processing more elements than we need.
10466 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10467 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10468 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10469 }
10470
10471 if (!EltVT.isInteger()) {
10472 // Floating-point extracts are handled in TableGen.
10473 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10474 }
10475
10476 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10477 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10478}
10479
10480// Some RVV intrinsics may claim that they want an integer operand to be
10481// promoted or expanded.
10482 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10483 const RISCVSubtarget &Subtarget) {
10484 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10485 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10486 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10487 "Unexpected opcode");
10488
10489 if (!Subtarget.hasVInstructions())
10490 return SDValue();
10491
10492 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10493 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10494 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10495
10496 SDLoc DL(Op);
10497
10498   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10499 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10500 if (!II || !II->hasScalarOperand())
10501 return SDValue();
10502
10503 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10504 assert(SplatOp < Op.getNumOperands());
10505
10506   SmallVector<SDValue, 8> Operands(Op->ops());
10507 SDValue &ScalarOp = Operands[SplatOp];
10508 MVT OpVT = ScalarOp.getSimpleValueType();
10509 MVT XLenVT = Subtarget.getXLenVT();
10510
10511 // If this isn't a scalar, or its type is XLenVT we're done.
10512 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10513 return SDValue();
10514
10515 // Simplest case is that the operand needs to be promoted to XLenVT.
10516 if (OpVT.bitsLT(XLenVT)) {
10517 // If the operand is a constant, sign extend to increase our chances
10518 // of being able to use a .vi instruction. ANY_EXTEND would become a
10519     // zero extend and the simm5 check in isel would fail.
10520 // FIXME: Should we ignore the upper bits in isel instead?
10521 unsigned ExtOpc =
10522         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10523 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10524 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10525 }
10526
10527 // Use the previous operand to get the vXi64 VT. The result might be a mask
10528 // VT for compares. Using the previous operand assumes that the previous
10529 // operand will never have a smaller element size than a scalar operand and
10530 // that a widening operation never uses SEW=64.
10531 // NOTE: If this fails the below assert, we can probably just find the
10532 // element count from any operand or result and use it to construct the VT.
10533 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10534 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10535
10536 // The more complex case is when the scalar is larger than XLenVT.
10537 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10538 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10539
10540 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10541 // instruction to sign-extend since SEW>XLEN.
10542 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10543 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10544 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10545 }
10546
10547 switch (IntNo) {
10548 case Intrinsic::riscv_vslide1up:
10549 case Intrinsic::riscv_vslide1down:
10550 case Intrinsic::riscv_vslide1up_mask:
10551 case Intrinsic::riscv_vslide1down_mask: {
10552 // We need to special case these when the scalar is larger than XLen.
10553 unsigned NumOps = Op.getNumOperands();
10554 bool IsMasked = NumOps == 7;
10555
10556 // Convert the vector source to the equivalent nxvXi32 vector.
10557 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10558 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10559 SDValue ScalarLo, ScalarHi;
10560 std::tie(ScalarLo, ScalarHi) =
10561 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10562
10563 // Double the VL since we halved SEW.
10564 SDValue AVL = getVLOperand(Op);
10565 SDValue I32VL;
10566
10567 // Optimize for constant AVL
10568 if (isa<ConstantSDNode>(AVL)) {
10569 const auto [MinVLMAX, MaxVLMAX] =
10570           RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10571
10572 uint64_t AVLInt = AVL->getAsZExtVal();
10573 if (AVLInt <= MinVLMAX) {
10574 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10575 } else if (AVLInt >= 2 * MaxVLMAX) {
10576 // Just set vl to VLMAX in this situation
10577 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10578 } else {
10579 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10580 // is related to the hardware implementation.
10581 // So let the following code handle
10582         // So let the following code handle it.
10583 }
10584 if (!I32VL) {
10585       RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10586 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10587 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10588 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10589 SDValue SETVL =
10590 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10591       // Use the vsetvli instruction to get the actually-used length, which is
10592       // related to the hardware implementation.
10593 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10594 SEW, LMUL);
10595 I32VL =
10596 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10597 }
10598
10599 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10600
10601 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10602 // instructions.
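    // For example, a 64-bit vslide1up on RV32 becomes two SEW=32 vslide1up.vx
    // instructions (high word first, then low word) executed with the doubled
    // VL computed above.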
10603 SDValue Passthru;
10604 if (IsMasked)
10605 Passthru = DAG.getUNDEF(I32VT);
10606 else
10607 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10608
10609 if (IntNo == Intrinsic::riscv_vslide1up ||
10610 IntNo == Intrinsic::riscv_vslide1up_mask) {
10611 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10612 ScalarHi, I32Mask, I32VL);
10613 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10614 ScalarLo, I32Mask, I32VL);
10615 } else {
10616 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10617 ScalarLo, I32Mask, I32VL);
10618 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10619 ScalarHi, I32Mask, I32VL);
10620 }
10621
10622 // Convert back to nxvXi64.
10623 Vec = DAG.getBitcast(VT, Vec);
10624
10625 if (!IsMasked)
10626 return Vec;
10627 // Apply mask after the operation.
10628 SDValue Mask = Operands[NumOps - 3];
10629 SDValue MaskedOff = Operands[1];
10630 // Assume Policy operand is the last operand.
10631 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10632 // We don't need to select maskedoff if it's undef.
10633 if (MaskedOff.isUndef())
10634 return Vec;
10635 // TAMU
10636 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10637 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10638 DAG.getUNDEF(VT), AVL);
10639 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10640     // It's fine because vmerge does not care about mask policy.
10641 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10642 MaskedOff, AVL);
10643 }
10644 }
10645
10646 // We need to convert the scalar to a splat vector.
10647 SDValue VL = getVLOperand(Op);
10648 assert(VL.getValueType() == XLenVT);
10649 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10650 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10651}
10652
10653// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10654// scalable vector llvm.get.vector.length for now.
10655//
10656// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10657// (vscale * VF). The vscale and VF are independent of element width. We use
10658// SEW=8 for the vsetvli because it is the only element width that supports all
10659// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10660// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10661// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10662// SEW and LMUL are better for the surrounding vector instructions.
10663 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10664 const RISCVSubtarget &Subtarget) {
10665 MVT XLenVT = Subtarget.getXLenVT();
10666
10667 // The smallest LMUL is only valid for the smallest element width.
10668 const unsigned ElementWidth = 8;
10669
10670 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10671 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10672 // We don't support VF==1 with ELEN==32.
10673 [[maybe_unused]] unsigned MinVF =
10674 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10675
10676 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10677 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10678 "Unexpected VF");
10679
10680 bool Fractional = VF < LMul1VF;
10681 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10682 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10683 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10684
10685 SDLoc DL(N);
10686
10687 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10688 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10689
10690 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10691
10692 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10693 SDValue Res =
10694 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10695 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10696}
10697
10698 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10699 const RISCVSubtarget &Subtarget) {
10700 SDValue Op0 = N->getOperand(1);
10701 MVT OpVT = Op0.getSimpleValueType();
10702 MVT ContainerVT = OpVT;
10703 if (OpVT.isFixedLengthVector()) {
10704 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10705 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10706 }
10707 MVT XLenVT = Subtarget.getXLenVT();
10708 SDLoc DL(N);
10709 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10710 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10711 if (isOneConstant(N->getOperand(2)))
10712 return Res;
10713
10714 // Convert -1 to VL.
10715 SDValue Setcc =
10716 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10717 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10718 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10719}
10720
10721static inline void promoteVCIXScalar(const SDValue &Op,
10722                                      SmallVectorImpl<SDValue> &Operands,
10723 SelectionDAG &DAG) {
10724 const RISCVSubtarget &Subtarget =
10725       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10726
10727 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10728 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10729 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10730 SDLoc DL(Op);
10731
10732   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10733 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10734 if (!II || !II->hasScalarOperand())
10735 return;
10736
10737 unsigned SplatOp = II->ScalarOperand + 1;
10738 assert(SplatOp < Op.getNumOperands());
10739
10740 SDValue &ScalarOp = Operands[SplatOp];
10741 MVT OpVT = ScalarOp.getSimpleValueType();
10742 MVT XLenVT = Subtarget.getXLenVT();
10743
10744 // The code below is partially copied from lowerVectorIntrinsicScalars.
10745 // If this isn't a scalar, or its type is XLenVT we're done.
10746 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10747 return;
10748
10749 // Manually emit promote operation for scalar operation.
10750 if (OpVT.bitsLT(XLenVT)) {
10751 unsigned ExtOpc =
10752         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10753 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10754 }
10755}
10756
10757static void processVCIXOperands(SDValue &OrigOp,
10758                                 SmallVectorImpl<SDValue> &Operands,
10759 SelectionDAG &DAG) {
10760 promoteVCIXScalar(OrigOp, Operands, DAG);
10761 const RISCVSubtarget &Subtarget =
10762       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10763 for (SDValue &V : Operands) {
10764 EVT ValType = V.getValueType();
10765 if (ValType.isVector() && ValType.isFloatingPoint()) {
10766 MVT InterimIVT =
10767 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10768 ValType.getVectorElementCount());
10769 V = DAG.getBitcast(InterimIVT, V);
10770 }
10771 if (ValType.isFixedLengthVector()) {
10772 MVT OpContainerVT = getContainerForFixedLengthVector(
10773 DAG, V.getSimpleValueType(), Subtarget);
10774 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10775 }
10776 }
10777}
10778
10779// LMUL * VLEN should be greater than or equal to EGS * SEW
10780static inline bool isValidEGW(int EGS, EVT VT,
10781 const RISCVSubtarget &Subtarget) {
10782 return (Subtarget.getRealMinVLen() *
10783           VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10784 EGS * VT.getScalarSizeInBits();
10785}
10786
10787SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10788 SelectionDAG &DAG) const {
10789 unsigned IntNo = Op.getConstantOperandVal(0);
10790 SDLoc DL(Op);
10791 MVT XLenVT = Subtarget.getXLenVT();
10792
10793 switch (IntNo) {
10794 default:
10795 break; // Don't custom lower most intrinsics.
10796 case Intrinsic::riscv_tuple_insert: {
10797 SDValue Vec = Op.getOperand(1);
10798 SDValue SubVec = Op.getOperand(2);
10799 SDValue Index = Op.getOperand(3);
10800
10801 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10802 SubVec, Index);
10803 }
10804 case Intrinsic::riscv_tuple_extract: {
10805 SDValue Vec = Op.getOperand(1);
10806 SDValue Index = Op.getOperand(2);
10807
10808 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10809 Index);
10810 }
10811 case Intrinsic::thread_pointer: {
10812 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10813 return DAG.getRegister(RISCV::X4, PtrVT);
10814 }
10815 case Intrinsic::riscv_orc_b:
10816 case Intrinsic::riscv_brev8:
10817 case Intrinsic::riscv_sha256sig0:
10818 case Intrinsic::riscv_sha256sig1:
10819 case Intrinsic::riscv_sha256sum0:
10820 case Intrinsic::riscv_sha256sum1:
10821 case Intrinsic::riscv_sm3p0:
10822 case Intrinsic::riscv_sm3p1: {
10823 unsigned Opc;
10824 switch (IntNo) {
10825 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10826 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10827 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10828 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10829 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10830 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10831 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10832 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10833 }
10834
10835 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10836 }
10837 case Intrinsic::riscv_sm4ks:
10838 case Intrinsic::riscv_sm4ed: {
10839 unsigned Opc =
10840 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10841
10842 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10843 Op.getOperand(3));
10844 }
10845 case Intrinsic::riscv_zip:
10846 case Intrinsic::riscv_unzip: {
10847 unsigned Opc =
10848 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10849 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10850 }
10851 case Intrinsic::riscv_mopr:
10852 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10853 Op.getOperand(2));
10854
10855 case Intrinsic::riscv_moprr: {
10856 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10857 Op.getOperand(2), Op.getOperand(3));
10858 }
10859 case Intrinsic::riscv_clmul:
10860 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10861 Op.getOperand(2));
10862 case Intrinsic::riscv_clmulh:
10863 case Intrinsic::riscv_clmulr: {
10864 unsigned Opc =
10865 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10866 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10867 }
10868 case Intrinsic::experimental_get_vector_length:
10869 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10870 case Intrinsic::experimental_cttz_elts:
10871 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10872 case Intrinsic::riscv_vmv_x_s: {
10873 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10874 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10875 }
10876 case Intrinsic::riscv_vfmv_f_s:
10877 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10878 case Intrinsic::riscv_vmv_v_x:
10879 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10880 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10881 Subtarget);
10882 case Intrinsic::riscv_vfmv_v_f:
10883 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10884 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10885 case Intrinsic::riscv_vmv_s_x: {
10886 SDValue Scalar = Op.getOperand(2);
10887
10888 if (Scalar.getValueType().bitsLE(XLenVT)) {
10889 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10890 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10891 Op.getOperand(1), Scalar, Op.getOperand(3));
10892 }
10893
10894 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10895
10896 // This is an i64 value that lives in two scalar registers. We have to
10897 // insert this in a convoluted way. First we build vXi64 splat containing
10898 // the two values that we assemble using some bit math. Next we'll use
10899 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10900 // to merge element 0 from our splat into the source vector.
10901 // FIXME: This is probably not the best way to do this, but it is
10902 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10903 // point.
10904 // sw lo, (a0)
10905 // sw hi, 4(a0)
10906 // vlse vX, (a0)
10907 //
10908 // vid.v vVid
10909 // vmseq.vx mMask, vVid, 0
10910 // vmerge.vvm vDest, vSrc, vVal, mMask
10911 MVT VT = Op.getSimpleValueType();
10912 SDValue Vec = Op.getOperand(1);
10913 SDValue VL = getVLOperand(Op);
10914
10915 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10916 if (Op.getOperand(1).isUndef())
10917 return SplattedVal;
10918 SDValue SplattedIdx =
10919 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10920 DAG.getConstant(0, DL, MVT::i32), VL);
10921
10922 MVT MaskVT = getMaskTypeFor(VT);
10923 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10924 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10925 SDValue SelectCond =
10926 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10927 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10928 DAG.getUNDEF(MaskVT), Mask, VL});
10929 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10930 Vec, DAG.getUNDEF(VT), VL);
10931 }
10932 case Intrinsic::riscv_vfmv_s_f:
10933 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10934 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10935 // EGS * EEW >= 128 bits
10936 case Intrinsic::riscv_vaesdf_vv:
10937 case Intrinsic::riscv_vaesdf_vs:
10938 case Intrinsic::riscv_vaesdm_vv:
10939 case Intrinsic::riscv_vaesdm_vs:
10940 case Intrinsic::riscv_vaesef_vv:
10941 case Intrinsic::riscv_vaesef_vs:
10942 case Intrinsic::riscv_vaesem_vv:
10943 case Intrinsic::riscv_vaesem_vs:
10944 case Intrinsic::riscv_vaeskf1:
10945 case Intrinsic::riscv_vaeskf2:
10946 case Intrinsic::riscv_vaesz_vs:
10947 case Intrinsic::riscv_vsm4k:
10948 case Intrinsic::riscv_vsm4r_vv:
10949 case Intrinsic::riscv_vsm4r_vs: {
10950 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10951 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10952 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10953 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10954 return Op;
10955 }
10956 // EGS * EEW >= 256 bits
10957 case Intrinsic::riscv_vsm3c:
10958 case Intrinsic::riscv_vsm3me: {
10959 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10960 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10961 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10962 return Op;
10963 }
10964 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10965 case Intrinsic::riscv_vsha2ch:
10966 case Intrinsic::riscv_vsha2cl:
10967 case Intrinsic::riscv_vsha2ms: {
10968 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10969 !Subtarget.hasStdExtZvknhb())
10970 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10971 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10972 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10973 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10974 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10975 return Op;
10976 }
10977 case Intrinsic::riscv_sf_vc_v_x:
10978 case Intrinsic::riscv_sf_vc_v_i:
10979 case Intrinsic::riscv_sf_vc_v_xv:
10980 case Intrinsic::riscv_sf_vc_v_iv:
10981 case Intrinsic::riscv_sf_vc_v_vv:
10982 case Intrinsic::riscv_sf_vc_v_fv:
10983 case Intrinsic::riscv_sf_vc_v_xvv:
10984 case Intrinsic::riscv_sf_vc_v_ivv:
10985 case Intrinsic::riscv_sf_vc_v_vvv:
10986 case Intrinsic::riscv_sf_vc_v_fvv:
10987 case Intrinsic::riscv_sf_vc_v_xvw:
10988 case Intrinsic::riscv_sf_vc_v_ivw:
10989 case Intrinsic::riscv_sf_vc_v_vvw:
10990 case Intrinsic::riscv_sf_vc_v_fvw: {
10991 MVT VT = Op.getSimpleValueType();
10992
10993 SmallVector<SDValue> Operands{Op->op_values()};
10994 processVCIXOperands(Op, Operands, DAG);
10995
10996 MVT RetVT = VT;
10997 if (VT.isFixedLengthVector())
10998 RetVT = getContainerForFixedLengthVector(VT);
10999 else if (VT.isFloatingPoint())
11000 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11001 VT.getVectorElementCount());
11002
11003 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11004
11005 if (VT.isFixedLengthVector())
11006 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11007 else if (VT.isFloatingPoint())
11008 NewNode = DAG.getBitcast(VT, NewNode);
11009
11010 if (Op == NewNode)
11011 break;
11012
11013 return NewNode;
11014 }
11015 }
11016
11017 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11018}
11019
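// Build a chained VCIX ISD node of the given Type: drop the intrinsic ID
// operand, legalize the remaining operands via processVCIXOperands, emit the
// node on an integer/scalable result type, and convert the result (plus its
// chain) back to the original value type.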
11020 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11021 unsigned Type) {
11022 SDLoc DL(Op);
11023 SmallVector<SDValue> Operands{Op->op_values()};
11024 Operands.erase(Operands.begin() + 1);
11025
11026 const RISCVSubtarget &Subtarget =
11027 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11028 MVT VT = Op.getSimpleValueType();
11029 MVT RetVT = VT;
11030 MVT FloatVT = VT;
11031
11032 if (VT.isFloatingPoint()) {
11033 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11034 VT.getVectorElementCount());
11035 FloatVT = RetVT;
11036 }
11037 if (VT.isFixedLengthVector())
11038 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11039 Subtarget);
11040
11041 processVCIXOperands(Op, Operands, DAG);
11042
11043 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11044 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11045 SDValue Chain = NewNode.getValue(1);
11046
11047 if (VT.isFixedLengthVector())
11048 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11049 if (VT.isFloatingPoint())
11050 NewNode = DAG.getBitcast(VT, NewNode);
11051
11052 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11053
11054 return NewNode;
11055}
11056
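// Build a void (no-result) VCIX ISD node of the given Type: drop the
// intrinsic ID operand and legalize the remaining operands before emitting.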
11057 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11058 unsigned Type) {
11059 SmallVector<SDValue> Operands{Op->op_values()};
11060 Operands.erase(Operands.begin() + 1);
11061 processVCIXOperands(Op, Operands, DAG);
11062
11063 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11064}
11065
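// Lower the fixed-length riscv.seg<N>.load.mask and riscv.sseg<N>.load.mask
// (strided) intrinsics to the masked vlseg/vlsseg intrinsics on a scalable
// vector tuple, then extract each field and convert it back to the original
// fixed-length type.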
11066static SDValue
11067 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11068 const RISCVSubtarget &Subtarget,
11069 SelectionDAG &DAG) {
11070 bool IsStrided;
11071 switch (IntNo) {
11072 case Intrinsic::riscv_seg2_load_mask:
11073 case Intrinsic::riscv_seg3_load_mask:
11074 case Intrinsic::riscv_seg4_load_mask:
11075 case Intrinsic::riscv_seg5_load_mask:
11076 case Intrinsic::riscv_seg6_load_mask:
11077 case Intrinsic::riscv_seg7_load_mask:
11078 case Intrinsic::riscv_seg8_load_mask:
11079 IsStrided = false;
11080 break;
11081 case Intrinsic::riscv_sseg2_load_mask:
11082 case Intrinsic::riscv_sseg3_load_mask:
11083 case Intrinsic::riscv_sseg4_load_mask:
11084 case Intrinsic::riscv_sseg5_load_mask:
11085 case Intrinsic::riscv_sseg6_load_mask:
11086 case Intrinsic::riscv_sseg7_load_mask:
11087 case Intrinsic::riscv_sseg8_load_mask:
11088 IsStrided = true;
11089 break;
11090 default:
11091 llvm_unreachable("unexpected intrinsic ID");
11092 };
11093
11094 static const Intrinsic::ID VlsegInts[7] = {
11095 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11096 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11097 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11098 Intrinsic::riscv_vlseg8_mask};
11099 static const Intrinsic::ID VlssegInts[7] = {
11100 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11101 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11102 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11103 Intrinsic::riscv_vlsseg8_mask};
11104
11105 SDLoc DL(Op);
11106 unsigned NF = Op->getNumValues() - 1;
11107 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11108 MVT XLenVT = Subtarget.getXLenVT();
11109 MVT VT = Op->getSimpleValueType(0);
11110 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11111 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11112 ContainerVT.getScalarSizeInBits();
11113 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11114
11115 // Operands: (chain, int_id, pointer, mask, vl) or
11116 // (chain, int_id, pointer, offset, mask, vl)
11117 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11118 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11119 MVT MaskVT = Mask.getSimpleValueType();
11120 MVT MaskContainerVT =
11121 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11122 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11123
11124 SDValue IntID = DAG.getTargetConstant(
11125 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11126 auto *Load = cast<MemIntrinsicSDNode>(Op);
11127
11128 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11129 SmallVector<SDValue> Ops = {
11130 Load->getChain(),
11131 IntID,
11132 DAG.getUNDEF(VecTupTy),
11133 Op.getOperand(2),
11134 Mask,
11135 VL,
11136 DAG.getConstant(
11137 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11138 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11139 // Insert the stride operand.
11140 if (IsStrided)
11141 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11142
11143 SDValue Result =
11144 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11145 Load->getMemoryVT(), Load->getMemOperand());
11146 SmallVector<SDValue> Results;
11147 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11148 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11149 Result.getValue(0),
11150 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11151 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11152 }
11153 Results.push_back(Result.getValue(1));
11154 return DAG.getMergeValues(Results, DL);
11155}
11156
11157SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11158 SelectionDAG &DAG) const {
11159 unsigned IntNo = Op.getConstantOperandVal(1);
11160 switch (IntNo) {
11161 default:
11162 break;
11163 case Intrinsic::riscv_seg2_load_mask:
11164 case Intrinsic::riscv_seg3_load_mask:
11165 case Intrinsic::riscv_seg4_load_mask:
11166 case Intrinsic::riscv_seg5_load_mask:
11167 case Intrinsic::riscv_seg6_load_mask:
11168 case Intrinsic::riscv_seg7_load_mask:
11169 case Intrinsic::riscv_seg8_load_mask:
11170 case Intrinsic::riscv_sseg2_load_mask:
11171 case Intrinsic::riscv_sseg3_load_mask:
11172 case Intrinsic::riscv_sseg4_load_mask:
11173 case Intrinsic::riscv_sseg5_load_mask:
11174 case Intrinsic::riscv_sseg6_load_mask:
11175 case Intrinsic::riscv_sseg7_load_mask:
11176 case Intrinsic::riscv_sseg8_load_mask:
11177 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11178
11179 case Intrinsic::riscv_sf_vc_v_x_se:
11180 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11181 case Intrinsic::riscv_sf_vc_v_i_se:
11182 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11183 case Intrinsic::riscv_sf_vc_v_xv_se:
11184 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11185 case Intrinsic::riscv_sf_vc_v_iv_se:
11186 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11187 case Intrinsic::riscv_sf_vc_v_vv_se:
11188 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11189 case Intrinsic::riscv_sf_vc_v_fv_se:
11190 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11191 case Intrinsic::riscv_sf_vc_v_xvv_se:
11192 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11193 case Intrinsic::riscv_sf_vc_v_ivv_se:
11194 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11195 case Intrinsic::riscv_sf_vc_v_vvv_se:
11196 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11197 case Intrinsic::riscv_sf_vc_v_fvv_se:
11198 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11199 case Intrinsic::riscv_sf_vc_v_xvw_se:
11200 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11201 case Intrinsic::riscv_sf_vc_v_ivw_se:
11202 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11203 case Intrinsic::riscv_sf_vc_v_vvw_se:
11204 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11205 case Intrinsic::riscv_sf_vc_v_fvw_se:
11206 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11207 }
11208
11209 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11210}
11211
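// Lower the fixed-length riscv.seg<N>.store.mask and riscv.sseg<N>.store.mask
// (strided) intrinsics: pack the fixed-length sources into a scalable vector
// tuple and emit the corresponding masked vsseg/vssseg intrinsic.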
11212static SDValue
11213 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11214 const RISCVSubtarget &Subtarget,
11215 SelectionDAG &DAG) {
11216 bool IsStrided;
11217 switch (IntNo) {
11218 case Intrinsic::riscv_seg2_store_mask:
11219 case Intrinsic::riscv_seg3_store_mask:
11220 case Intrinsic::riscv_seg4_store_mask:
11221 case Intrinsic::riscv_seg5_store_mask:
11222 case Intrinsic::riscv_seg6_store_mask:
11223 case Intrinsic::riscv_seg7_store_mask:
11224 case Intrinsic::riscv_seg8_store_mask:
11225 IsStrided = false;
11226 break;
11227 case Intrinsic::riscv_sseg2_store_mask:
11228 case Intrinsic::riscv_sseg3_store_mask:
11229 case Intrinsic::riscv_sseg4_store_mask:
11230 case Intrinsic::riscv_sseg5_store_mask:
11231 case Intrinsic::riscv_sseg6_store_mask:
11232 case Intrinsic::riscv_sseg7_store_mask:
11233 case Intrinsic::riscv_sseg8_store_mask:
11234 IsStrided = true;
11235 break;
11236 default:
11237 llvm_unreachable("unexpected intrinsic ID");
11238 }
11239
11240 SDLoc DL(Op);
11241 static const Intrinsic::ID VssegInts[] = {
11242 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11243 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11244 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11245 Intrinsic::riscv_vsseg8_mask};
11246 static const Intrinsic::ID VsssegInts[] = {
11247 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11248 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11249 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11250 Intrinsic::riscv_vssseg8_mask};
11251
11252 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11253 // (chain, int_id, vec*, ptr, stride, mask, vl)
11254 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11255 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11256 MVT XLenVT = Subtarget.getXLenVT();
11257 MVT VT = Op->getOperand(2).getSimpleValueType();
11258 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11259 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11260 ContainerVT.getScalarSizeInBits();
11261 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11262
11263 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11264 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11265 MVT MaskVT = Mask.getSimpleValueType();
11266 MVT MaskContainerVT =
11267 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11268 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11269
11270 SDValue IntID = DAG.getTargetConstant(
11271 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11272 SDValue Ptr = Op->getOperand(NF + 2);
11273
11274 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11275
11276 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11277 for (unsigned i = 0; i < NF; i++)
11278 StoredVal = DAG.getNode(
11279 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11280 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11281 DAG, Subtarget),
11282 DAG.getTargetConstant(i, DL, MVT::i32));
11283
11284 SmallVector<SDValue> Ops = {
11285 FixedIntrinsic->getChain(),
11286 IntID,
11287 StoredVal,
11288 Ptr,
11289 Mask,
11290 VL,
11291 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11292 // Insert the stride operand.
11293 if (IsStrided)
11294 Ops.insert(std::next(Ops.begin(), 4),
11295 Op.getOperand(Op.getNumOperands() - 3));
11296
11297 return DAG.getMemIntrinsicNode(
11298 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11299 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11300}
11301
11302SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11303 SelectionDAG &DAG) const {
11304 unsigned IntNo = Op.getConstantOperandVal(1);
11305 switch (IntNo) {
11306 default:
11307 break;
11308 case Intrinsic::riscv_seg2_store_mask:
11309 case Intrinsic::riscv_seg3_store_mask:
11310 case Intrinsic::riscv_seg4_store_mask:
11311 case Intrinsic::riscv_seg5_store_mask:
11312 case Intrinsic::riscv_seg6_store_mask:
11313 case Intrinsic::riscv_seg7_store_mask:
11314 case Intrinsic::riscv_seg8_store_mask:
11315 case Intrinsic::riscv_sseg2_store_mask:
11316 case Intrinsic::riscv_sseg3_store_mask:
11317 case Intrinsic::riscv_sseg4_store_mask:
11318 case Intrinsic::riscv_sseg5_store_mask:
11319 case Intrinsic::riscv_sseg6_store_mask:
11320 case Intrinsic::riscv_sseg7_store_mask:
11321 case Intrinsic::riscv_sseg8_store_mask:
11322 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11323
11324 case Intrinsic::riscv_sf_vc_xv_se:
11325 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11326 case Intrinsic::riscv_sf_vc_iv_se:
11327 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11328 case Intrinsic::riscv_sf_vc_vv_se:
11329 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11330 case Intrinsic::riscv_sf_vc_fv_se:
11331 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11332 case Intrinsic::riscv_sf_vc_xvv_se:
11333 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11334 case Intrinsic::riscv_sf_vc_ivv_se:
11335 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11336 case Intrinsic::riscv_sf_vc_vvv_se:
11337 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11338 case Intrinsic::riscv_sf_vc_fvv_se:
11339 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11340 case Intrinsic::riscv_sf_vc_xvw_se:
11341 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11342 case Intrinsic::riscv_sf_vc_ivw_se:
11343 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11344 case Intrinsic::riscv_sf_vc_vvw_se:
11345 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11346 case Intrinsic::riscv_sf_vc_fvw_se:
11347 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11348 }
11349
11350 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11351}
11352
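// Map a VECREDUCE_*/VP_REDUCE_* opcode to the corresponding VL-predicated
// RISCVISD reduction opcode.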
11353static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11354 switch (ISDOpcode) {
11355 default:
11356 llvm_unreachable("Unhandled reduction");
11357 case ISD::VP_REDUCE_ADD:
11358 case ISD::VECREDUCE_ADD:
11359 return RISCVISD::VECREDUCE_ADD_VL;
11360 case ISD::VP_REDUCE_UMAX:
11361 case ISD::VECREDUCE_UMAX:
11362 return RISCVISD::VECREDUCE_UMAX_VL;
11363 case ISD::VP_REDUCE_SMAX:
11364 case ISD::VECREDUCE_SMAX:
11365 return RISCVISD::VECREDUCE_SMAX_VL;
11366 case ISD::VP_REDUCE_UMIN:
11367 case ISD::VECREDUCE_UMIN:
11368 return RISCVISD::VECREDUCE_UMIN_VL;
11369 case ISD::VP_REDUCE_SMIN:
11370 case ISD::VECREDUCE_SMIN:
11371 return RISCVISD::VECREDUCE_SMIN_VL;
11372 case ISD::VP_REDUCE_AND:
11373 case ISD::VECREDUCE_AND:
11374 return RISCVISD::VECREDUCE_AND_VL;
11375 case ISD::VP_REDUCE_OR:
11376 case ISD::VECREDUCE_OR:
11377 return RISCVISD::VECREDUCE_OR_VL;
11378 case ISD::VP_REDUCE_XOR:
11379 case ISD::VECREDUCE_XOR:
11380 return RISCVISD::VECREDUCE_XOR_VL;
11381 case ISD::VP_REDUCE_FADD:
11382 return RISCVISD::VECREDUCE_FADD_VL;
11383 case ISD::VP_REDUCE_SEQ_FADD:
11384 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11385 case ISD::VP_REDUCE_FMAX:
11386 case ISD::VP_REDUCE_FMAXIMUM:
11387 return RISCVISD::VECREDUCE_FMAX_VL;
11388 case ISD::VP_REDUCE_FMIN:
11389 case ISD::VP_REDUCE_FMINIMUM:
11390 return RISCVISD::VECREDUCE_FMIN_VL;
11391 }
11392
11393}
11394
11395SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11396 SelectionDAG &DAG,
11397 bool IsVP) const {
11398 SDLoc DL(Op);
11399 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11400 MVT VecVT = Vec.getSimpleValueType();
11401 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11402 Op.getOpcode() == ISD::VECREDUCE_OR ||
11403 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11404 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11405 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11406 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11407 "Unexpected reduction lowering");
11408
11409 MVT XLenVT = Subtarget.getXLenVT();
11410
11411 MVT ContainerVT = VecVT;
11412 if (VecVT.isFixedLengthVector()) {
11413 ContainerVT = getContainerForFixedLengthVector(VecVT);
11414 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11415 }
11416
11417 SDValue Mask, VL;
11418 if (IsVP) {
11419 Mask = Op.getOperand(2);
11420 VL = Op.getOperand(3);
11421 } else {
11422 std::tie(Mask, VL) =
11423 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11424 }
11425
11426 ISD::CondCode CC;
11427 switch (Op.getOpcode()) {
11428 default:
11429 llvm_unreachable("Unhandled reduction");
11430 case ISD::VECREDUCE_AND:
11431 case ISD::VP_REDUCE_AND: {
11432 // vcpop ~x == 0
11433 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11434 if (IsVP || VecVT.isFixedLengthVector())
11435 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11436 else
11437 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11438 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11439 CC = ISD::SETEQ;
11440 break;
11441 }
11442 case ISD::VECREDUCE_OR:
11443 case ISD::VP_REDUCE_OR:
11444 // vcpop x != 0
11445 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11446 CC = ISD::SETNE;
11447 break;
11448 case ISD::VECREDUCE_XOR:
11449 case ISD::VP_REDUCE_XOR: {
11450 // ((vcpop x) & 1) != 0
11451 SDValue One = DAG.getConstant(1, DL, XLenVT);
11452 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11453 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11454 CC = ISD::SETNE;
11455 break;
11456 }
11457 }
11458
11459 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11460 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11461 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11462
11463 if (!IsVP)
11464 return SetCC;
11465
11466 // Now include the start value in the operation.
11467 // Note that we must return the start value when no elements are operated
11468 // upon. The vcpop instructions we've emitted in each case above will return
11469 // 0 for an inactive vector, and so we've already received the neutral value:
11470 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11471 // can simply include the start value.
11472 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11473 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11474}
11475
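// Return true if the AVL is known to be non-zero: either the X0 sentinel
// register (meaning VLMAX) or a constant of at least one.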
11476static bool isNonZeroAVL(SDValue AVL) {
11477 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11478 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11479 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11480 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11481}
11482
11483/// Helper to lower a reduction sequence of the form:
11484/// scalar = reduce_op vec, scalar_start
11485static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11486 SDValue StartValue, SDValue Vec, SDValue Mask,
11487 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11488 const RISCVSubtarget &Subtarget) {
11489 const MVT VecVT = Vec.getSimpleValueType();
11490 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11491 const MVT XLenVT = Subtarget.getXLenVT();
11492 const bool NonZeroAVL = isNonZeroAVL(VL);
11493
11494 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11495 // or the original VT if fractional.
11496 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11497 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11498 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11499 // be the result of the reduction operation.
11500 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11501 SDValue InitialValue =
11502 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11503 if (M1VT != InnerVT)
11504 InitialValue =
11505 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11506 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11507 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11508 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11509 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11510 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11511}
11512
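// Lower an integer VECREDUCE_* node: split or convert the source to a legal
// scalable container type, pick a start value, and emit a single
// VL-predicated reduction via lowerReductionSeq.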
11513SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11514 SelectionDAG &DAG) const {
11515 SDLoc DL(Op);
11516 SDValue Vec = Op.getOperand(0);
11517 EVT VecEVT = Vec.getValueType();
11518
11519 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11520
11521 // Due to ordering in legalize types we may have a vector type that needs to
11522 // be split. Do that manually so we can get down to a legal type.
11523 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11524 TargetLowering::TypeSplitVector) {
11525 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11526 VecEVT = Lo.getValueType();
11527 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11528 }
11529
11530 // TODO: The type may need to be widened rather than split. Or widened before
11531 // it can be split.
11532 if (!isTypeLegal(VecEVT))
11533 return SDValue();
11534
11535 MVT VecVT = VecEVT.getSimpleVT();
11536 MVT VecEltVT = VecVT.getVectorElementType();
11537 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11538
11539 MVT ContainerVT = VecVT;
11540 if (VecVT.isFixedLengthVector()) {
11541 ContainerVT = getContainerForFixedLengthVector(VecVT);
11542 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11543 }
11544
11545 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11546
11547 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11548 switch (BaseOpc) {
11549 case ISD::AND:
11550 case ISD::OR:
11551 case ISD::UMAX:
11552 case ISD::UMIN:
11553 case ISD::SMAX:
11554 case ISD::SMIN:
11555 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11556 }
11557 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11558 Mask, VL, DL, DAG, Subtarget);
11559}
11560
11561// Given a reduction op, this function returns the matching reduction opcode,
11562// the vector SDValue and the scalar SDValue required to lower this to a
11563// RISCVISD node.
11564static std::tuple<unsigned, SDValue, SDValue>
11565 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11566 const RISCVSubtarget &Subtarget) {
11567 SDLoc DL(Op);
11568 auto Flags = Op->getFlags();
11569 unsigned Opcode = Op.getOpcode();
11570 switch (Opcode) {
11571 default:
11572 llvm_unreachable("Unhandled reduction");
11573 case ISD::VECREDUCE_FADD: {
11574 // Use positive zero if we can. It is cheaper to materialize.
11575 SDValue Zero =
11576 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11577 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11578 }
11579 case ISD::VECREDUCE_SEQ_FADD:
11580 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11581 Op.getOperand(0));
11582 case ISD::VECREDUCE_FMINIMUM:
11583 case ISD::VECREDUCE_FMAXIMUM:
11584 case ISD::VECREDUCE_FMIN:
11585 case ISD::VECREDUCE_FMAX: {
11586 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11587 unsigned RVVOpc =
11588 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11589 ? RISCVISD::VECREDUCE_FMIN_VL
11590 : RISCVISD::VECREDUCE_FMAX_VL;
11591 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11592 }
11593 }
11594}
11595
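// Lower a floating-point VECREDUCE_* node. For VECREDUCE_FMINIMUM/FMAXIMUM
// the result is additionally forced to NaN when any input element is NaN.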
11596SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11597 SelectionDAG &DAG) const {
11598 SDLoc DL(Op);
11599 MVT VecEltVT = Op.getSimpleValueType();
11600
11601 unsigned RVVOpcode;
11602 SDValue VectorVal, ScalarVal;
11603 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11604 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11605 MVT VecVT = VectorVal.getSimpleValueType();
11606
11607 MVT ContainerVT = VecVT;
11608 if (VecVT.isFixedLengthVector()) {
11609 ContainerVT = getContainerForFixedLengthVector(VecVT);
11610 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11611 }
11612
11613 MVT ResVT = Op.getSimpleValueType();
11614 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11615 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11616 VL, DL, DAG, Subtarget);
11617 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11618 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11619 return Res;
11620
11621 if (Op->getFlags().hasNoNaNs())
11622 return Res;
11623
11624 // Force output to NaN if any element is NaN.
11625 SDValue IsNan =
11626 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11627 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11628 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11629 MVT XLenVT = Subtarget.getXLenVT();
11630 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11631 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11632 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11633 return DAG.getSelect(
11634 DL, ResVT, NoNaNs, Res,
11635 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11636}
11637
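// Lower a VP_REDUCE_* node to a VL-predicated RVV reduction, explicitly
// propagating NaNs for VP_REDUCE_FMINIMUM/FMAXIMUM.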
11638SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11639 SelectionDAG &DAG) const {
11640 SDLoc DL(Op);
11641 unsigned Opc = Op.getOpcode();
11642 SDValue Start = Op.getOperand(0);
11643 SDValue Vec = Op.getOperand(1);
11644 EVT VecEVT = Vec.getValueType();
11645 MVT XLenVT = Subtarget.getXLenVT();
11646
11647 // TODO: The type may need to be widened rather than split. Or widened before
11648 // it can be split.
11649 if (!isTypeLegal(VecEVT))
11650 return SDValue();
11651
11652 MVT VecVT = VecEVT.getSimpleVT();
11653 unsigned RVVOpcode = getRVVReductionOp(Opc);
11654
11655 if (VecVT.isFixedLengthVector()) {
11656 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11657 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11658 }
11659
11660 SDValue VL = Op.getOperand(3);
11661 SDValue Mask = Op.getOperand(2);
11662 SDValue Res =
11663 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11664 Vec, Mask, VL, DL, DAG, Subtarget);
11665 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11666 Op->getFlags().hasNoNaNs())
11667 return Res;
11668
11669 // Propagate NaNs.
11670 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11671 // Check if any of the elements in Vec is NaN.
11672 SDValue IsNaN = DAG.getNode(
11673 RISCVISD::SETCC_VL, DL, PredVT,
11674 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11675 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11676 // Check if the start value is NaN.
11677 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11678 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11679 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11680 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11681 MVT ResVT = Res.getSimpleValueType();
11682 return DAG.getSelect(
11683 DL, ResVT, NoNaNs, Res,
11684 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11685}
11686
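// Lower INSERT_SUBVECTOR. Inserts that line up with a vector register
// boundary become subregister operations; otherwise the subvector is slid
// into place with a vslideup, going through an LMUL=1 type when possible to
// avoid allocating a large register group for the subvector.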
11687SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11688 SelectionDAG &DAG) const {
11689 SDValue Vec = Op.getOperand(0);
11690 SDValue SubVec = Op.getOperand(1);
11691 MVT VecVT = Vec.getSimpleValueType();
11692 MVT SubVecVT = SubVec.getSimpleValueType();
11693
11694 SDLoc DL(Op);
11695 MVT XLenVT = Subtarget.getXLenVT();
11696 unsigned OrigIdx = Op.getConstantOperandVal(2);
11697 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11698
11699 if (OrigIdx == 0 && Vec.isUndef())
11700 return Op;
11701
11702 // We don't have the ability to slide mask vectors up indexed by their i1
11703 // elements; the smallest we can do is i8. Often we are able to bitcast to
11704 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11705 // into a scalable one, we might not necessarily have enough scalable
11706 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
11707 if (SubVecVT.getVectorElementType() == MVT::i1) {
11708 if (VecVT.getVectorMinNumElements() >= 8 &&
11709 SubVecVT.getVectorMinNumElements() >= 8) {
11710 assert(OrigIdx % 8 == 0 && "Invalid index");
11711 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11712 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11713 "Unexpected mask vector lowering");
11714 OrigIdx /= 8;
11715 SubVecVT =
11716 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11717 SubVecVT.isScalableVector());
11718 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11719 VecVT.isScalableVector());
11720 Vec = DAG.getBitcast(VecVT, Vec);
11721 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11722 } else {
11723 // We can't slide this mask vector up indexed by its i1 elements.
11724 // This poses a problem when we wish to insert a scalable vector which
11725 // can't be re-expressed as a larger type. Just choose the slow path and
11726 // extend to a larger type, then truncate back down.
11727 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11728 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11729 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11730 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11731 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11732 Op.getOperand(2));
11733 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11734 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11735 }
11736 }
11737
11738 // If the subvector vector is a fixed-length type and we don't know VLEN
11739 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11740 // don't know which register of a LMUL group contains the specific subvector
11741 // as we only know the minimum register size. Therefore we must slide the
11742 // vector group up the full amount.
11743 const auto VLen = Subtarget.getRealVLen();
11744 if (SubVecVT.isFixedLengthVector() && !VLen) {
11745 MVT ContainerVT = VecVT;
11746 if (VecVT.isFixedLengthVector()) {
11747 ContainerVT = getContainerForFixedLengthVector(VecVT);
11748 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11749 }
11750
11751 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11752
11753 SDValue Mask =
11754 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11755 // Set the vector length to only the number of elements we care about. Note
11756 // that for slideup this includes the offset.
11757 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11758 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11759
11760 // Use tail agnostic policy if we're inserting over Vec's tail.
11761 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11762 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11763 Policy = RISCVVType::TAIL_AGNOSTIC;
11764
11765 // If we're inserting into the lowest elements, use a tail undisturbed
11766 // vmv.v.v.
11767 if (OrigIdx == 0) {
11768 SubVec =
11769 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11770 } else {
11771 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11772 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11773 SlideupAmt, Mask, VL, Policy);
11774 }
11775
11776 if (VecVT.isFixedLengthVector())
11777 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11778 return DAG.getBitcast(Op.getValueType(), SubVec);
11779 }
11780
11781 MVT ContainerVecVT = VecVT;
11782 if (VecVT.isFixedLengthVector()) {
11783 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11784 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11785 }
11786
11787 MVT ContainerSubVecVT = SubVecVT;
11788 if (SubVecVT.isFixedLengthVector()) {
11789 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11790 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11791 }
11792
11793 unsigned SubRegIdx;
11794 ElementCount RemIdx;
11795 // insert_subvector scales the index by vscale if the subvector is scalable,
11796 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11797 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11798 if (SubVecVT.isFixedLengthVector()) {
11799 assert(VLen);
11800 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11801 auto Decompose =
11802 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11803 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11804 SubRegIdx = Decompose.first;
11805 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11806 (OrigIdx % Vscale));
11807 } else {
11808 auto Decompose =
11809 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11810 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11811 SubRegIdx = Decompose.first;
11812 RemIdx = ElementCount::getScalable(Decompose.second);
11813 }
11814
11815 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11816 assert(isPowerOf2_64(
11817 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11818 bool ExactlyVecRegSized =
11819 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11820 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11821
11822 // 1. If the Idx has been completely eliminated and this subvector's size is
11823 // a vector register or a multiple thereof, or the surrounding elements are
11824 // undef, then this is a subvector insert which naturally aligns to a vector
11825 // register. These can easily be handled using subregister manipulation.
11826 // 2. If the subvector isn't an exact multiple of a valid register group size,
11827 // then the insertion must preserve the undisturbed elements of the register.
11828 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11829 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11830 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11831 // of that LMUL=1 type back into the larger vector (resolving to another
11832 // subregister operation). See below for how our VSLIDEUP works. We go via a
11833 // LMUL=1 type to avoid allocating a large register group to hold our
11834 // subvector.
11835 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11836 if (SubVecVT.isFixedLengthVector()) {
11837 // We may get NoSubRegister if inserting at index 0 and the subvec
11838 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11839 if (SubRegIdx == RISCV::NoSubRegister) {
11840 assert(OrigIdx == 0);
11841 return Op;
11842 }
11843
11844 // Use an insert_subvector that will resolve to an insert subreg.
11845 assert(VLen);
11846 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11847 SDValue Insert =
11848 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11849 if (VecVT.isFixedLengthVector())
11850 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11851 return Insert;
11852 }
11853 return Op;
11854 }
11855
11856 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
11857 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11858 // (in our case undisturbed). This means we can set up a subvector insertion
11859 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11860 // size of the subvector.
11861 MVT InterSubVT = ContainerVecVT;
11862 SDValue AlignedExtract = Vec;
11863 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11864 if (SubVecVT.isFixedLengthVector()) {
11865 assert(VLen);
11866 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11867 }
11868 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11869 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11870 // Extract a subvector equal to the nearest full vector register type. This
11871 // should resolve to a EXTRACT_SUBREG instruction.
11872 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11873 }
11874
11875 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11876
11877 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11878
11879 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11880 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11881
11882 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11883 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11884 if (Subtarget.expandVScale(EndIndex) ==
11885 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11886 Policy = RISCVVType::TAIL_AGNOSTIC;
11887
11888 // If we're inserting into the lowest elements, use a tail undisturbed
11889 // vmv.v.v.
11890 if (RemIdx.isZero()) {
11891 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11892 SubVec, VL);
11893 } else {
11894 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11895
11896 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11897 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11898
11899 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11900 SlideupAmt, Mask, VL, Policy);
11901 }
11902
11903 // If required, insert this subvector back into the correct vector register.
11904 // This should resolve to an INSERT_SUBREG instruction.
11905 if (ContainerVecVT.bitsGT(InterSubVT))
11906 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11907
11908 if (VecVT.isFixedLengthVector())
11909 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11910
11911 // We might have bitcast from a mask type: cast back to the original type if
11912 // required.
11913 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11914}
11915
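// Lower EXTRACT_SUBVECTOR. Extracts that line up with a vector register
// boundary become subregister operations; otherwise the source is slid down
// with a vslidedown and the result is read from element 0.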
11916SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11917 SelectionDAG &DAG) const {
11918 SDValue Vec = Op.getOperand(0);
11919 MVT SubVecVT = Op.getSimpleValueType();
11920 MVT VecVT = Vec.getSimpleValueType();
11921
11922 SDLoc DL(Op);
11923 MVT XLenVT = Subtarget.getXLenVT();
11924 unsigned OrigIdx = Op.getConstantOperandVal(1);
11925 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11926
11927 // With an index of 0 this is a cast-like subvector, which can be performed
11928 // with subregister operations.
11929 if (OrigIdx == 0)
11930 return Op;
11931
11932 // We don't have the ability to slide mask vectors down indexed by their i1
11933 // elements; the smallest we can do is i8. Often we are able to bitcast to
11934 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11935 // from a scalable one, we might not necessarily have enough scalable
11936 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11937 if (SubVecVT.getVectorElementType() == MVT::i1) {
11938 if (VecVT.getVectorMinNumElements() >= 8 &&
11939 SubVecVT.getVectorMinNumElements() >= 8) {
11940 assert(OrigIdx % 8 == 0 && "Invalid index");
11941 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11942 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11943 "Unexpected mask vector lowering");
11944 OrigIdx /= 8;
11945 SubVecVT =
11946 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11947 SubVecVT.isScalableVector());
11948 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11949 VecVT.isScalableVector());
11950 Vec = DAG.getBitcast(VecVT, Vec);
11951 } else {
11952 // We can't slide this mask vector down, indexed by its i1 elements.
11953 // This poses a problem when we wish to extract a scalable vector which
11954 // can't be re-expressed as a larger type. Just choose the slow path and
11955 // extend to a larger type, then truncate back down.
11956 // TODO: We could probably improve this when extracting certain fixed
11957 // from fixed, where we can extract as i8 and shift the correct element
11958 // right to reach the desired subvector?
11959 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11960 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11961 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11962 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11963 Op.getOperand(1));
11964 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11965 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11966 }
11967 }
11968
11969 const auto VLen = Subtarget.getRealVLen();
11970
11971 // If the subvector vector is a fixed-length type and we don't know VLEN
11972 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11973 // don't know which register of a LMUL group contains the specific subvector
11974 // as we only know the minimum register size. Therefore we must slide the
11975 // vector group down the full amount.
11976 if (SubVecVT.isFixedLengthVector() && !VLen) {
11977 MVT ContainerVT = VecVT;
11978 if (VecVT.isFixedLengthVector()) {
11979 ContainerVT = getContainerForFixedLengthVector(VecVT);
11980 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11981 }
11982
11983 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
11984 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
11985 if (auto ShrunkVT =
11986 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
11987 ContainerVT = *ShrunkVT;
11988 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
11989 }
11990
11991 SDValue Mask =
11992 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11993 // Set the vector length to only the number of elements we care about. This
11994 // avoids sliding down elements we're going to discard straight away.
11995 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
11996 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11997 SDValue Slidedown =
11998 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11999 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12000 // Now we can use a cast-like subvector extract to get the result.
12001 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12002 return DAG.getBitcast(Op.getValueType(), Slidedown);
12003 }
12004
12005 if (VecVT.isFixedLengthVector()) {
12006 VecVT = getContainerForFixedLengthVector(VecVT);
12007 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12008 }
12009
12010 MVT ContainerSubVecVT = SubVecVT;
12011 if (SubVecVT.isFixedLengthVector())
12012 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12013
12014 unsigned SubRegIdx;
12015 ElementCount RemIdx;
12016 // extract_subvector scales the index by vscale if the subvector is scalable,
12017 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12018 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12019 if (SubVecVT.isFixedLengthVector()) {
12020 assert(VLen);
12021 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12022 auto Decompose =
12023 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12024 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12025 SubRegIdx = Decompose.first;
12026 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12027 (OrigIdx % Vscale));
12028 } else {
12029 auto Decompose =
12030 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12031 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12032 SubRegIdx = Decompose.first;
12033 RemIdx = ElementCount::getScalable(Decompose.second);
12034 }
12035
12036 // If the Idx has been completely eliminated then this is a subvector extract
12037 // which naturally aligns to a vector register. These can easily be handled
12038 // using subregister manipulation. We use an extract_subvector that will
12039 // resolve to an extract subreg.
12040 if (RemIdx.isZero()) {
12041 if (SubVecVT.isFixedLengthVector()) {
12042 assert(VLen);
12043 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12044 Vec =
12045 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12046 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12047 }
12048 return Op;
12049 }
12050
12051 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12052 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12053 // divide exactly.
12054 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12055 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12056
12057 // If the vector type is an LMUL-group type, extract a subvector equal to the
12058 // nearest full vector register type.
12059 MVT InterSubVT = VecVT;
12060 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12061 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12062 // we should have successfully decomposed the extract into a subregister.
12063 // We use an extract_subvector that will resolve to a subreg extract.
12064 assert(SubRegIdx != RISCV::NoSubRegister);
12065 (void)SubRegIdx;
12066 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12067 if (SubVecVT.isFixedLengthVector()) {
12068 assert(VLen);
12069 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12070 }
12071 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12072 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12073 }
12074
12075 // Slide this vector register down by the desired number of elements in order
12076 // to place the desired subvector starting at element 0.
12077 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12078 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12079 if (SubVecVT.isFixedLengthVector())
12080 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12081 SDValue Slidedown =
12082 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12083 Vec, SlidedownAmt, Mask, VL);
12084
12085 // Now the vector is in the right position, extract our final subvector. This
12086 // should resolve to a COPY.
12087 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12088
12089 // We might have bitcast from a mask type: cast back to the original type if
12090 // required.
12091 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12092}
12093
12094// Widen a vector's operands to i8, then truncate its results back to the
12095// original type, typically i1. All operand and result types must be the same.
12096 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12097 SelectionDAG &DAG) {
12098 MVT VT = N.getSimpleValueType();
12099 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12100 SmallVector<SDValue, 4> WideOps;
12101 for (SDValue Op : N->ops()) {
12102 assert(Op.getSimpleValueType() == VT &&
12103 "Operands and result must be same type");
12104 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12105 }
12106
12107 unsigned NumVals = N->getNumValues();
12108
12109 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12110 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12111 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12112 SmallVector<SDValue, 4> TruncVals;
12113 for (unsigned I = 0; I < NumVals; I++) {
12114 TruncVals.push_back(
12115 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12116 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12117 }
12118
12119 if (TruncVals.size() > 1)
12120 return DAG.getMergeValues(TruncVals, DL);
12121 return TruncVals.front();
12122}
12123
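// Lower VECTOR_DEINTERLEAVE. Factor-2 cases use vnsrl or vcompress (or
// ri.vunzip2a/ri.vunzip2b with XRivosVizip); other factors store the
// concatenated source to a stack slot and reload it with a segment load.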
12124SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12125 SelectionDAG &DAG) const {
12126 SDLoc DL(Op);
12127 MVT VecVT = Op.getSimpleValueType();
12128
12129 const unsigned Factor = Op->getNumValues();
12130 assert(Factor <= 8);
12131
12132 // 1 bit element vectors need to be widened to e8
12133 if (VecVT.getVectorElementType() == MVT::i1)
12134 return widenVectorOpsToi8(Op, DL, DAG);
12135
12136 // Convert to scalable vectors first.
12137 if (VecVT.isFixedLengthVector()) {
12138 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12139 SmallVector<SDValue, 8> Ops(Factor);
12140 for (unsigned i = 0U; i < Factor; ++i)
12141 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12142 Subtarget);
12143
12144 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12145 SDValue NewDeinterleave =
12146 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs, Ops);
12147
12148 SmallVector<SDValue, 8> Res(Factor);
12149 for (unsigned i = 0U; i < Factor; ++i)
12150 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12151 DAG, Subtarget);
12152 return DAG.getMergeValues(Res, DL);
12153 }
12154
12155 // If concatenating would exceed LMUL=8, we need to split.
12156 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12157 (8 * RISCV::RVVBitsPerBlock)) {
12158 SmallVector<SDValue, 8> Ops(Factor * 2);
12159 for (unsigned i = 0; i != Factor; ++i) {
12160 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12161 Ops[i * 2] = OpLo;
12162 Ops[i * 2 + 1] = OpHi;
12163 }
12164
12165 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12166
12167 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12168 ArrayRef(Ops).slice(0, Factor));
12169 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12170 ArrayRef(Ops).slice(Factor, Factor));
12171
12172 SmallVector<SDValue, 8> Res(Factor);
12173 for (unsigned i = 0; i != Factor; ++i)
12174 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12175 Hi.getValue(i));
12176
12177 return DAG.getMergeValues(Res, DL);
12178 }
12179
12180 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12181 MVT VT = Op->getSimpleValueType(0);
12182 SDValue V1 = Op->getOperand(0);
12183 SDValue V2 = Op->getOperand(1);
12184
12185 // For fractional LMUL, check if we can use a higher LMUL
12186 // instruction to avoid a vslidedown.
12187 if (SDValue Src = foldConcatVector(V1, V2);
12188 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12189 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12190 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12191 // Freeze the source so we can increase its use count.
12192 Src = DAG.getFreeze(Src);
12193 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12194 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12195 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12196 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12197 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12198 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12199 return DAG.getMergeValues({Even, Odd}, DL);
12200 }
12201
12202 // Freeze the sources so we can increase their use count.
12203 V1 = DAG.getFreeze(V1);
12204 V2 = DAG.getFreeze(V2);
12205 SDValue Even =
12206 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12207 SDValue Odd =
12208 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12209 return DAG.getMergeValues({Even, Odd}, DL);
12210 }
12211
12212 SmallVector<SDValue, 8> Ops(Op->op_values());
12213
12214 // Concatenate the vectors as one vector to deinterleave
12215   MVT ConcatVT =
12216       MVT::getVectorVT(VecVT.getVectorElementType(),
12217                        VecVT.getVectorElementCount().multiplyCoefficientBy(
12218                            PowerOf2Ceil(Factor)));
12219 if (Ops.size() < PowerOf2Ceil(Factor))
12220 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12221 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12222
12223 if (Factor == 2) {
12224 // We can deinterleave through vnsrl.wi if the element type is smaller than
12225 // ELEN
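    // For example (illustrative), with SEW == 16 the concatenated vector is
    // viewed as elements of 2*SEW bits; a vnsrl with shift amount 0 keeps the
    // even (low-half) elements and a shift amount of SEW keeps the odd
    // (high-half) elements.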
12226 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12227 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12228 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12229 return DAG.getMergeValues({Even, Odd}, DL);
12230 }
12231
12232     // For the masks, use the vmv.v.x of an i8 constant to fill the largest
12233     // possible mask vector, then extract the required subvector. Doing this
12234 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12235 // creation to be rematerialized during register allocation to reduce
12236 // register pressure if needed.
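    // For example (illustrative): splatting the byte 0b01010101 into nxv8i8
    // and bitcasting it to nxv64i1 yields the LSB-first mask 1,0,1,0,...,
    // which selects the even elements of the concatenated vector; 0b10101010
    // gives the complementary odd-element mask.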
12237
12238 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12239
12240 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12241 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12242 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12243
12244 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12245 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12246 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12247
12248 // vcompress the even and odd elements into two separate vectors
12249 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12250 EvenMask, DAG.getUNDEF(ConcatVT));
12251 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12252 OddMask, DAG.getUNDEF(ConcatVT));
12253
12254 // Extract the result half of the gather for even and odd
12255 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12256 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12257
12258 return DAG.getMergeValues({Even, Odd}, DL);
12259 }
12260
12261   // Store with a unit-stride store and load it back with a segmented load.
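  // As a rough sketch, for Factor == 4 and SEW == 32 this produces something
  // like:
  //   vse32.v     v8, (stack-slot)    ; store the concatenated vector
  //   vlseg4e32.v v8, (stack-slot)    ; reload as a 4-register segment tuple
  // and each field of the returned tuple is one deinterleaved result.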
12262 MVT XLenVT = Subtarget.getXLenVT();
12263 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12264 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12265
12266 // Allocate a stack slot.
12267 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12268   SDValue StackPtr =
12269       DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12270 auto &MF = DAG.getMachineFunction();
12271 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12272 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12273
12274 SDValue StoreOps[] = {DAG.getEntryNode(),
12275 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12276 Concat, StackPtr, VL};
12277
12278 SDValue Chain = DAG.getMemIntrinsicNode(
12279 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12280 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12281       MachineMemOperand::MOStore);
12282
12283 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12284 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12285 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12286 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12287 Intrinsic::riscv_vlseg8_mask};
12288
12289 SDValue LoadOps[] = {
12290 Chain,
12291 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12292 Passthru,
12293 StackPtr,
12294 Mask,
12295       VL,
12296       DAG.getTargetConstant(
12297           RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12298       DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12299
12300 unsigned Sz =
12301 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12302 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12303
12303
12304   SDValue Load = DAG.getMemIntrinsicNode(
12305       ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12306       LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12307       MachineMemOperand::MOLoad);
12308
12309 SmallVector<SDValue, 8> Res(Factor);
12310
12311 for (unsigned i = 0U; i < Factor; ++i)
12312 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12313 DAG.getTargetConstant(i, DL, MVT::i32));
12314
12315 return DAG.getMergeValues(Res, DL);
12316}
12317
12318SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12319 SelectionDAG &DAG) const {
12320 SDLoc DL(Op);
12321 MVT VecVT = Op.getSimpleValueType();
12322
12323 const unsigned Factor = Op.getNumOperands();
12324 assert(Factor <= 8);
12325
12326 // i1 vectors need to be widened to i8
12327 if (VecVT.getVectorElementType() == MVT::i1)
12328 return widenVectorOpsToi8(Op, DL, DAG);
12329
12330 // Convert to scalable vectors first.
12331 if (VecVT.isFixedLengthVector()) {
12332 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12333     SmallVector<SDValue, 8> Ops(Factor);
12334     for (unsigned i = 0U; i < Factor; ++i)
12335 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12336 Subtarget);
12337
12338 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12339 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12340
12341 SmallVector<SDValue, 8> Res(Factor);
12342 for (unsigned i = 0U; i < Factor; ++i)
12343 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12344 Subtarget);
12345 return DAG.getMergeValues(Res, DL);
12346 }
12347
12348 MVT XLenVT = Subtarget.getXLenVT();
12349 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12350
12351 // If the VT is larger than LMUL=8, we need to split and reassemble.
12352 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12353 (8 * RISCV::RVVBitsPerBlock)) {
12354 SmallVector<SDValue, 8> Ops(Factor * 2);
12355 for (unsigned i = 0; i != Factor; ++i) {
12356 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12357 Ops[i] = OpLo;
12358 Ops[i + Factor] = OpHi;
12359 }
12360
12361 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12362
12363 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12364 ArrayRef(Ops).take_front(Factor)),
12365                      DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12366                                  ArrayRef(Ops).drop_front(Factor))};
12367
12368 SmallVector<SDValue, 8> Concats(Factor);
12369 for (unsigned i = 0; i != Factor; ++i) {
12370 unsigned IdxLo = 2 * i;
12371 unsigned IdxHi = 2 * i + 1;
12372 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12373 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12374 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12375 }
12376
12377 return DAG.getMergeValues(Concats, DL);
12378 }
12379
12380 SDValue Interleaved;
12381
12382 // Spill to the stack using a segment store for simplicity.
12383 if (Factor != 2) {
12384 EVT MemVT =
12385         EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12386                          VecVT.getVectorElementCount() * Factor);
12387
12388 // Allocate a stack slot.
12389 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12390     SDValue StackPtr =
12391         DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12392 EVT PtrVT = StackPtr.getValueType();
12393 auto &MF = DAG.getMachineFunction();
12394 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12395 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12396
12397 static const Intrinsic::ID IntrIds[] = {
12398 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12399 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12400 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12401 Intrinsic::riscv_vsseg8_mask,
12402 };
12403
12404 unsigned Sz =
12405 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12406 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12407
12408 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12409 for (unsigned i = 0; i < Factor; i++)
12410 StoredVal =
12411 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12412 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12413
12414 SDValue Ops[] = {DAG.getEntryNode(),
12415 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12416 StoredVal,
12417 StackPtr,
12418 Mask,
12419 VL,
12420                      DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12421                                            DL, XLenVT)};
12422
12423 SDValue Chain = DAG.getMemIntrinsicNode(
12424 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12425 VecVT.getVectorElementType(), PtrInfo, Alignment,
12426         MachineMemOperand::MOStore);
12427
12428 SmallVector<SDValue, 8> Loads(Factor);
12429
12430     SDValue Increment =
12431         DAG.getVScale(DL, PtrVT,
12432 APInt(PtrVT.getFixedSizeInBits(),
12433 VecVT.getStoreSize().getKnownMinValue()));
12434 for (unsigned i = 0; i != Factor; ++i) {
12435 if (i != 0)
12436 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12437
12438 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12439 }
12440
12441 return DAG.getMergeValues(Loads, DL);
12442 }
12443
12444 // Use ri.vzip2{a,b} if available
12445 // TODO: Figure out the best lowering for the spread variants
12446 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12447 !Op.getOperand(1).isUndef()) {
12448 // Freeze the sources so we can increase their use count.
12449 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12450 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12451 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12452 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12453 return DAG.getMergeValues({Lo, Hi}, DL);
12454 }
12455
12456 // If the element type is smaller than ELEN, then we can interleave with
12457 // vwaddu.vv and vwmaccu.vx
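  // The widening trick (see getWideningInterleave) computes, per lane,
  //   a[i] + b[i] * 2^SEW
  // in a 2*SEW-wide element: vwaddu.vv produces a[i] + b[i], and vwmaccu.vx
  // with an all-ones scalar adds b[i] * (2^SEW - 1), so the low half of each
  // wide element holds a[i] and the high half holds b[i].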
12458 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12459 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12460 DAG, Subtarget);
12461 } else {
12462 // Otherwise, fallback to using vrgathere16.vv
12463 MVT ConcatVT =
12464         MVT::getVectorVT(VecVT.getVectorElementType(),
12465                          VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12466     SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12467 Op.getOperand(0), Op.getOperand(1));
12468
12469 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12470
12471 // 0 1 2 3 4 5 6 7 ...
12472 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12473
12474 // 1 1 1 1 1 1 1 1 ...
12475 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12476
12477 // 1 0 1 0 1 0 1 0 ...
12478 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12479 OddMask = DAG.getSetCC(
12480 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12481 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12482         ISD::CondCode::SETNE);
12483
12484 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12485
12486 // Build up the index vector for interleaving the concatenated vector
12487 // 0 0 1 1 2 2 3 3 ...
12488 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12489 // 0 n 1 n+1 2 n+2 3 n+3 ...
12490 Idx =
12491 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12492
12493 // Then perform the interleave
12494 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12495 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12496 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12497 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12498 }
12499
12500 // Extract the two halves from the interleaved result
12501 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12502 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12503 VecVT.getVectorMinNumElements());
12504
12505 return DAG.getMergeValues({Lo, Hi}, DL);
12506}
12507
12508// Lower step_vector to the vid instruction. Any non-identity step value must
12509 // be accounted for by manual expansion.
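// For example, a constant step of 4 lowers to vid.v followed by a vector
// shift left by 2, while a non-power-of-two step such as 3 lowers to vid.v
// followed by a vector multiply by a splatted 3 (see the power-of-two and
// generic paths below).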
12510SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12511 SelectionDAG &DAG) const {
12512 SDLoc DL(Op);
12513 MVT VT = Op.getSimpleValueType();
12514 assert(VT.isScalableVector() && "Expected scalable vector");
12515 MVT XLenVT = Subtarget.getXLenVT();
12516 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12517 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12518 uint64_t StepValImm = Op.getConstantOperandVal(0);
12519 if (StepValImm != 1) {
12520 if (isPowerOf2_64(StepValImm)) {
12521 SDValue StepVal =
12522 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12523 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12524 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12525 } else {
12526 SDValue StepVal = lowerScalarSplat(
12527 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12528 VL, VT, DL, DAG, Subtarget);
12529 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12530 }
12531 }
12532 return StepVec;
12533}
12534
12535// Implement vector_reverse using vrgather.vv with indices determined by
12536// subtracting the id of each element from (VLMAX-1). This will convert
12537// the indices like so:
12538// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12539// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12540SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12541 SelectionDAG &DAG) const {
12542 SDLoc DL(Op);
12543 MVT VecVT = Op.getSimpleValueType();
12544 if (VecVT.getVectorElementType() == MVT::i1) {
12545 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12546 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12547 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12548 return DAG.getSetCC(DL, VecVT, Op2,
12549 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12550 }
12551
12552 MVT ContainerVT = VecVT;
12553 SDValue Vec = Op.getOperand(0);
12554 if (VecVT.isFixedLengthVector()) {
12555 ContainerVT = getContainerForFixedLengthVector(VecVT);
12556 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12557 }
12558
12559 MVT XLenVT = Subtarget.getXLenVT();
12560 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12561
12562 // On some uarchs vrgather.vv will read from every input register for each
12563 // output register, regardless of the indices. However, to reverse a vector,
12564 // each output register only needs to read from one register. So decompose it
12565 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12566 // O(LMUL^2).
12567 //
12568 // vsetvli a1, zero, e64, m4, ta, ma
12569 // vrgatherei16.vv v12, v8, v16
12570 // ->
12571 // vsetvli a1, zero, e64, m1, ta, ma
12572 // vrgather.vv v15, v8, v16
12573 // vrgather.vv v14, v9, v16
12574 // vrgather.vv v13, v10, v16
12575 // vrgather.vv v12, v11, v16
12576 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12577 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12578 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12579 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12580 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12581 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12582
12583 // Fixed length vectors might not fit exactly into their container, and so
12584 // leave a gap in the front of the vector after being reversed. Slide this
12585 // away.
12586 //
12587 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12588 // 0 1 2 3 x x x x <- reverse
12589 // x x x x 0 1 2 3 <- vslidedown.vx
12590 if (VecVT.isFixedLengthVector()) {
12591 SDValue Offset = DAG.getNode(
12592 ISD::SUB, DL, XLenVT,
12593 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12594 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12595 Concat =
12596 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12597 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12598 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12599 }
12600 return Concat;
12601 }
12602
12603 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12604 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12605 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12606 unsigned MaxVLMAX =
12607 VecVT.isFixedLengthVector()
12608 ? VecVT.getVectorNumElements()
12609 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12610
12611 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12612 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12613
12614 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12615 // to use vrgatherei16.vv.
12616 if (MaxVLMAX > 256 && EltSize == 8) {
12617     // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12618 // Reverse each half, then reassemble them in reverse order.
12619 // NOTE: It's also possible that after splitting that VLMAX no longer
12620 // requires vrgatherei16.vv.
12621 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12622 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12623 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12624 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12625 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12626 // Reassemble the low and high pieces reversed.
12627 // FIXME: This is a CONCAT_VECTORS.
12628 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12629 return DAG.getInsertSubvector(DL, Res, Lo,
12630 LoVT.getVectorMinNumElements());
12631 }
12632
12633 // Just promote the int type to i16 which will double the LMUL.
12634 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12635 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12636 }
12637
12638 // At LMUL > 1, do the index computation in 16 bits to reduce register
12639 // pressure.
12640 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12641 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12642 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12643 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12644 IntVT = IntVT.changeVectorElementType(MVT::i16);
12645 }
12646
12647 // Calculate VLMAX-1 for the desired SEW.
12648 SDValue VLMinus1 = DAG.getNode(
12649 ISD::SUB, DL, XLenVT,
12650 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12651 DAG.getConstant(1, DL, XLenVT));
12652
12653 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12654 bool IsRV32E64 =
12655 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12656 SDValue SplatVL;
12657 if (!IsRV32E64)
12658 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12659 else
12660 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12661 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12662
12663 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12664 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12665 DAG.getUNDEF(IntVT), Mask, VL);
12666
12667 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12668 DAG.getUNDEF(ContainerVT), Mask, VL);
12669 if (VecVT.isFixedLengthVector())
12670 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12671 return Gather;
12672}
12673
12674SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12675 SelectionDAG &DAG) const {
12676 SDLoc DL(Op);
12677 SDValue V1 = Op.getOperand(0);
12678 SDValue V2 = Op.getOperand(1);
12679 MVT XLenVT = Subtarget.getXLenVT();
12680 MVT VecVT = Op.getSimpleValueType();
12681
12682 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12683
12684 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12685 SDValue DownOffset, UpOffset;
12686 if (ImmValue >= 0) {
12687 // The operand is a TargetConstant, we need to rebuild it as a regular
12688 // constant.
12689 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12690 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12691 } else {
12692 // The operand is a TargetConstant, we need to rebuild it as a regular
12693 // constant rather than negating the original operand.
12694 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12695 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12696 }
12697
12698 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12699
12700 SDValue SlideDown = getVSlidedown(
12701 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12702 Subtarget.hasVLDependentLatency() ? UpOffset
12703 : DAG.getRegister(RISCV::X0, XLenVT));
12704 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12705 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12706                      RISCVVType::TAIL_AGNOSTIC);
12707 }
12708
12709SDValue
12710RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12711 SelectionDAG &DAG) const {
12712 SDLoc DL(Op);
12713 auto *Load = cast<LoadSDNode>(Op);
12714
12715   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12716                                         Load->getMemoryVT(),
12717 *Load->getMemOperand()) &&
12718 "Expecting a correctly-aligned load");
12719
12720 MVT VT = Op.getSimpleValueType();
12721 MVT XLenVT = Subtarget.getXLenVT();
12722 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12723
12724 // If we know the exact VLEN and our fixed length vector completely fills
12725 // the container, use a whole register load instead.
12726 const auto [MinVLMAX, MaxVLMAX] =
12727 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12728 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12729 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12730 MachineMemOperand *MMO = Load->getMemOperand();
12731 SDValue NewLoad =
12732 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12733 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12734 MMO->getAAInfo(), MMO->getRanges());
12735 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12736 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12737 }
12738
12739 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12740
12741 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12742 SDValue IntID = DAG.getTargetConstant(
12743 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12744 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12745 if (!IsMaskOp)
12746 Ops.push_back(DAG.getUNDEF(ContainerVT));
12747 Ops.push_back(Load->getBasePtr());
12748 Ops.push_back(VL);
12749 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12750 SDValue NewLoad =
12751       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12752                               Load->getMemoryVT(), Load->getMemOperand());
12753
12754 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12755 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12756}
12757
12758SDValue
12759RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12760 SelectionDAG &DAG) const {
12761 SDLoc DL(Op);
12762 auto *Store = cast<StoreSDNode>(Op);
12763
12764   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12765                                         Store->getMemoryVT(),
12766 *Store->getMemOperand()) &&
12767 "Expecting a correctly-aligned store");
12768
12769 SDValue StoreVal = Store->getValue();
12770 MVT VT = StoreVal.getSimpleValueType();
12771 MVT XLenVT = Subtarget.getXLenVT();
12772
12773   // If the size is less than a byte, we need to pad with zeros to make a byte.
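  // For example, a v4i1 value is inserted into a zero v8i1 vector so that a
  // full byte can be written by the mask store below.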
12774 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12775 VT = MVT::v8i1;
12776 StoreVal =
12777 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12778 }
12779
12780 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12781
12782 SDValue NewValue =
12783 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12784
12785 // If we know the exact VLEN and our fixed length vector completely fills
12786 // the container, use a whole register store instead.
12787 const auto [MinVLMAX, MaxVLMAX] =
12788 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12789 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12790 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12791 MachineMemOperand *MMO = Store->getMemOperand();
12792 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12793 MMO->getPointerInfo(), MMO->getBaseAlign(),
12794 MMO->getFlags(), MMO->getAAInfo());
12795 }
12796
12797 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12798
12799 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12800 SDValue IntID = DAG.getTargetConstant(
12801 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12802 return DAG.getMemIntrinsicNode(
12803 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12804 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12805 Store->getMemoryVT(), Store->getMemOperand());
12806}
12807
12808SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12809 SelectionDAG &DAG) const {
12810 SDLoc DL(Op);
12811 MVT VT = Op.getSimpleValueType();
12812
12813 const auto *MemSD = cast<MemSDNode>(Op);
12814 EVT MemVT = MemSD->getMemoryVT();
12815 MachineMemOperand *MMO = MemSD->getMemOperand();
12816 SDValue Chain = MemSD->getChain();
12817 SDValue BasePtr = MemSD->getBasePtr();
12818
12819 SDValue Mask, PassThru, VL;
12820 bool IsExpandingLoad = false;
12821 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12822 Mask = VPLoad->getMask();
12823 PassThru = DAG.getUNDEF(VT);
12824 VL = VPLoad->getVectorLength();
12825 } else {
12826 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12827 Mask = MLoad->getMask();
12828 PassThru = MLoad->getPassThru();
12829 IsExpandingLoad = MLoad->isExpandingLoad();
12830 }
12831
12832 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12833
12834 MVT XLenVT = Subtarget.getXLenVT();
12835
12836 MVT ContainerVT = VT;
12837 if (VT.isFixedLengthVector()) {
12838 ContainerVT = getContainerForFixedLengthVector(VT);
12839 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12840 if (!IsUnmasked) {
12841 MVT MaskVT = getMaskTypeFor(ContainerVT);
12842 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12843 }
12844 }
12845
12846 if (!VL)
12847 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12848
12849 SDValue ExpandingVL;
12850 if (!IsUnmasked && IsExpandingLoad) {
12851 ExpandingVL = VL;
12852 VL =
12853 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12854 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12855 }
12856
12857 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12858 : Intrinsic::riscv_vle_mask;
12859 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12860 if (IntID == Intrinsic::riscv_vle)
12861 Ops.push_back(DAG.getUNDEF(ContainerVT));
12862 else
12863 Ops.push_back(PassThru);
12864 Ops.push_back(BasePtr);
12865 if (IntID == Intrinsic::riscv_vle_mask)
12866 Ops.push_back(Mask);
12867 Ops.push_back(VL);
12868 if (IntID == Intrinsic::riscv_vle_mask)
12869 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12870
12871 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12872
12873 SDValue Result =
12874 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12875 Chain = Result.getValue(1);
12876 if (ExpandingVL) {
12877 MVT IndexVT = ContainerVT;
12878 if (ContainerVT.isFloatingPoint())
12879 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12880
12881 MVT IndexEltVT = IndexVT.getVectorElementType();
12882 bool UseVRGATHEREI16 = false;
12883     // If the index vector is an i8 vector and the element count exceeds 256,
12884     // we should change the element type of the index vector to i16 to avoid
12885 // overflow.
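    // For instance (illustrative), with 512 elements the viota values can
    // reach 511, which does not fit in i8, so the index computation is done
    // in i16 and vrgatherei16 is used instead.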
12886 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12887 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12888 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12889 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12890 UseVRGATHEREI16 = true;
12891 }
12892
12893 SDValue Iota =
12894 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12895 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12896 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12897 Result =
12898 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12899 : RISCVISD::VRGATHER_VV_VL,
12900 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12901 }
12902
12903 if (VT.isFixedLengthVector())
12904 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12905
12906 return DAG.getMergeValues({Result, Chain}, DL);
12907}
12908
12909SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12910 SDLoc DL(Op);
12911 MVT VT = Op->getSimpleValueType(0);
12912
12913 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12914 EVT MemVT = VPLoadFF->getMemoryVT();
12915 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12916 SDValue Chain = VPLoadFF->getChain();
12917 SDValue BasePtr = VPLoadFF->getBasePtr();
12918
12919 SDValue Mask = VPLoadFF->getMask();
12920 SDValue VL = VPLoadFF->getVectorLength();
12921
12922 MVT XLenVT = Subtarget.getXLenVT();
12923
12924 MVT ContainerVT = VT;
12925 if (VT.isFixedLengthVector()) {
12926 ContainerVT = getContainerForFixedLengthVector(VT);
12927 MVT MaskVT = getMaskTypeFor(ContainerVT);
12928 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12929 }
12930
12931 unsigned IntID = Intrinsic::riscv_vleff_mask;
12932 SDValue Ops[] = {
12933 Chain,
12934 DAG.getTargetConstant(IntID, DL, XLenVT),
12935 DAG.getUNDEF(ContainerVT),
12936 BasePtr,
12937 Mask,
12938 VL,
12939       DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12940
12941 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12942
12943 SDValue Result =
12944 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12945 SDValue OutVL = Result.getValue(1);
12946 Chain = Result.getValue(2);
12947
12948 if (VT.isFixedLengthVector())
12949 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12950
12951 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12952}
12953
12954SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12955 SelectionDAG &DAG) const {
12956 SDLoc DL(Op);
12957
12958 const auto *MemSD = cast<MemSDNode>(Op);
12959 EVT MemVT = MemSD->getMemoryVT();
12960 MachineMemOperand *MMO = MemSD->getMemOperand();
12961 SDValue Chain = MemSD->getChain();
12962 SDValue BasePtr = MemSD->getBasePtr();
12963 SDValue Val, Mask, VL;
12964
12965 bool IsCompressingStore = false;
12966 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12967 Val = VPStore->getValue();
12968 Mask = VPStore->getMask();
12969 VL = VPStore->getVectorLength();
12970 } else {
12971 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12972 Val = MStore->getValue();
12973 Mask = MStore->getMask();
12974 IsCompressingStore = MStore->isCompressingStore();
12975 }
12976
12977 bool IsUnmasked =
12978 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
12979
12980 MVT VT = Val.getSimpleValueType();
12981 MVT XLenVT = Subtarget.getXLenVT();
12982
12983 MVT ContainerVT = VT;
12984 if (VT.isFixedLengthVector()) {
12985 ContainerVT = getContainerForFixedLengthVector(VT);
12986
12987 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12988 if (!IsUnmasked || IsCompressingStore) {
12989 MVT MaskVT = getMaskTypeFor(ContainerVT);
12990 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12991 }
12992 }
12993
12994 if (!VL)
12995 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12996
12997 if (IsCompressingStore) {
12998 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
12999 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13000 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13001 VL =
13002 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13003 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13004 }
13005
13006 unsigned IntID =
13007 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13008 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13009 Ops.push_back(Val);
13010 Ops.push_back(BasePtr);
13011 if (!IsUnmasked)
13012 Ops.push_back(Mask);
13013 Ops.push_back(VL);
13014
13014
13015   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13016                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13017}
13018
13019SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13020 SelectionDAG &DAG) const {
13021 SDLoc DL(Op);
13022 SDValue Val = Op.getOperand(0);
13023 SDValue Mask = Op.getOperand(1);
13024 SDValue Passthru = Op.getOperand(2);
13025
13026 MVT VT = Val.getSimpleValueType();
13027 MVT XLenVT = Subtarget.getXLenVT();
13028 MVT ContainerVT = VT;
13029 if (VT.isFixedLengthVector()) {
13030 ContainerVT = getContainerForFixedLengthVector(VT);
13031 MVT MaskVT = getMaskTypeFor(ContainerVT);
13032 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13033 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13034 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13035 }
13036
13037 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13038 SDValue Res =
13039 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13040 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13041 Passthru, Val, Mask, VL);
13042
13043 if (VT.isFixedLengthVector())
13044 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13045
13046 return Res;
13047}
13048
13049SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13050 SelectionDAG &DAG) const {
13051 unsigned Opc = Op.getOpcode();
13052 SDLoc DL(Op);
13053 SDValue Chain = Op.getOperand(0);
13054 SDValue Op1 = Op.getOperand(1);
13055 SDValue Op2 = Op.getOperand(2);
13056 SDValue CC = Op.getOperand(3);
13057 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13058 MVT VT = Op.getSimpleValueType();
13059 MVT InVT = Op1.getSimpleValueType();
13060
13061   // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
13062   // condition codes.
13063 if (Opc == ISD::STRICT_FSETCCS) {
13064 // Expand strict_fsetccs(x, oeq) to
13065 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
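    // Note that the (x oge y) half is materialized below as (y ole x), so only
    // the SETOLE condition code is actually emitted.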
13066 SDVTList VTList = Op->getVTList();
13067 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13068 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13069 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13070 Op2, OLECCVal);
13071 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13072 Op1, OLECCVal);
13073 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13074 Tmp1.getValue(1), Tmp2.getValue(1));
13075 // Tmp1 and Tmp2 might be the same node.
13076 if (Tmp1 != Tmp2)
13077 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13078 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13079 }
13080
13081 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13082 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13083 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13084 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13085 Op2, OEQCCVal);
13086 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13087 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13088 }
13089 }
13090
13091 MVT ContainerInVT = InVT;
13092 if (InVT.isFixedLengthVector()) {
13093 ContainerInVT = getContainerForFixedLengthVector(InVT);
13094 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13095 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13096 }
13097 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13098
13099 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13100
13101 SDValue Res;
13102 if (Opc == ISD::STRICT_FSETCC &&
13103 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13104 CCVal == ISD::SETOLE)) {
13105     // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask
13106     // that is only active when both input elements are ordered.
13107 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13108 SDValue OrderMask1 = DAG.getNode(
13109 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13110 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13111 True, VL});
13112 SDValue OrderMask2 = DAG.getNode(
13113 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13114 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13115 True, VL});
13116 Mask =
13117 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13118 // Use Mask as the passthru operand to let the result be 0 if either of the
13119 // inputs is unordered.
13120 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13121 DAG.getVTList(MaskVT, MVT::Other),
13122 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13123 } else {
13124 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13125 : RISCVISD::STRICT_FSETCCS_VL;
13126 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13127 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13128 }
13129
13130 if (VT.isFixedLengthVector()) {
13131 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13132 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13133 }
13134 return Res;
13135}
13136
13137// Lower vector ABS to smax(X, sub(0, X)).
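// For example, for X = <-3, 4>: sub(0, X) = <3, -4> and smax(X, sub(0, X))
// yields <3, 4>.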
13138SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13139 SDLoc DL(Op);
13140 MVT VT = Op.getSimpleValueType();
13141 SDValue X = Op.getOperand(0);
13142
13143 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13144 "Unexpected type for ISD::ABS");
13145
13146 MVT ContainerVT = VT;
13147 if (VT.isFixedLengthVector()) {
13148 ContainerVT = getContainerForFixedLengthVector(VT);
13149 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13150 }
13151
13152 SDValue Mask, VL;
13153 if (Op->getOpcode() == ISD::VP_ABS) {
13154 Mask = Op->getOperand(1);
13155 if (VT.isFixedLengthVector())
13156 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13157 Subtarget);
13158 VL = Op->getOperand(2);
13159 } else
13160 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13161
13162 SDValue SplatZero = DAG.getNode(
13163 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13164 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13165 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13166 DAG.getUNDEF(ContainerVT), Mask, VL);
13167 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13168 DAG.getUNDEF(ContainerVT), Mask, VL);
13169
13170 if (VT.isFixedLengthVector())
13171 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13172 return Max;
13173}
13174
13175SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13176 SelectionDAG &DAG) const {
13177 const auto &TSInfo =
13178 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13179
13180 unsigned NewOpc = getRISCVVLOp(Op);
13181 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13182 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13183
13184 MVT VT = Op.getSimpleValueType();
13185 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13186
13187 // Create list of operands by converting existing ones to scalable types.
13188   SmallVector<SDValue, 6> Ops;
13189   for (const SDValue &V : Op->op_values()) {
13190 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13191
13192 // Pass through non-vector operands.
13193 if (!V.getValueType().isVector()) {
13194 Ops.push_back(V);
13195 continue;
13196 }
13197
13198 // "cast" fixed length vector to a scalable vector.
13199 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13200 "Only fixed length vectors are supported!");
13201 MVT VContainerVT = ContainerVT.changeVectorElementType(
13202 V.getSimpleValueType().getVectorElementType());
13203 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13204 }
13205
13206 SDLoc DL(Op);
13207 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13208 if (HasPassthruOp)
13209 Ops.push_back(DAG.getUNDEF(ContainerVT));
13210 if (HasMask)
13211 Ops.push_back(Mask);
13212 Ops.push_back(VL);
13213
13214 // StrictFP operations have two result values. Their lowered result should
13215   // have the same result count.
13216 if (Op->isStrictFPOpcode()) {
13217 SDValue ScalableRes =
13218 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13219 Op->getFlags());
13220 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13221 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13222 }
13223
13224 SDValue ScalableRes =
13225 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13226 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13227}
13228
13229// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13230// * Operands of each node are assumed to be in the same order.
13231// * The EVL operand is promoted from i32 to i64 on RV64.
13232// * Fixed-length vectors are converted to their scalable-vector container
13233// types.
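// As a rough sketch, a fixed-length llvm.vp.add on v4i32 becomes a
// RISCVISD::ADD_VL node on its scalable container (typically nxv2i32) with the
// operand list (lhs, rhs, undef passthru, mask, promoted EVL), and the result
// is converted back to the fixed-length type.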
13234SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13235 const auto &TSInfo =
13236 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13237
13238 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13239 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13240
13241 SDLoc DL(Op);
13242 MVT VT = Op.getSimpleValueType();
13243   SmallVector<SDValue, 16> Ops;
13244
13245 MVT ContainerVT = VT;
13246 if (VT.isFixedLengthVector())
13247 ContainerVT = getContainerForFixedLengthVector(VT);
13248
13249 for (const auto &OpIdx : enumerate(Op->ops())) {
13250 SDValue V = OpIdx.value();
13251 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13252 // Add dummy passthru value before the mask. Or if there isn't a mask,
13253 // before EVL.
13254 if (HasPassthruOp) {
13255 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13256 if (MaskIdx) {
13257 if (*MaskIdx == OpIdx.index())
13258 Ops.push_back(DAG.getUNDEF(ContainerVT));
13259 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13260 OpIdx.index()) {
13261 if (Op.getOpcode() == ISD::VP_MERGE) {
13262 // For VP_MERGE, copy the false operand instead of an undef value.
13263 Ops.push_back(Ops.back());
13264 } else {
13265 assert(Op.getOpcode() == ISD::VP_SELECT);
13266 // For VP_SELECT, add an undef value.
13267 Ops.push_back(DAG.getUNDEF(ContainerVT));
13268 }
13269 }
13270 }
13271 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13272 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13273 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13274       Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13275                                           Subtarget.getXLenVT()));
13276 // Pass through operands which aren't fixed-length vectors.
13277 if (!V.getValueType().isFixedLengthVector()) {
13278 Ops.push_back(V);
13279 continue;
13280 }
13281 // "cast" fixed length vector to a scalable vector.
13282 MVT OpVT = V.getSimpleValueType();
13283 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13284 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13285 "Only fixed length vectors are supported!");
13286 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13287 }
13288
13289 if (!VT.isFixedLengthVector())
13290 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13291
13292 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13293
13294 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13295}
13296
13297SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13298 SelectionDAG &DAG) const {
13299 SDLoc DL(Op);
13300 MVT VT = Op.getSimpleValueType();
13301
13302 SDValue Src = Op.getOperand(0);
13303 // NOTE: Mask is dropped.
13304 SDValue VL = Op.getOperand(2);
13305
13306 MVT ContainerVT = VT;
13307 if (VT.isFixedLengthVector()) {
13308 ContainerVT = getContainerForFixedLengthVector(VT);
13309 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13310 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13311 }
13312
13313 MVT XLenVT = Subtarget.getXLenVT();
13314 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13315 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13316 DAG.getUNDEF(ContainerVT), Zero, VL);
13317
13318 SDValue SplatValue = DAG.getSignedConstant(
13319 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13320 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13321 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13322
13323 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13324 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13325 if (!VT.isFixedLengthVector())
13326 return Result;
13327 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13328}
13329
13330SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13331 SelectionDAG &DAG) const {
13332 SDLoc DL(Op);
13333 MVT VT = Op.getSimpleValueType();
13334
13335 SDValue Op1 = Op.getOperand(0);
13336 SDValue Op2 = Op.getOperand(1);
13337 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13338 // NOTE: Mask is dropped.
13339 SDValue VL = Op.getOperand(4);
13340
13341 MVT ContainerVT = VT;
13342 if (VT.isFixedLengthVector()) {
13343 ContainerVT = getContainerForFixedLengthVector(VT);
13344 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13345 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13346 }
13347
13348   SDValue Result;
13349 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13350
13351 switch (Condition) {
13352 default:
13353 break;
13354 // X != Y --> (X^Y)
13355 case ISD::SETNE:
13356 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13357 break;
13358 // X == Y --> ~(X^Y)
13359 case ISD::SETEQ: {
13360 SDValue Temp =
13361 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13362 Result =
13363 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13364 break;
13365 }
13366 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13367 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13368 case ISD::SETGT:
13369 case ISD::SETULT: {
13370 SDValue Temp =
13371 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13372 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13373 break;
13374 }
13375 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13376 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13377 case ISD::SETLT:
13378 case ISD::SETUGT: {
13379 SDValue Temp =
13380 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13381 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13382 break;
13383 }
13384 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13385 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13386 case ISD::SETGE:
13387 case ISD::SETULE: {
13388 SDValue Temp =
13389 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13390 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13391 break;
13392 }
13393 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13394 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13395 case ISD::SETLE:
13396 case ISD::SETUGE: {
13397 SDValue Temp =
13398 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13399 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13400 break;
13401 }
13402 }
13403
13404 if (!VT.isFixedLengthVector())
13405 return Result;
13406 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13407}
13408
13409// Lower Floating-Point/Integer Type-Convert VP SDNodes
13410SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13411 SelectionDAG &DAG) const {
13412 SDLoc DL(Op);
13413
13414 SDValue Src = Op.getOperand(0);
13415 SDValue Mask = Op.getOperand(1);
13416 SDValue VL = Op.getOperand(2);
13417 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13418
13419 MVT DstVT = Op.getSimpleValueType();
13420 MVT SrcVT = Src.getSimpleValueType();
13421 if (DstVT.isFixedLengthVector()) {
13422 DstVT = getContainerForFixedLengthVector(DstVT);
13423 SrcVT = getContainerForFixedLengthVector(SrcVT);
13424 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13425 MVT MaskVT = getMaskTypeFor(DstVT);
13426 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13427 }
13428
13429 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13430 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13431
13432   SDValue Result;
13433 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13434 if (SrcVT.isInteger()) {
13435 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13436
13437 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13438 ? RISCVISD::VSEXT_VL
13439 : RISCVISD::VZEXT_VL;
13440
13441 // Do we need to do any pre-widening before converting?
13442 if (SrcEltSize == 1) {
13443 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13444 MVT XLenVT = Subtarget.getXLenVT();
13445 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13446 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13447 DAG.getUNDEF(IntVT), Zero, VL);
13448 SDValue One = DAG.getSignedConstant(
13449 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13450 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13451 DAG.getUNDEF(IntVT), One, VL);
13452 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13453 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13454 } else if (DstEltSize > (2 * SrcEltSize)) {
13455 // Widen before converting.
13456 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13457 DstVT.getVectorElementCount());
13458 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13459 }
13460
13461 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13462 } else {
13463 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13464 "Wrong input/output vector types");
13465
13466 // Convert f16 to f32 then convert f32 to i64.
13467 if (DstEltSize > (2 * SrcEltSize)) {
13468 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13469 MVT InterimFVT =
13470 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13471 Src =
13472 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13473 }
13474
13475 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13476 }
13477 } else { // Narrowing + Conversion
13478 if (SrcVT.isInteger()) {
13479 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13480       // First do a narrowing conversion to an FP type half the size, then
13481       // round to the smaller FP type if needed.
13482
13483 MVT InterimFVT = DstVT;
13484 if (SrcEltSize > (2 * DstEltSize)) {
13485 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13486 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13487 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13488 }
13489
13490 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13491
13492 if (InterimFVT != DstVT) {
13493 Src = Result;
13494 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13495 }
13496 } else {
13497 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13498 "Wrong input/output vector types");
13499 // First do a narrowing conversion to an integer half the size, then
13500 // truncate if needed.
13501
13502 if (DstEltSize == 1) {
13503 // First convert to the same size integer, then convert to mask using
13504 // setcc.
13505 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13506 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13507 DstVT.getVectorElementCount());
13508 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13509
13510 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13511 // otherwise the conversion was undefined.
13512 MVT XLenVT = Subtarget.getXLenVT();
13513 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13514 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13515 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13516 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13517 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13518 DAG.getUNDEF(DstVT), Mask, VL});
13519 } else {
13520 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13521 DstVT.getVectorElementCount());
13522
13523 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13524
13525 while (InterimIVT != DstVT) {
13526 SrcEltSize /= 2;
13527 Src = Result;
13528 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13529 DstVT.getVectorElementCount());
13530 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13531 Src, Mask, VL);
13532 }
13533 }
13534 }
13535 }
13536
13537 MVT VT = Op.getSimpleValueType();
13538 if (!VT.isFixedLengthVector())
13539 return Result;
13540 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13541}
13542
13543SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13544 SelectionDAG &DAG) const {
13545 SDLoc DL(Op);
13546 MVT VT = Op.getSimpleValueType();
13547 MVT XLenVT = Subtarget.getXLenVT();
13548
13549 SDValue Mask = Op.getOperand(0);
13550 SDValue TrueVal = Op.getOperand(1);
13551 SDValue FalseVal = Op.getOperand(2);
13552 SDValue VL = Op.getOperand(3);
13553
13554 // Use default legalization if a vector of EVL type would be legal.
13555 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13556                                   VT.getVectorElementCount());
13557   if (isTypeLegal(EVLVecVT))
13558 return SDValue();
13559
13560 MVT ContainerVT = VT;
13561 if (VT.isFixedLengthVector()) {
13562 ContainerVT = getContainerForFixedLengthVector(VT);
13563 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13564 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13565 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13566 }
13567
13568 // Promote to a vector of i8.
13569 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13570
13571 // Promote TrueVal and FalseVal using VLMax.
13572 // FIXME: Is there a better way to do this?
13573 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13574 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13575 DAG.getUNDEF(PromotedVT),
13576 DAG.getConstant(1, DL, XLenVT), VLMax);
13577 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13578 DAG.getUNDEF(PromotedVT),
13579 DAG.getConstant(0, DL, XLenVT), VLMax);
13580 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13581 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13582 // Any element past VL uses FalseVal, so use VLMax
13583 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13584 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13585
13586 // VP_MERGE the two promoted values.
13587 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13588 TrueVal, FalseVal, FalseVal, VL);
13589
13590 // Convert back to mask.
13591 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13592 SDValue Result = DAG.getNode(
13593 RISCVISD::SETCC_VL, DL, ContainerVT,
13594 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13595 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13596
13597 if (VT.isFixedLengthVector())
13598 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13599 return Result;
13600}
13601
13602SDValue
13603RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13604 SelectionDAG &DAG) const {
13605 using namespace SDPatternMatch;
13606
13607 SDLoc DL(Op);
13608
13609 SDValue Op1 = Op.getOperand(0);
13610 SDValue Op2 = Op.getOperand(1);
13611 SDValue Offset = Op.getOperand(2);
13612 SDValue Mask = Op.getOperand(3);
13613 SDValue EVL1 = Op.getOperand(4);
13614 SDValue EVL2 = Op.getOperand(5);
13615
13616 const MVT XLenVT = Subtarget.getXLenVT();
13617 MVT VT = Op.getSimpleValueType();
13618 MVT ContainerVT = VT;
13619 if (VT.isFixedLengthVector()) {
13620 ContainerVT = getContainerForFixedLengthVector(VT);
13621 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13622 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13623 MVT MaskVT = getMaskTypeFor(ContainerVT);
13624 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13625 }
13626
13627 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13628 if (IsMaskVector) {
13629 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13630
13631 // Expand input operands
13632 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13633 DAG.getUNDEF(ContainerVT),
13634 DAG.getConstant(1, DL, XLenVT), EVL1);
13635 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13636 DAG.getUNDEF(ContainerVT),
13637 DAG.getConstant(0, DL, XLenVT), EVL1);
13638 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13639 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13640
13641 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13642 DAG.getUNDEF(ContainerVT),
13643 DAG.getConstant(1, DL, XLenVT), EVL2);
13644 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13645 DAG.getUNDEF(ContainerVT),
13646 DAG.getConstant(0, DL, XLenVT), EVL2);
13647 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13648 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13649 }
13650
13651 auto getVectorFirstEle = [](SDValue Vec) {
13652 SDValue FirstEle;
13653 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13654 return FirstEle;
13655
13656    if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13657        Vec.getOpcode() == ISD::SPLAT_VECTOR_PARTS)
13658      return Vec.getOperand(0);
13659
13660 return SDValue();
13661 };
13662
13663 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13664 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13665      MVT EltVT = ContainerVT.getVectorElementType();
13666      SDValue Result;
13667 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13668 EltVT == MVT::bf16) {
13669 EltVT = EltVT.changeTypeToInteger();
13670 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13671 Op2 = DAG.getBitcast(ContainerVT, Op2);
13672 FirstEle =
13673 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13674 }
13675 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13676 : RISCVISD::VSLIDE1UP_VL,
13677 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13678 FirstEle, Mask, EVL2);
13679      Result = DAG.getBitcast(
13680          ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13681          Result);
13682 return VT.isFixedLengthVector()
13683 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13684 : Result;
13685 }
13686
13687 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13688 SDValue DownOffset, UpOffset;
13689 if (ImmValue >= 0) {
13690    // The operand is a TargetConstant; we need to rebuild it as a regular
13691    // constant.
13692 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13693 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13694 } else {
13695    // The operand is a TargetConstant; we need to rebuild it as a regular
13696    // constant rather than negating the original operand.
13697 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13698 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13699 }
13700
13701 if (ImmValue != 0)
13702 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13703 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13704 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13705 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13706 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13707
13708 if (IsMaskVector) {
13709 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13710 Result = DAG.getNode(
13711 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13712 {Result, DAG.getConstant(0, DL, ContainerVT),
13713 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13714 Mask, EVL2});
13715 }
13716
13717 if (!VT.isFixedLengthVector())
13718 return Result;
13719 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13720}
13721
13722SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13723 SelectionDAG &DAG) const {
13724 SDLoc DL(Op);
13725 SDValue Val = Op.getOperand(0);
13726 SDValue Mask = Op.getOperand(1);
13727 SDValue VL = Op.getOperand(2);
13728 MVT VT = Op.getSimpleValueType();
13729
13730 MVT ContainerVT = VT;
13731 if (VT.isFixedLengthVector()) {
13732 ContainerVT = getContainerForFixedLengthVector(VT);
13733 MVT MaskVT = getMaskTypeFor(ContainerVT);
13734 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13735 }
13736
13737  SDValue Result;
13738 if (VT.getScalarType() == MVT::i1) {
13739 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13740 Result =
13741 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13742 ContainerVT, VL);
13743 } else {
13744 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13745 SDValue LHS =
13746 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13747 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13748 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13749 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13750 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13751 DAG.getUNDEF(ContainerVT), Mask, VL});
13752 }
13753 } else {
13754 Result =
13755 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13756 }
13757
13758 if (!VT.isFixedLengthVector())
13759 return Result;
13760 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13761}
13762
13763SDValue
13764RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13765 SelectionDAG &DAG) const {
13766 SDLoc DL(Op);
13767 MVT VT = Op.getSimpleValueType();
13768 MVT XLenVT = Subtarget.getXLenVT();
13769
13770 SDValue Op1 = Op.getOperand(0);
13771 SDValue Mask = Op.getOperand(1);
13772 SDValue EVL = Op.getOperand(2);
13773
13774 MVT ContainerVT = VT;
13775 if (VT.isFixedLengthVector()) {
13776 ContainerVT = getContainerForFixedLengthVector(VT);
13777 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13778 MVT MaskVT = getMaskTypeFor(ContainerVT);
13779 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13780 }
13781
13782 MVT GatherVT = ContainerVT;
13783 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13784 // Check if we are working with mask vectors
13785 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13786 if (IsMaskVector) {
13787 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13788
13789 // Expand input operand
13790 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13791 DAG.getUNDEF(IndicesVT),
13792 DAG.getConstant(1, DL, XLenVT), EVL);
13793 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13794 DAG.getUNDEF(IndicesVT),
13795 DAG.getConstant(0, DL, XLenVT), EVL);
13796 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13797 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13798 }
13799
13800 unsigned EltSize = GatherVT.getScalarSizeInBits();
13801 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13802 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13803 unsigned MaxVLMAX =
13804 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13805
13806 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13807 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13808 // to use vrgatherei16.vv.
13809 // TODO: It's also possible to use vrgatherei16.vv for other types to
13810 // decrease register width for the index calculation.
13811 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
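  // For example, with VLEN=512 at LMUL=8 and SEW=8, VLMAX is 512, so an i8
  // index vector (values 0..255) cannot address every element and 16-bit
  // indices are required.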
13812 if (MaxVLMAX > 256 && EltSize == 8) {
13813 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13814 // Split the vector in half and reverse each half using a full register
13815 // reverse.
13816 // Swap the halves and concatenate them.
13817 // Slide the concatenated result by (VLMax - VL).
13818 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13819 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13820 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13821
13822 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13823 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13824
13825 // Reassemble the low and high pieces reversed.
13826 // NOTE: this Result is unmasked (because we do not need masks for
13827 // shuffles). If in the future this has to change, we can use a SELECT_VL
13828 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13829 SDValue Result =
13830 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13831
13832 // Slide off any elements from past EVL that were reversed into the low
13833 // elements.
13834 unsigned MinElts = GatherVT.getVectorMinNumElements();
13835 SDValue VLMax =
13836 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13837 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13838
13839 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13840 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13841
13842 if (IsMaskVector) {
13843 // Truncate Result back to a mask vector
13844 Result =
13845 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13846                        {Result, DAG.getConstant(0, DL, GatherVT),
13847                         DAG.getCondCode(ISD::SETNE),
13848                         DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13849 }
13850
13851 if (!VT.isFixedLengthVector())
13852 return Result;
13853 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13854 }
13855
13856 // Just promote the int type to i16 which will double the LMUL.
13857 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13858 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13859 }
13860
13861 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13862 SDValue VecLen =
13863 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13864 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13865 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13866 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13867 DAG.getUNDEF(IndicesVT), Mask, EVL);
13868 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13869 DAG.getUNDEF(GatherVT), Mask, EVL);
13870
13871 if (IsMaskVector) {
13872 // Truncate Result back to a mask vector
13873 Result = DAG.getNode(
13874 RISCVISD::SETCC_VL, DL, ContainerVT,
13875 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13876 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13877 }
13878
13879 if (!VT.isFixedLengthVector())
13880 return Result;
13881 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13882}
13883
13884SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13885 SelectionDAG &DAG) const {
13886 MVT VT = Op.getSimpleValueType();
13887 if (VT.getVectorElementType() != MVT::i1)
13888 return lowerVPOp(Op, DAG);
13889
13890  // It is safe to drop the mask parameter as masked-off elements are undef.
13891 SDValue Op1 = Op->getOperand(0);
13892 SDValue Op2 = Op->getOperand(1);
13893 SDValue VL = Op->getOperand(3);
13894
13895 MVT ContainerVT = VT;
13896 const bool IsFixed = VT.isFixedLengthVector();
13897 if (IsFixed) {
13898 ContainerVT = getContainerForFixedLengthVector(VT);
13899 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13900 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13901 }
13902
13903 SDLoc DL(Op);
13904 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13905 if (!IsFixed)
13906 return Val;
13907 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13908}
13909
13910SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13911 SelectionDAG &DAG) const {
13912 SDLoc DL(Op);
13913 MVT XLenVT = Subtarget.getXLenVT();
13914 MVT VT = Op.getSimpleValueType();
13915 MVT ContainerVT = VT;
13916 if (VT.isFixedLengthVector())
13917 ContainerVT = getContainerForFixedLengthVector(VT);
13918
13919 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13920
13921 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13922 // Check if the mask is known to be all ones
13923 SDValue Mask = VPNode->getMask();
13924 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13925
13926 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13927 : Intrinsic::riscv_vlse_mask,
13928 DL, XLenVT);
13929 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13930 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13931 VPNode->getStride()};
13932 if (!IsUnmasked) {
13933 if (VT.isFixedLengthVector()) {
13934 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13935 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13936 }
13937 Ops.push_back(Mask);
13938 }
13939 Ops.push_back(VPNode->getVectorLength());
13940 if (!IsUnmasked) {
13941    SDValue Policy =
13942        DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13943 Ops.push_back(Policy);
13944 }
13945
13946  SDValue Result =
13947      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13948 VPNode->getMemoryVT(), VPNode->getMemOperand());
13949 SDValue Chain = Result.getValue(1);
13950
13951 if (VT.isFixedLengthVector())
13952 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13953
13954 return DAG.getMergeValues({Result, Chain}, DL);
13955}
13956
13957SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13958 SelectionDAG &DAG) const {
13959 SDLoc DL(Op);
13960 MVT XLenVT = Subtarget.getXLenVT();
13961
13962 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13963 SDValue StoreVal = VPNode->getValue();
13964 MVT VT = StoreVal.getSimpleValueType();
13965 MVT ContainerVT = VT;
13966 if (VT.isFixedLengthVector()) {
13967 ContainerVT = getContainerForFixedLengthVector(VT);
13968 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13969 }
13970
13971 // Check if the mask is known to be all ones
13972 SDValue Mask = VPNode->getMask();
13973 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13974
13975 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13976 : Intrinsic::riscv_vsse_mask,
13977 DL, XLenVT);
13978 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
13979 VPNode->getBasePtr(), VPNode->getStride()};
13980 if (!IsUnmasked) {
13981 if (VT.isFixedLengthVector()) {
13982 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13983 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13984 }
13985 Ops.push_back(Mask);
13986 }
13987 Ops.push_back(VPNode->getVectorLength());
13988
13989 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
13990 Ops, VPNode->getMemoryVT(),
13991 VPNode->getMemOperand());
13992}
13993
13994// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
13995// matched to an RVV indexed load. The RVV indexed load instructions only
13996// support the "unsigned unscaled" addressing mode; indices are implicitly
13997// zero-extended or truncated to XLEN and are treated as byte offsets. Any
13998// signed or scaled indexing is extended to the XLEN value type and scaled
13999// accordingly.
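// For example, a masked gather of a fixed-length <4 x i32> becomes a
// riscv_vluxei_mask intrinsic on the corresponding scalable container type,
// with the index operand interpreted as unsigned byte offsets from the base
// pointer.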
14000SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14001 SelectionDAG &DAG) const {
14002 SDLoc DL(Op);
14003 MVT VT = Op.getSimpleValueType();
14004
14005 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14006 EVT MemVT = MemSD->getMemoryVT();
14007 MachineMemOperand *MMO = MemSD->getMemOperand();
14008 SDValue Chain = MemSD->getChain();
14009 SDValue BasePtr = MemSD->getBasePtr();
14010
14011 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14012 SDValue Index, Mask, PassThru, VL;
14013
14014 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14015 Index = VPGN->getIndex();
14016 Mask = VPGN->getMask();
14017 PassThru = DAG.getUNDEF(VT);
14018 VL = VPGN->getVectorLength();
14019    // VP doesn't support extending loads.
14020    LoadExtType = ISD::NON_EXTLOAD;
14021 } else {
14022 // Else it must be a MGATHER.
14023 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14024 Index = MGN->getIndex();
14025 Mask = MGN->getMask();
14026 PassThru = MGN->getPassThru();
14027 LoadExtType = MGN->getExtensionType();
14028 }
14029
14030 MVT IndexVT = Index.getSimpleValueType();
14031 MVT XLenVT = Subtarget.getXLenVT();
14032
14033  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14034         "Unexpected VTs!");
14035 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14036 // Targets have to explicitly opt-in for extending vector loads.
14037 assert(LoadExtType == ISD::NON_EXTLOAD &&
14038 "Unexpected extending MGATHER/VP_GATHER");
14039
14040 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14041 // the selection of the masked intrinsics doesn't do this for us.
14042 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14043
14044 MVT ContainerVT = VT;
14045 if (VT.isFixedLengthVector()) {
14046 ContainerVT = getContainerForFixedLengthVector(VT);
14047 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14048 ContainerVT.getVectorElementCount());
14049
14050 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14051
14052 if (!IsUnmasked) {
14053 MVT MaskVT = getMaskTypeFor(ContainerVT);
14054 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14055 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14056 }
14057 }
14058
14059 if (!VL)
14060 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14061
14062 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14063 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14064 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14065 }
14066
14067 unsigned IntID =
14068 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14069 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14070 if (IsUnmasked)
14071 Ops.push_back(DAG.getUNDEF(ContainerVT));
14072 else
14073 Ops.push_back(PassThru);
14074 Ops.push_back(BasePtr);
14075 Ops.push_back(Index);
14076 if (!IsUnmasked)
14077 Ops.push_back(Mask);
14078 Ops.push_back(VL);
14079 if (!IsUnmasked)
14080 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14081
14082 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14083 SDValue Result =
14084 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14085 Chain = Result.getValue(1);
14086
14087 if (VT.isFixedLengthVector())
14088 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14089
14090 return DAG.getMergeValues({Result, Chain}, DL);
14091}
14092
14093// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14094// matched to an RVV indexed store. The RVV indexed store instructions only
14095// support the "unsigned unscaled" addressing mode; indices are implicitly
14096// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14097// signed or scaled indexing is extended to the XLEN value type and scaled
14098// accordingly.
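// For example, a masked scatter of a fixed-length <4 x i32> becomes a
// riscv_vsoxei_mask intrinsic on the corresponding scalable container type,
// again with unsigned byte-offset indices.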
14099SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14100 SelectionDAG &DAG) const {
14101 SDLoc DL(Op);
14102 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14103 EVT MemVT = MemSD->getMemoryVT();
14104 MachineMemOperand *MMO = MemSD->getMemOperand();
14105 SDValue Chain = MemSD->getChain();
14106 SDValue BasePtr = MemSD->getBasePtr();
14107
14108 [[maybe_unused]] bool IsTruncatingStore = false;
14109 SDValue Index, Mask, Val, VL;
14110
14111 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14112 Index = VPSN->getIndex();
14113 Mask = VPSN->getMask();
14114 Val = VPSN->getValue();
14115 VL = VPSN->getVectorLength();
14116 // VP doesn't support truncating stores.
14117 IsTruncatingStore = false;
14118 } else {
14119 // Else it must be a MSCATTER.
14120 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14121 Index = MSN->getIndex();
14122 Mask = MSN->getMask();
14123 Val = MSN->getValue();
14124 IsTruncatingStore = MSN->isTruncatingStore();
14125 }
14126
14127 MVT VT = Val.getSimpleValueType();
14128 MVT IndexVT = Index.getSimpleValueType();
14129 MVT XLenVT = Subtarget.getXLenVT();
14130
14131  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14132         "Unexpected VTs!");
14133 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14134 // Targets have to explicitly opt-in for extending vector loads and
14135 // truncating vector stores.
14136 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14137
14138 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14139 // the selection of the masked intrinsics doesn't do this for us.
14140 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14141
14142 MVT ContainerVT = VT;
14143 if (VT.isFixedLengthVector()) {
14144 ContainerVT = getContainerForFixedLengthVector(VT);
14145 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14146 ContainerVT.getVectorElementCount());
14147
14148 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14149 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14150
14151 if (!IsUnmasked) {
14152 MVT MaskVT = getMaskTypeFor(ContainerVT);
14153 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14154 }
14155 }
14156
14157 if (!VL)
14158 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14159
14160 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14161 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14162 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14163 }
14164
14165 unsigned IntID =
14166 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14167 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14168 Ops.push_back(Val);
14169 Ops.push_back(BasePtr);
14170 Ops.push_back(Index);
14171 if (!IsUnmasked)
14172 Ops.push_back(Mask);
14173 Ops.push_back(VL);
14174
14175  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14176                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14177}
14178
14179SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14180 SelectionDAG &DAG) const {
14181 const MVT XLenVT = Subtarget.getXLenVT();
14182 SDLoc DL(Op);
14183 SDValue Chain = Op->getOperand(0);
14184 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14185 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14186 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14187
14188  // The encoding used for the rounding mode in RISC-V differs from that used
14189  // by FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index
14190  // into a table, which consists of a sequence of 4-bit fields, each
14191  // representing the corresponding FLT_ROUNDS mode.
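  // For example, if frm currently holds RTZ (1), the code below computes
  // (Table >> 4) & 7, which is 0, the FLT_ROUNDS encoding of
  // round-toward-zero.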
14192  static const int Table =
14193      (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14194      (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14195      (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14196      (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14197      (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14198
14199 SDValue Shift =
14200 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14201 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14202 DAG.getConstant(Table, DL, XLenVT), Shift);
14203 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14204 DAG.getConstant(7, DL, XLenVT));
14205
14206 return DAG.getMergeValues({Masked, Chain}, DL);
14207}
14208
14209SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14210 SelectionDAG &DAG) const {
14211 const MVT XLenVT = Subtarget.getXLenVT();
14212 SDLoc DL(Op);
14213 SDValue Chain = Op->getOperand(0);
14214 SDValue RMValue = Op->getOperand(1);
14215 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14216
14217  // The encoding used for the rounding mode in RISC-V differs from that used
14218  // by FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
14219  // a table, which consists of a sequence of 4-bit fields, each representing
14220  // the corresponding RISC-V mode.
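  // For example, FLT_ROUNDS "toward positive infinity" (2) selects the third
  // 4-bit field, which holds RISCVFPRndMode::RUP (3), the value written into
  // frm below.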
14221  static const unsigned Table =
14222      (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14223      (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14224      (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14225      (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14226      (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14227
14228 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14229
14230 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14231 DAG.getConstant(2, DL, XLenVT));
14232 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14233 DAG.getConstant(Table, DL, XLenVT), Shift);
14234 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14235 DAG.getConstant(0x7, DL, XLenVT));
14236 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14237 RMValue);
14238}
14239
14240SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14241 SelectionDAG &DAG) const {
14242 const MVT XLenVT = Subtarget.getXLenVT();
14243 SDLoc DL(Op);
14244 SDValue Chain = Op->getOperand(0);
14245 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14246 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14247 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14248}
14249
14250SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14251 SelectionDAG &DAG) const {
14252 const MVT XLenVT = Subtarget.getXLenVT();
14253 SDLoc DL(Op);
14254 SDValue Chain = Op->getOperand(0);
14255 SDValue EnvValue = Op->getOperand(1);
14256 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14257
14258 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14259 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14260 EnvValue);
14261}
14262
14263SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14264 SelectionDAG &DAG) const {
14265 const MVT XLenVT = Subtarget.getXLenVT();
14266 SDLoc DL(Op);
14267 SDValue Chain = Op->getOperand(0);
14268 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14269 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14270
14271 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14272 EnvValue);
14273}
14274
14277
14278SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14279 SelectionDAG &DAG) const {
14280 const MVT XLenVT = Subtarget.getXLenVT();
14281 SDLoc DL(Op);
14282 SDValue Chain = Op->getOperand(0);
14283 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14284 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14285 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14286 Chain = Result.getValue(1);
14287 return DAG.getMergeValues({Result, Chain}, DL);
14288}
14289
14290SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14291 SelectionDAG &DAG) const {
14292 const MVT XLenVT = Subtarget.getXLenVT();
14293 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14294 SDLoc DL(Op);
14295 SDValue Chain = Op->getOperand(0);
14296 SDValue EnvValue = Op->getOperand(1);
14297 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14298 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14299
14300 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14301 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14302 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14303 ModeMask);
14304 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14305 EnvValue);
14306}
14307
14308SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14309 SelectionDAG &DAG) const {
14310 const MVT XLenVT = Subtarget.getXLenVT();
14311 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14312 SDLoc DL(Op);
14313 SDValue Chain = Op->getOperand(0);
14314 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14315 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14316
14317 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14318 ModeMask);
14319}
14320
14321SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14322 SelectionDAG &DAG) const {
14323 MachineFunction &MF = DAG.getMachineFunction();
14324
14325 bool isRISCV64 = Subtarget.is64Bit();
14326 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14327
14328 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14329 return DAG.getFrameIndex(FI, PtrVT);
14330}
14331
14332// Returns the opcode of the target-specific SDNode that implements the 32-bit
14333// form of the given Opcode.
14334static unsigned getRISCVWOpcode(unsigned Opcode) {
14335 switch (Opcode) {
14336 default:
14337 llvm_unreachable("Unexpected opcode");
14338 case ISD::SHL:
14339 return RISCVISD::SLLW;
14340 case ISD::SRA:
14341 return RISCVISD::SRAW;
14342 case ISD::SRL:
14343 return RISCVISD::SRLW;
14344 case ISD::SDIV:
14345 return RISCVISD::DIVW;
14346 case ISD::UDIV:
14347 return RISCVISD::DIVUW;
14348 case ISD::UREM:
14349 return RISCVISD::REMUW;
14350 case ISD::ROTL:
14351 return RISCVISD::ROLW;
14352 case ISD::ROTR:
14353 return RISCVISD::RORW;
14354 }
14355}
14356
14357// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14358// node. Because i8/i16/i32 aren't legal types for RV64, these operations would
14359// otherwise be promoted to i64, making it difficult to select the
14360// SLLW/DIVUW/.../*W later on because the fact that the operation was
14361// originally of type i8/i16/i32 is lost.
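// For example, on RV64 an i32 'srl' is rebuilt as
//   (trunc i32 (SRLW (any_extend i64 x), (any_extend i64 y)))
// so that instruction selection can pick srlw directly.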
14362static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14363                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
14364 SDLoc DL(N);
14365 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14366 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14367 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14368 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14369 // ReplaceNodeResults requires we maintain the same type for the return value.
14370 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14371}
14372
14373// Converts the given 32-bit operation to an i64 operation with sign extension
14374// semantics to reduce the number of sign extension instructions emitted.
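// For example, an i32 'add' becomes
//   (trunc i32 (sext_inreg (add i64 (any_extend x), (any_extend y)), i32)),
// which selects to addw and leaves the result sign-extended in its 64-bit
// register.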
14375static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14376  SDLoc DL(N);
14377 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14378 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14379 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14380 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14381 DAG.getValueType(MVT::i32));
14382 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14383}
14384
14385void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14386                                             SmallVectorImpl<SDValue> &Results,
14387                                             SelectionDAG &DAG) const {
14388 SDLoc DL(N);
14389 switch (N->getOpcode()) {
14390 default:
14391    llvm_unreachable("Don't know how to custom type legalize this operation!");
14392  case ISD::STRICT_FP_TO_SINT:
14393  case ISD::STRICT_FP_TO_UINT:
14394  case ISD::FP_TO_SINT:
14395 case ISD::FP_TO_UINT: {
14396 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14397 "Unexpected custom legalisation");
14398 bool IsStrict = N->isStrictFPOpcode();
14399 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14400 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14401 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14402    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14403        TargetLowering::TypeSoftenFloat) {
14404      if (!isTypeLegal(Op0.getValueType()))
14405 return;
14406 if (IsStrict) {
14407 SDValue Chain = N->getOperand(0);
14408 // In absence of Zfh, promote f16 to f32, then convert.
14409 if (Op0.getValueType() == MVT::f16 &&
14410 !Subtarget.hasStdExtZfhOrZhinx()) {
14411 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14412 {Chain, Op0});
14413 Chain = Op0.getValue(1);
14414 }
14415 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14416 : RISCVISD::STRICT_FCVT_WU_RV64;
14417 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14418 SDValue Res = DAG.getNode(
14419 Opc, DL, VTs, Chain, Op0,
14420 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14421 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14422 Results.push_back(Res.getValue(1));
14423 return;
14424 }
14425 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14426 // convert.
14427 if ((Op0.getValueType() == MVT::f16 &&
14428 !Subtarget.hasStdExtZfhOrZhinx()) ||
14429 Op0.getValueType() == MVT::bf16)
14430 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14431
14432 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14433 SDValue Res =
14434 DAG.getNode(Opc, DL, MVT::i64, Op0,
14435 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14436 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14437 return;
14438 }
14439 // If the FP type needs to be softened, emit a library call using the 'si'
14440 // version. If we left it to default legalization we'd end up with 'di'. If
14441 // the FP type doesn't need to be softened just let generic type
14442 // legalization promote the result type.
14443 RTLIB::Libcall LC;
14444 if (IsSigned)
14445 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14446 else
14447 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14448 MakeLibCallOptions CallOptions;
14449 EVT OpVT = Op0.getValueType();
14450 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14451 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14452 SDValue Result;
14453 std::tie(Result, Chain) =
14454 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14455 Results.push_back(Result);
14456 if (IsStrict)
14457 Results.push_back(Chain);
14458 break;
14459 }
14460 case ISD::LROUND: {
14461 SDValue Op0 = N->getOperand(0);
14462 EVT Op0VT = Op0.getValueType();
14463    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14464        TargetLowering::TypeSoftenFloat) {
14465      if (!isTypeLegal(Op0VT))
14466 return;
14467
14468 // In absence of Zfh, promote f16 to f32, then convert.
14469 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14470 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14471
14472 SDValue Res =
14473 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14474 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14475 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14476 return;
14477 }
14478 // If the FP type needs to be softened, emit a library call to lround. We'll
14479 // need to truncate the result. We assume any value that doesn't fit in i32
14480 // is allowed to return an unspecified value.
14481 RTLIB::Libcall LC =
14482 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14483 MakeLibCallOptions CallOptions;
14484 EVT OpVT = Op0.getValueType();
14485 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14486 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14487 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14488 Results.push_back(Result);
14489 break;
14490 }
14491 case ISD::READCYCLECOUNTER:
14492 case ISD::READSTEADYCOUNTER: {
14493 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14494 "has custom type legalization on riscv32");
14495
14496 SDValue LoCounter, HiCounter;
14497 MVT XLenVT = Subtarget.getXLenVT();
14498 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14499 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14500 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14501 } else {
14502 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14503 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14504 }
14505 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14506 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14507 N->getOperand(0), LoCounter, HiCounter);
14508
14509 Results.push_back(
14510 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14511 Results.push_back(RCW.getValue(2));
14512 break;
14513 }
14514 case ISD::LOAD: {
14515 if (!ISD::isNON_EXTLoad(N))
14516 return;
14517
14518 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14519 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14520    LoadSDNode *Ld = cast<LoadSDNode>(N);
14521
14522 if (N->getValueType(0) == MVT::i64) {
14523 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14524 "Unexpected custom legalisation");
14525
14526 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14527 return;
14528
14529 SDLoc DL(N);
14530 SDValue Result = DAG.getMemIntrinsicNode(
14531 RISCVISD::LD_RV32, DL,
14532 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14533 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14534 SDValue Lo = Result.getValue(0);
14535 SDValue Hi = Result.getValue(1);
14536 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14537 Results.append({Pair, Result.getValue(2)});
14538 return;
14539 }
14540
14541 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14542 "Unexpected custom legalisation");
14543
14544 SDLoc dl(N);
14545 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14546 Ld->getBasePtr(), Ld->getMemoryVT(),
14547 Ld->getMemOperand());
14548 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14549 Results.push_back(Res.getValue(1));
14550 return;
14551 }
14552 case ISD::MUL: {
14553 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14554 unsigned XLen = Subtarget.getXLen();
14555 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14556 if (Size > XLen) {
14557 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14558 SDValue LHS = N->getOperand(0);
14559 SDValue RHS = N->getOperand(1);
14560 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14561
14562 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14563 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14564 // We need exactly one side to be unsigned.
14565 if (LHSIsU == RHSIsU)
14566 return;
14567
14568 auto MakeMULPair = [&](SDValue S, SDValue U) {
14569 MVT XLenVT = Subtarget.getXLenVT();
14570 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14571 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14572 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14573 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14574 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14575 };
14576
14577 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14578 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14579
14580 // The other operand should be signed, but still prefer MULH when
14581 // possible.
14582 if (RHSIsU && LHSIsS && !RHSIsS)
14583 Results.push_back(MakeMULPair(LHS, RHS));
14584 else if (LHSIsU && RHSIsS && !LHSIsS)
14585 Results.push_back(MakeMULPair(RHS, LHS));
14586
14587 return;
14588 }
14589 [[fallthrough]];
14590 }
14591 case ISD::ADD:
14592 case ISD::SUB:
14593 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14594 "Unexpected custom legalisation");
14595 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14596 break;
14597 case ISD::SHL:
14598 case ISD::SRA:
14599 case ISD::SRL:
14600 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14601 "Unexpected custom legalisation");
14602 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14603 // If we can use a BSET instruction, allow default promotion to apply.
14604 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14605 isOneConstant(N->getOperand(0)))
14606 break;
14607 Results.push_back(customLegalizeToWOp(N, DAG));
14608 break;
14609 }
14610
14611 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14612 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14613 // shift amount.
14614 if (N->getOpcode() == ISD::SHL) {
14615 SDLoc DL(N);
14616 SDValue NewOp0 =
14617 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14618 SDValue NewOp1 =
14619 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14620 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14621 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14622 DAG.getValueType(MVT::i32));
14623 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14624 }
14625
14626 break;
14627 case ISD::ROTL:
14628 case ISD::ROTR:
14629 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14630 "Unexpected custom legalisation");
14631 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14632 Subtarget.hasVendorXTHeadBb()) &&
14633 "Unexpected custom legalization");
14634 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14635 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14636 return;
14637 Results.push_back(customLegalizeToWOp(N, DAG));
14638 break;
14639  case ISD::CTTZ:
14640  case ISD::CTTZ_ZERO_UNDEF:
14641 case ISD::CTLZ:
14642 case ISD::CTLZ_ZERO_UNDEF: {
14643 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14644 "Unexpected custom legalisation");
14645
14646 SDValue NewOp0 =
14647 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14648 bool IsCTZ =
14649 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14650 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14651 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14652 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14653 return;
14654 }
14655 case ISD::SDIV:
14656 case ISD::UDIV:
14657 case ISD::UREM: {
14658 MVT VT = N->getSimpleValueType(0);
14659 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14660 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14661 "Unexpected custom legalisation");
14662 // Don't promote division/remainder by constant since we should expand those
14663    // to a multiply by a magic constant.
14664 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14665 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14666 !isIntDivCheap(N->getValueType(0), Attr))
14667 return;
14668
14669 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14670 // the upper 32 bits. For other types we need to sign or zero extend
14671 // based on the opcode.
14672 unsigned ExtOpc = ISD::ANY_EXTEND;
14673 if (VT != MVT::i32)
14674      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14675                                           : ISD::ZERO_EXTEND;
14676
14677 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14678 break;
14679 }
14680 case ISD::SADDO: {
14681 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14682 "Unexpected custom legalisation");
14683
14684 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14685 // use the default legalization.
14686 if (!isa<ConstantSDNode>(N->getOperand(1)))
14687 return;
14688
14689 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14690 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14691 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14692 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14693 DAG.getValueType(MVT::i32));
14694
14695 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14696
14697 // For an addition, the result should be less than one of the operands (LHS)
14698 // if and only if the other operand (RHS) is negative, otherwise there will
14699 // be overflow.
14700 // For a subtraction, the result should be less than one of the operands
14701 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14702 // otherwise there will be overflow.
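    // For example, with i32 LHS = INT32_MAX and RHS = 1 the 32-bit sum wraps
    // to INT32_MIN, so Res < LHS while RHS >= 0 and the XOR below reports
    // overflow.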
14703 EVT OType = N->getValueType(1);
14704 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14705 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14706
14707 SDValue Overflow =
14708 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14709 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14710 Results.push_back(Overflow);
14711 return;
14712 }
14713 case ISD::UADDO:
14714 case ISD::USUBO: {
14715 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14716 "Unexpected custom legalisation");
14717 bool IsAdd = N->getOpcode() == ISD::UADDO;
14718 // Create an ADDW or SUBW.
14719 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14720 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14721 SDValue Res =
14722 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14723 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14724 DAG.getValueType(MVT::i32));
14725
14726 SDValue Overflow;
14727 if (IsAdd && isOneConstant(RHS)) {
14728 // Special case uaddo X, 1 overflowed if the addition result is 0.
14729 // The general case (X + C) < C is not necessarily beneficial. Although we
14730 // reduce the live range of X, we may introduce the materialization of
14731      // constant C, especially when the setcc result is used by a branch, since
14732      // we have no compare-with-constant-and-branch instructions.
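      // For example, (uaddo X, 1) overflows exactly when X == UINT32_MAX,
      // i.e. when the 32-bit sum wraps to 0, so a single seqz on the ADDW
      // result is enough.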
14733 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14734 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14735 } else if (IsAdd && isAllOnesConstant(RHS)) {
14736 // Special case uaddo X, -1 overflowed if X != 0.
14737 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14738 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14739 } else {
14740 // Sign extend the LHS and perform an unsigned compare with the ADDW
14741 // result. Since the inputs are sign extended from i32, this is equivalent
14742 // to comparing the lower 32 bits.
14743 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14744 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14745 IsAdd ? ISD::SETULT : ISD::SETUGT);
14746 }
14747
14748 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14749 Results.push_back(Overflow);
14750 return;
14751 }
14752 case ISD::UADDSAT:
14753 case ISD::USUBSAT: {
14754 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14755 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14756 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14757 // promotion for UADDO/USUBO.
14758 Results.push_back(expandAddSubSat(N, DAG));
14759 return;
14760 }
14761 case ISD::SADDSAT:
14762 case ISD::SSUBSAT: {
14763 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14764 "Unexpected custom legalisation");
14765 Results.push_back(expandAddSubSat(N, DAG));
14766 return;
14767 }
14768 case ISD::ABS: {
14769 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14770 "Unexpected custom legalisation");
14771
14772 if (Subtarget.hasStdExtZbb()) {
14773 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14774 // This allows us to remember that the result is sign extended. Expanding
14775 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14776 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14777 N->getOperand(0));
14778 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14779 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14780 return;
14781 }
14782
14783 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
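    // For example, for X = -5: Y = -1, xor(X, Y) = 4, and 4 - (-1) = 5 = |X|.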
14784 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14785
14786    // Freeze the source so we can increase its use count.
14787 Src = DAG.getFreeze(Src);
14788
14789 // Copy sign bit to all bits using the sraiw pattern.
14790 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14791 DAG.getValueType(MVT::i32));
14792 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14793 DAG.getConstant(31, DL, MVT::i64));
14794
14795 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14796 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14797
14798 // NOTE: The result is only required to be anyextended, but sext is
14799 // consistent with type legalization of sub.
14800 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14801 DAG.getValueType(MVT::i32));
14802 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14803 return;
14804 }
14805 case ISD::BITCAST: {
14806 EVT VT = N->getValueType(0);
14807 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14808 SDValue Op0 = N->getOperand(0);
14809 EVT Op0VT = Op0.getValueType();
14810 MVT XLenVT = Subtarget.getXLenVT();
14811 if (VT == MVT::i16 &&
14812 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14813 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14814 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14815 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14816 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14817 Subtarget.hasStdExtFOrZfinx()) {
14818 SDValue FPConv =
14819 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14820 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14821 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14822 Subtarget.hasStdExtDOrZdinx()) {
14823 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14824 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14825 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14826 NewReg.getValue(0), NewReg.getValue(1));
14827 Results.push_back(RetReg);
14828 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14829 isTypeLegal(Op0VT)) {
14830 // Custom-legalize bitcasts from fixed-length vector types to illegal
14831 // scalar types in order to improve codegen. Bitcast the vector to a
14832 // one-element vector type whose element type is the same as the result
14833 // type, and extract the first element.
14834 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14835 if (isTypeLegal(BVT)) {
14836 SDValue BVec = DAG.getBitcast(BVT, Op0);
14837 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14838 }
14839 }
14840 break;
14841 }
14842 case ISD::BITREVERSE: {
14843 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14844 "Unexpected custom legalisation");
14845 MVT XLenVT = Subtarget.getXLenVT();
14846 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14847 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14848 // ReplaceNodeResults requires we maintain the same type for the return
14849 // value.
14850 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14851 break;
14852 }
14853 case RISCVISD::BREV8:
14854 case RISCVISD::ORC_B: {
14855 MVT VT = N->getSimpleValueType(0);
14856 MVT XLenVT = Subtarget.getXLenVT();
14857 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14858 "Unexpected custom legalisation");
14859 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14860 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14861 "Unexpected extension");
14862 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14863 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14864 // ReplaceNodeResults requires we maintain the same type for the return
14865 // value.
14866 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14867 break;
14868  }
14869  case ISD::EXTRACT_VECTOR_ELT: {
14870 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14871 // type is illegal (currently only vXi64 RV32).
14872 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14873 // transferred to the destination register. We issue two of these from the
14874 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14875 // first element.
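    // For example, extracting element 2 of a v4i64 vector on RV32: slide the
    // vector down by 2, read the low 32 bits with vmv.x.s, shift that element
    // right by 32, and read the high 32 bits with a second vmv.x.s.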
14876 SDValue Vec = N->getOperand(0);
14877 SDValue Idx = N->getOperand(1);
14878
14879 // The vector type hasn't been legalized yet so we can't issue target
14880 // specific nodes if it needs legalization.
14881 // FIXME: We would manually legalize if it's important.
14882 if (!isTypeLegal(Vec.getValueType()))
14883 return;
14884
14885 MVT VecVT = Vec.getSimpleValueType();
14886
14887 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14888 VecVT.getVectorElementType() == MVT::i64 &&
14889 "Unexpected EXTRACT_VECTOR_ELT legalization");
14890
14891 // If this is a fixed vector, we need to convert it to a scalable vector.
14892 MVT ContainerVT = VecVT;
14893 if (VecVT.isFixedLengthVector()) {
14894 ContainerVT = getContainerForFixedLengthVector(VecVT);
14895 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14896 }
14897
14898 MVT XLenVT = Subtarget.getXLenVT();
14899
14900 // Use a VL of 1 to avoid processing more elements than we need.
14901 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14902
14903 // Unless the index is known to be 0, we must slide the vector down to get
14904 // the desired element into index 0.
14905 if (!isNullConstant(Idx)) {
14906 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14907 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14908 }
14909
14910 // Extract the lower XLEN bits of the correct vector element.
14911 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14912
14913 // To extract the upper XLEN bits of the vector element, shift the first
14914 // element right by 32 bits and re-extract the lower XLEN bits.
14915 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14916 DAG.getUNDEF(ContainerVT),
14917 DAG.getConstant(32, DL, XLenVT), VL);
14918 SDValue LShr32 =
14919 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14920 DAG.getUNDEF(ContainerVT), Mask, VL);
14921
14922 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14923
14924 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14925 break;
14926  }
14927  case ISD::INTRINSIC_WO_CHAIN: {
14928 unsigned IntNo = N->getConstantOperandVal(0);
14929 switch (IntNo) {
14930    default:
14931      llvm_unreachable(
14932          "Don't know how to custom type legalize this intrinsic!");
14933 case Intrinsic::experimental_get_vector_length: {
14934 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14935 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14936 return;
14937 }
14938 case Intrinsic::experimental_cttz_elts: {
14939 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14940 Results.push_back(
14941 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14942 return;
14943 }
14944 case Intrinsic::riscv_orc_b:
14945 case Intrinsic::riscv_brev8:
14946 case Intrinsic::riscv_sha256sig0:
14947 case Intrinsic::riscv_sha256sig1:
14948 case Intrinsic::riscv_sha256sum0:
14949 case Intrinsic::riscv_sha256sum1:
14950 case Intrinsic::riscv_sm3p0:
14951 case Intrinsic::riscv_sm3p1: {
14952 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14953 return;
14954 unsigned Opc;
14955 switch (IntNo) {
14956 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14957 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14958 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14959 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14960 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14961 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14962 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14963 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14964 }
14965
14966 SDValue NewOp =
14967 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14968 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14969 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14970 return;
14971 }
14972 case Intrinsic::riscv_sm4ks:
14973 case Intrinsic::riscv_sm4ed: {
14974 unsigned Opc =
14975 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14976 SDValue NewOp0 =
14977 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14978 SDValue NewOp1 =
14979 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
14980 SDValue Res =
14981 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
14982 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14983 return;
14984 }
14985 case Intrinsic::riscv_mopr: {
14986 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14987 return;
14988 SDValue NewOp =
14989 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14990 SDValue Res = DAG.getNode(
14991 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
14992 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
14993 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14994 return;
14995 }
14996 case Intrinsic::riscv_moprr: {
14997 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14998 return;
14999 SDValue NewOp0 =
15000 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15001 SDValue NewOp1 =
15002 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15003 SDValue Res = DAG.getNode(
15004 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15005 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15006 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15007 return;
15008 }
15009 case Intrinsic::riscv_clmul: {
15010 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15011 return;
15012
15013 SDValue NewOp0 =
15014 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15015 SDValue NewOp1 =
15016 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15017 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15018 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15019 return;
15020 }
15021 case Intrinsic::riscv_clmulh:
15022 case Intrinsic::riscv_clmulr: {
15023 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15024 return;
15025
15026 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15027 // to the full 128-bit clmul result of multiplying two xlen values.
15028 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15029 // upper 32 bits.
15030 //
15031 // The alternative is to mask the inputs to 32 bits and use clmul, but
15032 // that requires two shifts to mask each input without zext.w.
15033 // FIXME: If the inputs are known zero extended or could be freely
15034 // zero extended, the mask form would be better.
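      // For example, for clmulh: clmul((a << 32), (b << 32)) equals
      // clmul(a, b) << 64, so CLMULH of the shifted operands returns the full
      // 64-bit clmul(a, b), and the final srl/truncate extracts its upper 32
      // bits, the desired 32-bit clmulh result.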
15035 SDValue NewOp0 =
15036 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15037 SDValue NewOp1 =
15038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15039 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15040 DAG.getConstant(32, DL, MVT::i64));
15041 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15042 DAG.getConstant(32, DL, MVT::i64));
15043 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15044 : RISCVISD::CLMULR;
15045 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15046 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15047 DAG.getConstant(32, DL, MVT::i64));
15048 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15049 return;
15050 }
15051 case Intrinsic::riscv_vmv_x_s: {
15052 EVT VT = N->getValueType(0);
15053 MVT XLenVT = Subtarget.getXLenVT();
15054 if (VT.bitsLT(XLenVT)) {
15055 // Simple case: just extract using vmv.x.s and truncate.
15056 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15057 Subtarget.getXLenVT(), N->getOperand(1));
15058 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15059 return;
15060 }
15061
15062 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15063 "Unexpected custom legalization");
15064
15065 // We need to do the move in two steps.
15066 SDValue Vec = N->getOperand(1);
15067 MVT VecVT = Vec.getSimpleValueType();
15068
15069 // First extract the lower XLEN bits of the element.
15070 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15071
15072 // To extract the upper XLEN bits of the vector element, shift the first
15073 // element right by 32 bits and re-extract the lower XLEN bits.
15074 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15075
15076 SDValue ThirtyTwoV =
15077 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15078 DAG.getConstant(32, DL, XLenVT), VL);
15079 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15080 DAG.getUNDEF(VecVT), Mask, VL);
15081 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15082
15083 Results.push_back(
15084 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15085 break;
15086 }
15087 }
15088 break;
15089 }
15090 case ISD::VECREDUCE_ADD:
15091 case ISD::VECREDUCE_AND:
15092 case ISD::VECREDUCE_OR:
15093 case ISD::VECREDUCE_XOR:
15094 case ISD::VECREDUCE_SMAX:
15095 case ISD::VECREDUCE_UMAX:
15096 case ISD::VECREDUCE_SMIN:
15097 case ISD::VECREDUCE_UMIN:
15098 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15099 Results.push_back(V);
15100 break;
15101 case ISD::VP_REDUCE_ADD:
15102 case ISD::VP_REDUCE_AND:
15103 case ISD::VP_REDUCE_OR:
15104 case ISD::VP_REDUCE_XOR:
15105 case ISD::VP_REDUCE_SMAX:
15106 case ISD::VP_REDUCE_UMAX:
15107 case ISD::VP_REDUCE_SMIN:
15108 case ISD::VP_REDUCE_UMIN:
15109 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15110 Results.push_back(V);
15111 break;
15112 case ISD::GET_ROUNDING: {
15113 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15114 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15115 Results.push_back(Res.getValue(0));
15116 Results.push_back(Res.getValue(1));
15117 break;
15118 }
15119 }
15120}
15121
15122/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15123/// which corresponds to it.
15124static unsigned getVecReduceOpcode(unsigned Opc) {
15125 switch (Opc) {
15126 default:
15127 llvm_unreachable("Unhandled binary to transform reduction");
15128 case ISD::ADD:
15129 return ISD::VECREDUCE_ADD;
15130 case ISD::UMAX:
15131 return ISD::VECREDUCE_UMAX;
15132 case ISD::SMAX:
15133 return ISD::VECREDUCE_SMAX;
15134 case ISD::UMIN:
15135 return ISD::VECREDUCE_UMIN;
15136 case ISD::SMIN:
15137 return ISD::VECREDUCE_SMIN;
15138 case ISD::AND:
15139 return ISD::VECREDUCE_AND;
15140 case ISD::OR:
15141 return ISD::VECREDUCE_OR;
15142 case ISD::XOR:
15143 return ISD::VECREDUCE_XOR;
15144 case ISD::FADD:
15145 // Note: This is the associative form of the generic reduction opcode.
15146 return ISD::VECREDUCE_FADD;
15147 case ISD::FMAXNUM:
15148 return ISD::VECREDUCE_FMAX;
15149 case ISD::FMINNUM:
15150 return ISD::VECREDUCE_FMIN;
15151 }
15152}
15153
15154/// Perform two related transforms whose purpose is to incrementally recognize
15155/// an explode_vector followed by scalar reduction as a vector reduction node.
15156/// This exists to recover from a deficiency in SLP which can't handle
15157/// forests with multiple roots sharing common nodes. In some cases, one
15158/// of the trees will be vectorized, and the other will remain (unprofitably)
15159/// scalarized.
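// For illustration: starting from the scalar chain
//   t = add (extractelt v, 0), (extractelt v, 1)
//   s = add t, (extractelt v, 2)
// the first transform rewrites t as a vecreduce.add of a 2-element
// extract_subvector of v, and the second then grows that reduction by one
// element at a time until the whole explode_vector is covered.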
15160static SDValue
15161 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15162 const RISCVSubtarget &Subtarget) {
15163
15164 // This transform needs to run before all integer types have been legalized
15165 // to i64 (so that the vector element type matches the add type), and while
15166 // it's safe to introduce odd-sized vector types.
15167 if (DAG.NewNodesMustHaveLegalTypes)
15168 return SDValue();
15169
15170 // Without V, this transform isn't useful. We could form the (illegal)
15171 // operations and let them be scalarized again, but there's really no point.
15172 if (!Subtarget.hasVInstructions())
15173 return SDValue();
15174
15175 const SDLoc DL(N);
15176 const EVT VT = N->getValueType(0);
15177 const unsigned Opc = N->getOpcode();
15178
15179 if (!VT.isInteger()) {
15180 switch (Opc) {
15181 default:
15182 return SDValue();
15183 case ISD::FADD:
15184 // For FADD, we only handle the case with reassociation allowed. We
15185 // could handle strict reduction order, but at the moment, there's no
15186 // known reason to, and the complexity isn't worth it.
15187 if (!N->getFlags().hasAllowReassociation())
15188 return SDValue();
15189 break;
15190 case ISD::FMAXNUM:
15191 case ISD::FMINNUM:
15192 break;
15193 }
15194 }
15195
15196 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15197 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15198 "Inconsistent mappings");
15199 SDValue LHS = N->getOperand(0);
15200 SDValue RHS = N->getOperand(1);
15201
15202 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15203 return SDValue();
15204
15205 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15206 std::swap(LHS, RHS);
15207
15208 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15209 !isa<ConstantSDNode>(RHS.getOperand(1)))
15210 return SDValue();
15211
15212 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15213 SDValue SrcVec = RHS.getOperand(0);
15214 EVT SrcVecVT = SrcVec.getValueType();
15215 assert(SrcVecVT.getVectorElementType() == VT);
15216 if (SrcVecVT.isScalableVector())
15217 return SDValue();
15218
15219 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15220 return SDValue();
15221
15222 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15223 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15224 // root of our reduction tree. TODO: We could extend this to any two
15225 // adjacent aligned constant indices if desired.
15226 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15227 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15228 uint64_t LHSIdx =
15229 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15230 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15231 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15232 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15233 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15234 }
15235 }
15236
15237 // Match (binop (reduce (extract_subvector V, 0),
15238 // (extract_vector_elt V, sizeof(SubVec))))
15239 // into a reduction of one more element from the original vector V.
15240 if (LHS.getOpcode() != ReduceOpc)
15241 return SDValue();
15242
15243 SDValue ReduceVec = LHS.getOperand(0);
15244 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15245 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15246 isNullConstant(ReduceVec.getOperand(1)) &&
15247 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15248 // For illegal types (e.g. 3xi32), most will be combined again into a
15249 // wider (hopefully legal) type. If this is a terminal state, we are
15250 // relying on type legalization here to produce something reasonable
15251 // and this lowering quality could probably be improved. (TODO)
15252 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15253 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15254 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15255 ReduceVec->getFlags() & N->getFlags());
15256 }
15257
15258 return SDValue();
15259}
15260
15261
15262// Try to fold (<bop> x, (reduction.<bop> vec, start))
15263 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15264 const RISCVSubtarget &Subtarget) {
15265 auto BinOpToRVVReduce = [](unsigned Opc) {
15266 switch (Opc) {
15267 default:
15268 llvm_unreachable("Unhandled binary to transform reduction");
15269 case ISD::ADD:
15270 return RISCVISD::VECREDUCE_ADD_VL;
15271 case ISD::UMAX:
15272 return RISCVISD::VECREDUCE_UMAX_VL;
15273 case ISD::SMAX:
15274 return RISCVISD::VECREDUCE_SMAX_VL;
15275 case ISD::UMIN:
15276 return RISCVISD::VECREDUCE_UMIN_VL;
15277 case ISD::SMIN:
15278 return RISCVISD::VECREDUCE_SMIN_VL;
15279 case ISD::AND:
15280 return RISCVISD::VECREDUCE_AND_VL;
15281 case ISD::OR:
15282 return RISCVISD::VECREDUCE_OR_VL;
15283 case ISD::XOR:
15284 return RISCVISD::VECREDUCE_XOR_VL;
15285 case ISD::FADD:
15286 return RISCVISD::VECREDUCE_FADD_VL;
15287 case ISD::FMAXNUM:
15288 return RISCVISD::VECREDUCE_FMAX_VL;
15289 case ISD::FMINNUM:
15290 return RISCVISD::VECREDUCE_FMIN_VL;
15291 }
15292 };
15293
15294 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15295 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15296 isNullConstant(V.getOperand(1)) &&
15297 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15298 };
15299
15300 unsigned Opc = N->getOpcode();
15301 unsigned ReduceIdx;
15302 if (IsReduction(N->getOperand(0), Opc))
15303 ReduceIdx = 0;
15304 else if (IsReduction(N->getOperand(1), Opc))
15305 ReduceIdx = 1;
15306 else
15307 return SDValue();
15308
15309 // Skip if FADD disallows reassociation but the combiner needs it.
15310 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15311 return SDValue();
15312
15313 SDValue Extract = N->getOperand(ReduceIdx);
15314 SDValue Reduce = Extract.getOperand(0);
15315 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15316 return SDValue();
15317
15318 SDValue ScalarV = Reduce.getOperand(2);
15319 EVT ScalarVT = ScalarV.getValueType();
15320 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15321 ScalarV.getOperand(0)->isUndef() &&
15322 isNullConstant(ScalarV.getOperand(2)))
15323 ScalarV = ScalarV.getOperand(1);
15324
15325 // Make sure that ScalarV is a splat with VL=1.
15326 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15327 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15328 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15329 return SDValue();
15330
15331 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15332 return SDValue();
15333
15334 // Check that the scalar operand of ScalarV is the neutral element.
15335 // TODO: Deal with values other than the neutral element.
15336 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15337 0))
15338 return SDValue();
15339
15340 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15341 // FIXME: We might be able to improve this if operand 0 is undef.
15342 if (!isNonZeroAVL(Reduce.getOperand(5)))
15343 return SDValue();
15344
15345 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15346
15347 SDLoc DL(N);
15348 SDValue NewScalarV =
15349 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15350 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15351
15352 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15353 if (ScalarVT != ScalarV.getValueType())
15354 NewScalarV =
15355 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15356
15357 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15358 NewScalarV, Reduce.getOperand(3),
15359 Reduce.getOperand(4), Reduce.getOperand(5)};
15360 SDValue NewReduce =
15361 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15362 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15363 Extract.getOperand(1));
15364}
15365
15366// Optimize (add (shl x, c0), (shl y, c1)) ->
15367 // (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2 or 3.
15368 // or
15369 // (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <= 31.
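// For illustration (assuming zba): with c0 = 1 and c1 = 4,
//   (add (shl x, 1), (shl y, 4)) -> (shl (sh3add y, x), 1)
// since ((y << 3) + x) << 1 == (y << 4) + (x << 1).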
15370 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15371 const RISCVSubtarget &Subtarget) {
15372 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15373 // extension.
15374 if (!Subtarget.hasShlAdd(3))
15375 return SDValue();
15376
15377 // Skip for vector types and larger types.
15378 EVT VT = N->getValueType(0);
15379 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15380 return SDValue();
15381
15382 // The two operand nodes must be SHL and have no other use.
15383 SDValue N0 = N->getOperand(0);
15384 SDValue N1 = N->getOperand(1);
15385 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15386 !N0->hasOneUse() || !N1->hasOneUse())
15387 return SDValue();
15388
15389 // Check c0 and c1.
15390 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15391 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15392 if (!N0C || !N1C)
15393 return SDValue();
15394 int64_t C0 = N0C->getSExtValue();
15395 int64_t C1 = N1C->getSExtValue();
15396 if (C0 <= 0 || C1 <= 0)
15397 return SDValue();
15398
15399 int64_t Diff = std::abs(C0 - C1);
15400 if (!Subtarget.hasShlAdd(Diff))
15401 return SDValue();
15402
15403 // Build nodes.
15404 SDLoc DL(N);
15405 int64_t Bits = std::min(C0, C1);
15406 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15407 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15408 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15409 DAG.getConstant(Diff, DL, VT), NS);
15410 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15411}
15412
15413// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15414// or 3.
15415 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15416 SelectionDAG &DAG) {
15417 using namespace llvm::SDPatternMatch;
15418
15419 // Looking for a reg-reg add and not an addi.
15420 if (isa<ConstantSDNode>(N->getOperand(1)))
15421 return SDValue();
15422
15423 // Based on testing it seems that performance degrades if the ADDI has
15424 // more than 2 uses.
15425 if (AddI->use_size() > 2)
15426 return SDValue();
15427
15428 APInt AddVal;
15429 SDValue SHLVal;
15430 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15431 return SDValue();
15432
15433 APInt VShift;
15434 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15435 return SDValue();
15436
15437 if (VShift.slt(1) || VShift.sgt(3))
15438 return SDValue();
15439
15440 SDLoc DL(N);
15441 EVT VT = N->getValueType(0);
15442 // The shift must be positive but the add can be signed.
15443 uint64_t ShlConst = VShift.getZExtValue();
15444 int64_t AddConst = AddVal.getSExtValue();
15445
15446 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15447 DAG.getConstant(ShlConst, DL, VT), Other);
15448 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15449 DAG.getSignedConstant(AddConst, DL, VT));
15450}
15451
15452// Optimize (add (add (shl x, c0), c1), y) ->
15453 // (ADDI (SH*ADD y, x), c1), if c0 is 1, 2 or 3.
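// For illustration (assuming zba): with c0 = 2 and c1 = 100,
//   (add (add (shl x, 2), 100), y) -> (ADDI (SH2ADD x, y), 100)
// where SH2ADD x, y computes (x << 2) + y.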
15454 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15455 const RISCVSubtarget &Subtarget) {
15456 // Perform this optimization only in the zba extension.
15457 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15458 return SDValue();
15459
15460 // Skip for vector types and larger types.
15461 EVT VT = N->getValueType(0);
15462 if (VT != Subtarget.getXLenVT())
15463 return SDValue();
15464
15465 SDValue AddI = N->getOperand(0);
15466 SDValue Other = N->getOperand(1);
15467 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15468 return V;
15469 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15470 return V;
15471 return SDValue();
15472}
15473
15474// Combine a constant select operand into its use:
15475//
15476// (and (select cond, -1, c), x)
15477// -> (select cond, x, (and x, c)) [AllOnes=1]
15478// (or (select cond, 0, c), x)
15479// -> (select cond, x, (or x, c)) [AllOnes=0]
15480// (xor (select cond, 0, c), x)
15481// -> (select cond, x, (xor x, c)) [AllOnes=0]
15482// (add (select cond, 0, c), x)
15483// -> (select cond, x, (add x, c)) [AllOnes=0]
15484// (sub x, (select cond, 0, c))
15485// -> (select cond, x, (sub x, c)) [AllOnes=0]
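// For illustration, with AllOnes=0:
//   (add (select cond, 0, 7), x) -> (select cond, x, (add x, 7))
// so the binary op is only materialized on the path where the select does not
// produce the identity value.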
15486 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15487 SelectionDAG &DAG, bool AllOnes,
15488 const RISCVSubtarget &Subtarget) {
15489 EVT VT = N->getValueType(0);
15490
15491 // Skip vectors.
15492 if (VT.isVector())
15493 return SDValue();
15494
15495 if (!Subtarget.hasConditionalMoveFusion()) {
15496 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15497 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15498 return SDValue();
15499
15500 // Maybe harmful when condition code has multiple use.
15501 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15502 return SDValue();
15503
15504 // Maybe harmful when VT is wider than XLen.
15505 if (VT.getSizeInBits() > Subtarget.getXLen())
15506 return SDValue();
15507 }
15508
15509 if ((Slct.getOpcode() != ISD::SELECT &&
15510 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15511 !Slct.hasOneUse())
15512 return SDValue();
15513
15514 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15515 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15516 };
15517
15518 bool SwapSelectOps;
15519 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15520 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15521 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15522 SDValue NonConstantVal;
15523 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15524 SwapSelectOps = false;
15525 NonConstantVal = FalseVal;
15526 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15527 SwapSelectOps = true;
15528 NonConstantVal = TrueVal;
15529 } else
15530 return SDValue();
15531
15532 // Slct is now known to be the desired identity constant when CC is true.
15533 TrueVal = OtherOp;
15534 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15535 // Unless SwapSelectOps says the condition should be false.
15536 if (SwapSelectOps)
15537 std::swap(TrueVal, FalseVal);
15538
15539 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15540 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15541 {Slct.getOperand(0), Slct.getOperand(1),
15542 Slct.getOperand(2), TrueVal, FalseVal});
15543
15544 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15545 {Slct.getOperand(0), TrueVal, FalseVal});
15546}
15547
15548// Attempt combineSelectAndUse on each operand of a commutative operator N.
15549 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15550 bool AllOnes,
15551 const RISCVSubtarget &Subtarget) {
15552 SDValue N0 = N->getOperand(0);
15553 SDValue N1 = N->getOperand(1);
15554 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15555 return Result;
15556 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15557 return Result;
15558 return SDValue();
15559}
15560
15561// Transform (add (mul x, c0), c1) ->
15562// (add (mul (add x, c1/c0), c0), c1%c0).
15563// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15564// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15565// to an infinite loop in DAGCombine if transformed.
15566// Or transform (add (mul x, c0), c1) ->
15567// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15568// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15569// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15570// lead to an infinite loop in DAGCombine if transformed.
15571// Or transform (add (mul x, c0), c1) ->
15572// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15573// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15574// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15575// lead to an infinite loop in DAGCombine if transformed.
15576// Or transform (add (mul x, c0), c1) ->
15577// (mul (add x, c1/c0), c0).
15578// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
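// For illustration: with c0 = 1000 and c1 = 4500, c1 is not a simm12, but
// c1/c0 = 4 and c1%c0 = 500 are, and c0*(c1/c0) = 4000 is not, so
//   (add (mul x, 1000), 4500) -> (add (mul (add x, 4), 1000), 500).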
15579 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15580 const RISCVSubtarget &Subtarget) {
15581 // Skip for vector types and larger types.
15582 EVT VT = N->getValueType(0);
15583 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15584 return SDValue();
15585 // The first operand node must be a MUL and has no other use.
15586 SDValue N0 = N->getOperand(0);
15587 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15588 return SDValue();
15589 // Check if c0 and c1 match above conditions.
15590 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15591 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15592 if (!N0C || !N1C)
15593 return SDValue();
15594 // If N0C has multiple uses it's possible one of the cases in
15595 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15596 // in an infinite loop.
15597 if (!N0C->hasOneUse())
15598 return SDValue();
15599 int64_t C0 = N0C->getSExtValue();
15600 int64_t C1 = N1C->getSExtValue();
15601 int64_t CA, CB;
15602 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15603 return SDValue();
15604 // Search for proper CA (non-zero) and CB that both are simm12.
15605 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15606 !isInt<12>(C0 * (C1 / C0))) {
15607 CA = C1 / C0;
15608 CB = C1 % C0;
15609 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15610 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15611 CA = C1 / C0 + 1;
15612 CB = C1 % C0 - C0;
15613 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15614 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15615 CA = C1 / C0 - 1;
15616 CB = C1 % C0 + C0;
15617 } else
15618 return SDValue();
15619 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15620 SDLoc DL(N);
15621 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15622 DAG.getSignedConstant(CA, DL, VT));
15623 SDValue New1 =
15624 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15625 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15626}
15627
15628// add (zext, zext) -> zext (add (zext, zext))
15629// sub (zext, zext) -> sext (sub (zext, zext))
15630// mul (zext, zext) -> zext (mul (zext, zext))
15631// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15632// udiv (zext, zext) -> zext (udiv (zext, zext))
15633// srem (zext, zext) -> zext (srem (zext, zext))
15634// urem (zext, zext) -> zext (urem (zext, zext))
15635//
15636 // where the sum of the extend widths matches, and the range of the bin op
15637// fits inside the width of the narrower bin op. (For profitability on rvv, we
15638// use a power of two for both inner and outer extend.)
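// For illustration:
//   (add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32))
//   -> (zext (add (zext a to v4i16), (zext b to v4i16)) to v4i32)
// so the add runs at half the element width.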
15639 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15640
15641 EVT VT = N->getValueType(0);
15642 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15643 return SDValue();
15644
15645 SDValue N0 = N->getOperand(0);
15646 SDValue N1 = N->getOperand(1);
15647 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15648 return SDValue();
15649 if (!N0.hasOneUse() || !N1.hasOneUse())
15650 return SDValue();
15651
15652 SDValue Src0 = N0.getOperand(0);
15653 SDValue Src1 = N1.getOperand(0);
15654 EVT SrcVT = Src0.getValueType();
15655 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15656 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15657 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15658 return SDValue();
15659
15660 LLVMContext &C = *DAG.getContext();
15661 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15662 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15663
15664 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15665 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15666
15667 // Src0 and Src1 are zero extended, so they're always positive if signed.
15668 //
15669 // sub can produce a negative from two positive operands, so it needs sign
15670 // extended. Other nodes produce a positive from two positive operands, so
15671 // zero extend instead.
15672 unsigned OuterExtend =
15673 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15674
15675 return DAG.getNode(
15676 OuterExtend, SDLoc(N), VT,
15677 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15678}
15679
15680 // Try to turn (add (xor bool, 1), -1) into (neg bool).
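// For illustration, with bool known to be 0 or 1:
//   bool = 0:  (0 ^ 1) + (-1) =  0 == -0
//   bool = 1:  (1 ^ 1) + (-1) = -1 == -1
// i.e. the whole expression is (neg bool).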
15681 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15682 SDValue N0 = N->getOperand(0);
15683 SDValue N1 = N->getOperand(1);
15684 EVT VT = N->getValueType(0);
15685 SDLoc DL(N);
15686
15687 // RHS should be -1.
15688 if (!isAllOnesConstant(N1))
15689 return SDValue();
15690
15691 // Look for (xor X, 1).
15692 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15693 return SDValue();
15694
15695 // First xor input should be 0 or 1.
15696 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15697 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15698 return SDValue();
15699
15700 // Emit a negate of the setcc.
15701 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
15702 N0.getOperand(0));
15703}
15704
15705 static SDValue performADDCombine(SDNode *N,
15706 TargetLowering::DAGCombinerInfo &DCI,
15707 const RISCVSubtarget &Subtarget) {
15708 SelectionDAG &DAG = DCI.DAG;
15709 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15710 return V;
15711 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15712 return V;
15713 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15714 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15715 return V;
15716 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15717 return V;
15718 }
15719 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15720 return V;
15721 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15722 return V;
15723 if (SDValue V = combineBinOpOfZExt(N, DAG))
15724 return V;
15725
15726 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15727 // (select lhs, rhs, cc, x, (add x, y))
15728 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15729}
15730
15731 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
15732 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15733 SDValue N0 = N->getOperand(0);
15734 SDValue N1 = N->getOperand(1);
15735 EVT VT = N->getValueType(0);
15736 SDLoc DL(N);
15737
15738 // Require a constant LHS.
15739 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15740 if (!N0C)
15741 return SDValue();
15742
15743 // All our optimizations involve subtracting 1 from the immediate and forming
15744 // an ADDI. Make sure the new immediate is valid for an ADDI.
15745 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15746 if (!ImmValMinus1.isSignedIntN(12))
15747 return SDValue();
15748
15749 SDValue NewLHS;
15750 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15751 // (sub constant, (setcc x, y, eq/neq)) ->
15752 // (add (setcc x, y, neq/eq), constant - 1)
15753 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15754 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15755 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15756 return SDValue();
15757 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15758 NewLHS =
15759 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15760 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15761 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15762 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15763 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15764 NewLHS = N1.getOperand(0);
15765 } else
15766 return SDValue();
15767
15768 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15769 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15770}
15771
15772// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15773// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15774// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15775// valid with Y=3, while 0b0000_1000_0000_0100 is not.
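// For illustration (assuming Zbb): with Y = 0 the pattern is
//   (sub (shl X, 8), X), i.e. X * 255,
// and when only bit 0 of each byte of X can be set this turns every nonzero
// byte into 0xff, which is exactly what orc.b computes.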
15776 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15777 const RISCVSubtarget &Subtarget) {
15778 if (!Subtarget.hasStdExtZbb())
15779 return SDValue();
15780
15781 EVT VT = N->getValueType(0);
15782
15783 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15784 return SDValue();
15785
15786 SDValue N0 = N->getOperand(0);
15787 SDValue N1 = N->getOperand(1);
15788
15789 if (N0->getOpcode() != ISD::SHL)
15790 return SDValue();
15791
15792 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15793 if (!ShAmtCLeft)
15794 return SDValue();
15795 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15796
15797 if (ShiftedAmount >= 8)
15798 return SDValue();
15799
15800 SDValue LeftShiftOperand = N0->getOperand(0);
15801 SDValue RightShiftOperand = N1;
15802
15803 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15804 if (N1->getOpcode() != ISD::SRL)
15805 return SDValue();
15806 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15807 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15808 return SDValue();
15809 RightShiftOperand = N1.getOperand(0);
15810 }
15811
15812 // At least one shift should have a single use.
15813 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15814 return SDValue();
15815
15816 if (LeftShiftOperand != RightShiftOperand)
15817 return SDValue();
15818
15819 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15820 Mask <<= ShiftedAmount;
15821 // Check that X has indeed the right shape (only the Y-th bit can be set in
15822 // every byte).
15823 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15824 return SDValue();
15825
15826 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15827}
15828
15829 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15830 const RISCVSubtarget &Subtarget) {
15831 if (SDValue V = combineSubOfBoolean(N, DAG))
15832 return V;
15833
15834 EVT VT = N->getValueType(0);
15835 SDValue N0 = N->getOperand(0);
15836 SDValue N1 = N->getOperand(1);
15837 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15838 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15839 isNullConstant(N1.getOperand(1)) &&
15840 N1.getValueType() == N1.getOperand(0).getValueType()) {
15841 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15842 if (CCVal == ISD::SETLT) {
15843 SDLoc DL(N);
15844 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15845 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15846 DAG.getConstant(ShAmt, DL, VT));
15847 }
15848 }
15849
15850 if (SDValue V = combineBinOpOfZExt(N, DAG))
15851 return V;
15852 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15853 return V;
15854
15855 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15856 // (select lhs, rhs, cc, x, (sub x, y))
15857 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15858}
15859
15860// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15861// Legalizing setcc can introduce xors like this. Doing this transform reduces
15862// the number of xors and may allow the xor to fold into a branch condition.
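// For illustration, with X and Y known to be 0/1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or  (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)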
15863 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15864 SDValue N0 = N->getOperand(0);
15865 SDValue N1 = N->getOperand(1);
15866 bool IsAnd = N->getOpcode() == ISD::AND;
15867
15868 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15869 return SDValue();
15870
15871 if (!N0.hasOneUse() || !N1.hasOneUse())
15872 return SDValue();
15873
15874 SDValue N01 = N0.getOperand(1);
15875 SDValue N11 = N1.getOperand(1);
15876
15877 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15878 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15879 // operation is And, allow one of the Xors to use -1.
15880 if (isOneConstant(N01)) {
15881 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15882 return SDValue();
15883 } else if (isOneConstant(N11)) {
15884 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15885 if (!(IsAnd && isAllOnesConstant(N01)))
15886 return SDValue();
15887 } else
15888 return SDValue();
15889
15890 EVT VT = N->getValueType(0);
15891
15892 SDValue N00 = N0.getOperand(0);
15893 SDValue N10 = N1.getOperand(0);
15894
15895 // The LHS of the xors needs to be 0/1.
15896 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15897 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15898 return SDValue();
15899
15900 // Invert the opcode and insert a new xor.
15901 SDLoc DL(N);
15902 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15903 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15904 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15905}
15906
15907// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15908// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15909 // value to an unsigned value. This will be lowered to vmax and a series of
15910 // vnclipu instructions later. This can be extended to truncated types other
15911 // than i8 by replacing 256 and 255 with the equivalent constants for the
15912// type.
15913 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15914 EVT VT = N->getValueType(0);
15915 SDValue N0 = N->getOperand(0);
15916 EVT SrcVT = N0.getValueType();
15917
15918 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15919 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15920 return SDValue();
15921
15922 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15923 return SDValue();
15924
15925 SDValue Cond = N0.getOperand(0);
15926 SDValue True = N0.getOperand(1);
15927 SDValue False = N0.getOperand(2);
15928
15929 if (Cond.getOpcode() != ISD::SETCC)
15930 return SDValue();
15931
15932 // FIXME: Support the version of this pattern with the select operands
15933 // swapped.
15934 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15935 if (CCVal != ISD::SETULT)
15936 return SDValue();
15937
15938 SDValue CondLHS = Cond.getOperand(0);
15939 SDValue CondRHS = Cond.getOperand(1);
15940
15941 if (CondLHS != True)
15942 return SDValue();
15943
15944 unsigned ScalarBits = VT.getScalarSizeInBits();
15945
15946 // FIXME: Support other constants.
15947 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15948 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15949 return SDValue();
15950
15951 if (False.getOpcode() != ISD::SIGN_EXTEND)
15952 return SDValue();
15953
15954 False = False.getOperand(0);
15955
15956 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15957 return SDValue();
15958
15959 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15960 if (!FalseRHSC || !FalseRHSC->isZero())
15961 return SDValue();
15962
15963 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15964 if (CCVal2 != ISD::SETGT)
15965 return SDValue();
15966
15967 // Emit the signed to unsigned saturation pattern.
15968 SDLoc DL(N);
15969 SDValue Max =
15970 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15971 SDValue Min =
15972 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15973 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15974 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15975}
15976
15977 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
15978 const RISCVSubtarget &Subtarget) {
15979 SDValue N0 = N->getOperand(0);
15980 EVT VT = N->getValueType(0);
15981
15982 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
15983 // extending X. This is safe since we only need the LSB after the shift and
15984 // shift amounts larger than 31 would produce poison. If we wait until
15985 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
15986 // to use a BEXT instruction.
15987 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
15988 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
15989 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
15990 SDLoc DL(N0);
15991 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
15992 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
15993 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
15994 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
15995 }
15996
15997 return combineTruncSelectToSMaxUSat(N, DAG);
15998}
15999
16000// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16001 // truncation. But RVV's truncation instructions can only narrow to half the
16002 // bitwidth at a time.
16003//
16004// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16005//
16006// vsetvli a0, zero, e32, m2, ta, ma
16007// vnsrl.wi v12, v8, 0
16008// vsetvli zero, zero, e16, m1, ta, ma
16009// vnsrl.wi v8, v12, 0
16010// vsetvli zero, zero, e8, mf2, ta, ma
16011// vnsrl.wi v8, v8, 0
16012//
16013 // So reverse the combine so we generate a vmseq/vmsne again:
16014//
16015// and (lshr (trunc X), ShAmt), 1
16016// -->
16017// zext (icmp ne (and X, (1 << ShAmt)), 0)
16018//
16019// and (lshr (not (trunc X)), ShAmt), 1
16020// -->
16021// zext (icmp eq (and X, (1 << ShAmt)), 0)
16022 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16023 const RISCVSubtarget &Subtarget) {
16024 using namespace SDPatternMatch;
16025 SDLoc DL(N);
16026
16027 if (!Subtarget.hasVInstructions())
16028 return SDValue();
16029
16030 EVT VT = N->getValueType(0);
16031 if (!VT.isVector())
16032 return SDValue();
16033
16034 APInt ShAmt;
16035 SDValue Inner;
16036 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16037 m_One())))
16038 return SDValue();
16039
16040 SDValue X;
16041 bool IsNot;
16042 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16043 IsNot = true;
16044 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16045 IsNot = false;
16046 else
16047 return SDValue();
16048
16049 EVT WideVT = X.getValueType();
16050 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16051 return SDValue();
16052
16053 SDValue Res =
16054 DAG.getNode(ISD::AND, DL, WideVT, X,
16055 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16056 Res = DAG.getSetCC(DL,
16057 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16058 WideVT.getVectorElementCount()),
16059 Res, DAG.getConstant(0, DL, WideVT),
16060 IsNot ? ISD::SETEQ : ISD::SETNE);
16061 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16062}
16063
16064 static SDValue reduceANDOfAtomicLoad(SDNode *N,
16065 TargetLowering::DAGCombinerInfo &DCI) {
16066 SelectionDAG &DAG = DCI.DAG;
16067 if (N->getOpcode() != ISD::AND)
16068 return SDValue();
16069
16070 SDValue N0 = N->getOperand(0);
16071 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16072 return SDValue();
16073 if (!N0.hasOneUse())
16074 return SDValue();
16075
16076 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16077 if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
16078 return SDValue();
16079
16080 EVT LoadedVT = ALoad->getMemoryVT();
16081 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16082 if (!MaskConst)
16083 return SDValue();
16084 uint64_t Mask = MaskConst->getZExtValue();
16085 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16086 if (Mask != ExpectedMask)
16087 return SDValue();
16088
16089 SDValue ZextLoad = DAG.getAtomicLoad(
16090 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16091 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16092 DCI.CombineTo(N, ZextLoad);
16093 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16094 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16095 return SDValue(N, 0);
16096}
16097
16098 // Combines two comparison operations and a logic operation into one selection
16099 // operation (min, max) and a logic operation. Returns the newly constructed
16100 // node if the conditions for the optimization are satisfied.
16101 static SDValue performANDCombine(SDNode *N,
16102 TargetLowering::DAGCombinerInfo &DCI,
16103 const RISCVSubtarget &Subtarget) {
16104 SelectionDAG &DAG = DCI.DAG;
16105
16106 SDValue N0 = N->getOperand(0);
16107 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16108 // extending X. This is safe since we only need the LSB after the shift and
16109 // shift amounts larger than 31 would produce poison. If we wait until
16110 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16111 // to use a BEXT instruction.
16112 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16113 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16114 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16115 N0.hasOneUse()) {
16116 SDLoc DL(N);
16117 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16118 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16119 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16120 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16121 DAG.getConstant(1, DL, MVT::i64));
16122 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16123 }
16124
16125 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16126 return V;
16127
16128 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16129 return V;
16130 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16131 return V;
16132 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16133 return V;
16134
16135 if (DCI.isAfterLegalizeDAG())
16136 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16137 return V;
16138
16139 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16140 // (select lhs, rhs, cc, x, (and x, y))
16141 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16142}
16143
16144// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16145 // FIXME: Generalize to other binary operators with the same operand.
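// For illustration:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)
// pulling the common "xor with 1" out of both arms of the select idiom.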
16146 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16147 SelectionDAG &DAG) {
16148 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16149
16150 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16151 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16152 !N0.hasOneUse() || !N1.hasOneUse())
16153 return SDValue();
16154
16155 // Should have the same condition.
16156 SDValue Cond = N0.getOperand(1);
16157 if (Cond != N1.getOperand(1))
16158 return SDValue();
16159
16160 SDValue TrueV = N0.getOperand(0);
16161 SDValue FalseV = N1.getOperand(0);
16162
16163 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16164 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16165 !isOneConstant(TrueV.getOperand(1)) ||
16166 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16167 return SDValue();
16168
16169 EVT VT = N->getValueType(0);
16170 SDLoc DL(N);
16171
16172 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16173 Cond);
16174 SDValue NewN1 =
16175 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16176 SDValue NewOr =
16177 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16178 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16179}
16180
16181// (xor X, (xor (and X, C2), Y))
16182// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16183// where C2 is a shifted mask with width = Width and shift = ShAmt
16184// qc_insb might become qc.insb or qc.insbi depending on the operands.
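// For illustration: with C2 = 0xff0 (Width = 8, ShAmt = 4) and Y known to be
// zero outside that mask,
//   (xor X, (xor (and X, 0xff0), Y))
// replaces bits [11:4] of X with bits [11:4] of Y, i.e.
//   (qc_insb X, (sra Y, 4), 8, 4).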
16185 static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16186 const RISCVSubtarget &Subtarget) {
16187 if (!Subtarget.hasVendorXqcibm())
16188 return SDValue();
16189
16190 using namespace SDPatternMatch;
16191
16192 SDValue Base, Inserted;
16193 APInt CMask;
16194 if (!sd_match(N, m_Xor(m_Value(Base),
16195 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16196 m_ConstInt(CMask))),
16197 m_Value(Inserted))))))
16198 return SDValue();
16199
16200 if (N->getValueType(0) != MVT::i32)
16201 return SDValue();
16202
16203 unsigned Width, ShAmt;
16204 if (!CMask.isShiftedMask(ShAmt, Width))
16205 return SDValue();
16206
16207 // Check if all zero bits in CMask are also zero in Inserted
16208 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16209 return SDValue();
16210
16211 SDLoc DL(N);
16212
16213 // `Inserted` needs to be right shifted before it is put into the
16214 // instruction.
16215 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16216 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16217
16218 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16219 DAG.getConstant(ShAmt, DL, MVT::i32)};
16220 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16221}
16222
16223 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16224 const RISCVSubtarget &Subtarget) {
16225 SelectionDAG &DAG = DCI.DAG;
16226
16227 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16228 return V;
16229 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16230 return V;
16231
16232 if (DCI.isAfterLegalizeDAG())
16233 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16234 return V;
16235
16236 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16237 // We may be able to pull a common operation out of the true and false value.
16238 SDValue N0 = N->getOperand(0);
16239 SDValue N1 = N->getOperand(1);
16240 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16241 return V;
16242 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16243 return V;
16244
16245 // fold (or (select cond, 0, y), x) ->
16246 // (select cond, x, (or x, y))
16247 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16248}
16249
16250 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16251 const RISCVSubtarget &Subtarget) {
16252 SDValue N0 = N->getOperand(0);
16253 SDValue N1 = N->getOperand(1);
16254
16255 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16256 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16257 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16258 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16259 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16260 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16261 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16262 SDLoc DL(N);
16263 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16264 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16265 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16266 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16267 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16268 }
16269
16270 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16271 // NOTE: Assumes ROL being legal means ROLW is legal.
16272 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16273 if (N0.getOpcode() == RISCVISD::SLLW &&
16274 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16275 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16276 SDLoc DL(N);
16277 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16278 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16279 }
16280
16281 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16282 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16283 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16284 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16285 if (ConstN00 && CC == ISD::SETLT) {
16286 EVT VT = N0.getValueType();
16287 SDLoc DL(N0);
16288 const APInt &Imm = ConstN00->getAPIntValue();
16289 if ((Imm + 1).isSignedIntN(12))
16290 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16291 DAG.getConstant(Imm + 1, DL, VT), CC);
16292 }
16293 }
16294
16295 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16296 return V;
16297
16298 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16299 return V;
16300 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16301 return V;
16302
16303 // fold (xor (select cond, 0, y), x) ->
16304 // (select cond, x, (xor x, y))
16305 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16306}
16307
16308// Try to expand a multiply to a sequence of shifts and add/subs,
16309 // for a machine without a native mul instruction.
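// For illustration: MulAmt = 7 has non-adjacent form 8 - 1, so the loop below
// builds ((0 - (X << 0)) + (X << 3)), i.e. X*8 - X, using one shift and one
// add/sub per non-zero NAF digit.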
16310 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16311 uint64_t MulAmt) {
16312 SDLoc DL(N);
16313 EVT VT = N->getValueType(0);
16314 const uint64_t BitWidth = VT.getFixedSizeInBits();
16315
16316 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16317 SDValue N0 = N->getOperand(0);
16318
16319 // Find the Non-adjacent form of the multiplier.
16320 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16321 if (E & 1) {
16322 bool IsAdd = (E & 3) == 1;
16323 E -= IsAdd ? 1 : -1;
16324 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16325 DAG.getShiftAmountConstant(I, VT, DL));
16326 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16327 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16328 }
16329 }
16330
16331 return Result;
16332}
16333
16334// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
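// For illustration: MulAmt = 20 = 16 + 4 -> (add (shl X, 4), (shl X, 2)),
// and MulAmt = 14 = 16 - 2 -> (sub (shl X, 4), (shl X, 1)).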
16335 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16336 uint64_t MulAmt) {
16337 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16338 ISD::NodeType Op;
16339 uint64_t ShiftAmt1;
16340 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16341 Op = ISD::SUB;
16342 ShiftAmt1 = MulAmt + MulAmtLowBit;
16343 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16344 Op = ISD::ADD;
16345 ShiftAmt1 = MulAmt - MulAmtLowBit;
16346 } else {
16347 return SDValue();
16348 }
16349 EVT VT = N->getValueType(0);
16350 SDLoc DL(N);
16351 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16352 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16353 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16354 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16355 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16356}
16357
16358// Try to expand a scalar multiply to a faster sequence.
16359 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16360 TargetLowering::DAGCombinerInfo &DCI,
16361 const RISCVSubtarget &Subtarget) {
16362
16363 EVT VT = N->getValueType(0);
16364
16365 // LI + MUL is usually smaller than the alternative sequence.
16366 if (DAG.getMachineFunction().getFunction().hasMinSize())
16367 return SDValue();
16368
16369 if (VT != Subtarget.getXLenVT())
16370 return SDValue();
16371
16372 bool ShouldExpandMul =
16373 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16374 !Subtarget.hasStdExtZmmul();
16375 if (!ShouldExpandMul)
16376 return SDValue();
16377
16378 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16379 if (!CNode)
16380 return SDValue();
16381 uint64_t MulAmt = CNode->getZExtValue();
16382
16383 // Don't do this if the Xqciac extension is enabled and the MulAmt is a simm12.
16384 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16385 return SDValue();
16386
16387 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
16388 // We're adding additional uses of X here, and in principle, we should be freezing
16389 // X before doing so. However, adding freeze here causes real regressions, and no
16390 // other target properly freezes X in these cases either.
16391 SDValue X = N->getOperand(0);
16392
16393 if (Subtarget.hasShlAdd(3)) {
16394 for (uint64_t Divisor : {3, 5, 9}) {
16395 if (MulAmt % Divisor != 0)
16396 continue;
16397 uint64_t MulAmt2 = MulAmt / Divisor;
16398 // 3/5/9 * 2^N -> shl (shXadd X, X), N
16399 if (isPowerOf2_64(MulAmt2)) {
16400 SDLoc DL(N);
16401 SDValue X = N->getOperand(0);
16402 // Put the shift first if we can fold a zext into the
16403 // shift forming a slli.uw.
16404 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16405 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16406 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
16407 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16408 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16409 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
16410 Shl);
16411 }
16412 // Otherwise, put the shl second so that it can fold with the following
16413 // instructions (e.g. sext or add).
16414 SDValue Mul359 =
16415 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16416 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16417 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16418 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
16419 }
16420
16421 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
16422 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
16423 SDLoc DL(N);
16424 SDValue Mul359 =
16425 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16426 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16427 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16428 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
16429 Mul359);
16430 }
16431 }
16432
16433 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
16434 // shXadd. First check if this is a sum of two powers of 2 because that's
16435 // easy. Then count how many trailing zeros there are up to the first set bit.
16436 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16437 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16438 if (ScaleShift >= 1 && ScaleShift < 4) {
16439 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16440 SDLoc DL(N);
16441 SDValue Shift1 =
16442 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16443 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16444 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16445 }
16446 }
16447
16448 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16449 // This is the two-instruction form; there are also three-instruction
16450 // variants we could implement, e.g.
16451 // (2^(1,2,3) * 3,5,9 + 1) << C2
16452 // 2^(C1>3) * 3,5,9 +/- 1
16453 for (uint64_t Divisor : {3, 5, 9}) {
16454 uint64_t C = MulAmt - 1;
16455 if (C <= Divisor)
16456 continue;
16457 unsigned TZ = llvm::countr_zero(C);
16458 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
16459 SDLoc DL(N);
16460 SDValue Mul359 =
16461 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16462 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16463 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16464 DAG.getConstant(TZ, DL, VT), X);
16465 }
16466 }
16467
16468 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
16469 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16470 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16471 if (ScaleShift >= 1 && ScaleShift < 4) {
16472 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
16473 SDLoc DL(N);
16474 SDValue Shift1 =
16475 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16476 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16477 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16478 DAG.getConstant(ScaleShift, DL, VT), X));
16479 }
16480 }
16481
16482 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
16483 for (uint64_t Offset : {3, 5, 9}) {
16484 if (isPowerOf2_64(MulAmt + Offset)) {
16485 unsigned ShAmt = Log2_64(MulAmt + Offset);
16486 if (ShAmt >= VT.getSizeInBits())
16487 continue;
16488 SDLoc DL(N);
16489 SDValue Shift1 =
16490 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16491 SDValue Mul359 =
16492 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16493 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16494 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16495 }
16496 }
16497
16498 for (uint64_t Divisor : {3, 5, 9}) {
16499 if (MulAmt % Divisor != 0)
16500 continue;
16501 uint64_t MulAmt2 = MulAmt / Divisor;
16502 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16503 // of 25 which happen to be quite common.
16504 for (uint64_t Divisor2 : {3, 5, 9}) {
16505 if (MulAmt2 % Divisor2 != 0)
16506 continue;
16507 uint64_t MulAmt3 = MulAmt2 / Divisor2;
16508 if (isPowerOf2_64(MulAmt3)) {
16509 SDLoc DL(N);
16510 SDValue Mul359A =
16511 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16512 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16513 SDValue Mul359B = DAG.getNode(
16514 RISCVISD::SHL_ADD, DL, VT, Mul359A,
16515 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
16516 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16517 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
16518 }
16519 }
16520 }
16521 }
16522
16523 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16524 return V;
16525
16526 if (!Subtarget.hasStdExtZmmul())
16527 return expandMulToNAFSequence(N, DAG, MulAmt);
16528
16529 return SDValue();
16530}
16531
16532// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16533// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16534// Same for other equivalent types with other equivalent constants.
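// For illustration with v4i32: (and (lshr X, 15), 0x10001) extracts the sign
// bit of each i16 half of every lane, and multiplying by 0xffff smears that
// bit across its half, which is the same as shifting each i16 lane right
// arithmetically by 15 after a bitcast to v8i16.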
16535 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16536 EVT VT = N->getValueType(0);
16537 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16538
16539 // Do this for legal vectors unless they are i1 or i8 vectors.
16540 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16541 return SDValue();
16542
16543 if (N->getOperand(0).getOpcode() != ISD::AND ||
16544 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16545 return SDValue();
16546
16547 SDValue And = N->getOperand(0);
16548 SDValue Srl = And.getOperand(0);
16549
16550 APInt V1, V2, V3;
16551 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16552 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16553 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16554 return SDValue();
16555
16556 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16557 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16558 V3 != (HalfSize - 1))
16559 return SDValue();
16560
16561 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16562 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16563 VT.getVectorElementCount() * 2);
16564 SDLoc DL(N);
16565 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16566 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16567 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16568 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16569}
16570
16571 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16572 TargetLowering::DAGCombinerInfo &DCI,
16573 const RISCVSubtarget &Subtarget) {
16574 EVT VT = N->getValueType(0);
16575 if (!VT.isVector())
16576 return expandMul(N, DAG, DCI, Subtarget);
16577
16578 SDLoc DL(N);
16579 SDValue N0 = N->getOperand(0);
16580 SDValue N1 = N->getOperand(1);
16581 SDValue MulOper;
16582 unsigned AddSubOpc;
16583
16584 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16585 // (mul x, (add y, 1)) -> (add x, (mul x, y))
16586 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16587 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16588 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16589 AddSubOpc = V->getOpcode();
16590 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16591 SDValue Opnd = V->getOperand(1);
16592 MulOper = V->getOperand(0);
16593 if (AddSubOpc == ISD::SUB)
16594 std::swap(Opnd, MulOper);
16595 if (isOneOrOneSplat(Opnd))
16596 return true;
16597 }
16598 return false;
16599 };
16600
16601 if (IsAddSubWith1(N0)) {
16602 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16603 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16604 }
16605
16606 if (IsAddSubWith1(N1)) {
16607 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16608 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16609 }
16610
16611 if (SDValue V = combineBinOpOfZExt(N, DAG))
16612 return V;
16613
16614 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16615 return V;
16616
16617 return SDValue();
16618}
16619
16620/// According to the property that indexed load/store instructions zero-extend
16621/// their indices, try to narrow the type of index operand.
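/// For example (illustrative types), an index vector built as
/// (shl (zext nxv4i8 %idx to nxv4i64), 2) never produces values wider than
/// 10 bits, so the extend and shift can be rebuilt at i16 and the hardware's
/// implicit zero-extension of the index operand recovers the original value.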
16622static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16623 if (isIndexTypeSigned(IndexType))
16624 return false;
16625
16626 if (!N->hasOneUse())
16627 return false;
16628
16629 EVT VT = N.getValueType();
16630 SDLoc DL(N);
16631
16632 // In general, what we're doing here is seeing if we can sink a truncate to
16633 // a smaller element type into the expression tree building our index.
16634 // TODO: We can generalize this and handle a bunch more cases if useful.
16635
16636 // Narrow a buildvector to the narrowest element type. This requires less
16637 // work and less register pressure at high LMUL, and creates smaller constants
16638 // which may be cheaper to materialize.
16639 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16640 KnownBits Known = DAG.computeKnownBits(N);
16641 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16642 LLVMContext &C = *DAG.getContext();
16643 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16644 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16645 N = DAG.getNode(ISD::TRUNCATE, DL,
16646 VT.changeVectorElementType(ResultVT), N);
16647 return true;
16648 }
16649 }
16650
16651 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
16652 if (N.getOpcode() != ISD::SHL)
16653 return false;
16654
16655 SDValue N0 = N.getOperand(0);
16656 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16657 N0.getOpcode() != RISCVISD::VZEXT_VL)
16658 return false;
16659 if (!N0->hasOneUse())
16660 return false;
16661
16662 APInt ShAmt;
16663 SDValue N1 = N.getOperand(1);
16664 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16665 return false;
16666
16667 SDValue Src = N0.getOperand(0);
16668 EVT SrcVT = Src.getValueType();
16669 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16670 unsigned ShAmtV = ShAmt.getZExtValue();
16671 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16672 NewElen = std::max(NewElen, 8U);
16673
16674 // Skip if NewElen is not narrower than the original extended type.
16675 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16676 return false;
16677
16678 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16679 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16680
16681 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16682 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16683 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16684 return true;
16685}
16686
16687/// Try to map an integer comparison with size > XLEN to vector instructions
16688/// before type legalization splits it up into chunks.
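/// For example, on RV64 an i128 equality compare can become a v16i8 vector
/// compare-not-equal whose mask is OR-reduced with vp.reduce.or, instead of
/// being split into two scalar 64-bit compares.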
16689static SDValue
16690 combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16691 const SDLoc &DL, SelectionDAG &DAG,
16692 const RISCVSubtarget &Subtarget) {
16693 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16694
16695 if (!Subtarget.hasVInstructions())
16696 return SDValue();
16697
16698 MVT XLenVT = Subtarget.getXLenVT();
16699 EVT OpVT = X.getValueType();
16700 // We're looking for an oversized integer equality comparison.
16701 if (!OpVT.isScalarInteger())
16702 return SDValue();
16703
16704 unsigned OpSize = OpVT.getSizeInBits();
16705 // The size should be larger than XLen and smaller than the maximum vector
16706 // size.
16707 if (OpSize <= Subtarget.getXLen() ||
16708 OpSize > Subtarget.getRealMinVLen() *
16709 Subtarget.getMaxLMULForFixedLengthVectors())
16710 return SDValue();
16711
16712 // Don't perform this combine if constructing the vector will be expensive.
16713 auto IsVectorBitCastCheap = [](SDValue X) {
16714 X = peekThroughBitcasts(X);
16715 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16716 X.getOpcode() == ISD::LOAD;
16717 };
16718 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16719 return SDValue();
16720
16721 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16722 Attribute::NoImplicitFloat))
16723 return SDValue();
16724
16725 // Bail out for non-byte-sized types.
16726 if (!OpVT.isByteSized())
16727 return SDValue();
16728
16729 unsigned VecSize = OpSize / 8;
16730 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16731 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16732
16733 SDValue VecX = DAG.getBitcast(VecVT, X);
16734 SDValue VecY = DAG.getBitcast(VecVT, Y);
16735 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16736 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16737
16738 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16739 DAG.getCondCode(ISD::SETNE), Mask, VL);
16740 return DAG.getSetCC(DL, VT,
16741 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16742 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16743 VL),
16744 DAG.getConstant(0, DL, XLenVT), CC);
16745}
16746
16747 static SDValue performSETCCCombine(SDNode *N,
16748 TargetLowering::DAGCombinerInfo &DCI,
16749 const RISCVSubtarget &Subtarget) {
16750 SelectionDAG &DAG = DCI.DAG;
16751 SDLoc dl(N);
16752 SDValue N0 = N->getOperand(0);
16753 SDValue N1 = N->getOperand(1);
16754 EVT VT = N->getValueType(0);
16755 EVT OpVT = N0.getValueType();
16756
16757 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16758 // Looking for an equality compare.
16759 if (!isIntEqualitySetCC(Cond))
16760 return SDValue();
16761
16762 if (SDValue V =
16763 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16764 return V;
16765
16766 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16767 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16768 isa<ConstantSDNode>(N0.getOperand(1))) {
16769 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16770 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
16771 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16772 AndRHSC.isNegatedPowerOf2()) {
16773 unsigned ShiftBits = AndRHSC.countr_zero();
16774 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16775 DAG.getConstant(ShiftBits, dl, OpVT));
16776 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16777 }
16778
16779 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16780 // comparing with constants other than 0 if the constant can be folded into
16781 // addi or xori after shifting.
16782 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16783 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16784 if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff &&
16785 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16786 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16787 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16788 if (NewC >= -2048 && NewC <= 2048) {
16789 SDValue SExt =
16790 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16791 DAG.getValueType(MVT::i32));
16792 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16793 DAG.getConstant(ShiftBits, dl, OpVT));
16794 return DAG.getSetCC(dl, VT, Shift,
16795 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16796 }
16797 }
16798 }
16799
16800 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16801 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16802 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16803 // sext_inreg can become a sext.w instead of a shift pair.
16804 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16805 return SDValue();
16806
16807 // RHS needs to be a constant.
16808 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16809 if (!N1C)
16810 return SDValue();
16811
16812 // LHS needs to be (and X, 0xffffffff).
16813 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16814 !isa<ConstantSDNode>(N0.getOperand(1)) ||
16815 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16816 return SDValue();
16817
16818 // Don't do this if the sign bit is provably zero, it will be turned back into
16819 // an AND.
16820 APInt SignMask = APInt::getOneBitSet(64, 31);
16821 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16822 return SDValue();
16823
16824 const APInt &C1 = N1C->getAPIntValue();
16825
16826 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16827 // to be equal.
16828 if (C1.getActiveBits() > 32)
16829 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16830
16831 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16832 N0.getOperand(0), DAG.getValueType(MVT::i32));
16833 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16834 dl, OpVT), Cond);
16835}
16836
16837static SDValue
16838 performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16839 const RISCVSubtarget &Subtarget) {
16840 SelectionDAG &DAG = DCI.DAG;
16841 SDValue Src = N->getOperand(0);
16842 EVT VT = N->getValueType(0);
16843 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16844 unsigned Opc = Src.getOpcode();
16845 SDLoc DL(N);
16846
16847 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16848 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16849 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16850 Subtarget.hasStdExtZfhmin())
16851 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16852
16853 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16854 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16855 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16856 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16857 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16858 Src.getOperand(1));
16859
16860 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16861 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16862 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
16863
16864 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16865 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16866 isAllOnesConstant(Src.getOperand(1)) &&
16867 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16868 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16869 DAG.getAllOnesConstant(DL, VT));
16870
16871 return SDValue();
16872}
16873
16874namespace {
16875// Forward declaration of the structure holding the necessary information to
16876// apply a combine.
16877struct CombineResult;
16878
16879enum ExtKind : uint8_t {
16880 ZExt = 1 << 0,
16881 SExt = 1 << 1,
16882 FPExt = 1 << 2,
16883 BF16Ext = 1 << 3
16884};
16885/// Helper class for folding sign/zero extensions.
16886/// In particular, this class is used for the following combines:
16887/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
16888/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
16889/// mul | mul_vl -> vwmul(u) | vwmul_su
16890/// shl | shl_vl -> vwsll
16891/// fadd -> vfwadd | vfwadd_w
16892/// fsub -> vfwsub | vfwsub_w
16893/// fmul -> vfwmul
16894/// An object of this class represents an operand of the operation we want to
16895/// combine.
16896/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
16897/// NodeExtensionHelper for `a` and one for `b`.
16898///
16899/// This class abstracts away how the extension is materialized and
16900 /// how its number of users affects the combines.
16901///
16902/// In particular:
16903/// - VWADD_W is conceptually == add(op0, sext(op1))
16904/// - VWADDU_W == add(op0, zext(op1))
16905/// - VWSUB_W == sub(op0, sext(op1))
16906/// - VWSUBU_W == sub(op0, zext(op1))
16907/// - VFWADD_W == fadd(op0, fpext(op1))
16908/// - VFWSUB_W == fsub(op0, fpext(op1))
16909/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
16910/// zext|sext(smaller_value).
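/// For example, when combining (add_vl (vzext_vl a), (vzext_vl b)), the helper
/// built for each operand reports SupportsZExt, so the add can be rewritten as
/// (vwaddu_vl a, b) with both extensions folded away.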
16911struct NodeExtensionHelper {
16912 /// Records if this operand is like being zero extended.
16913 bool SupportsZExt;
16914 /// Records if this operand is like being sign extended.
16915 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
16916 /// instance, a splat constant (e.g., 3), would support being both sign and
16917 /// zero extended.
16918 bool SupportsSExt;
16919 /// Records if this operand is like being floating point extended.
16920 bool SupportsFPExt;
16921 /// Records if this operand is extended from bf16.
16922 bool SupportsBF16Ext;
16923 /// This boolean captures whether we care if this operand would still be
16924 /// around after the folding happens.
16925 bool EnforceOneUse;
16926 /// Original value that this NodeExtensionHelper represents.
16927 SDValue OrigOperand;
16928
16929 /// Get the value feeding the extension or the value itself.
16930 /// E.g., for zext(a), this would return a.
16931 SDValue getSource() const {
16932 switch (OrigOperand.getOpcode()) {
16933 case ISD::ZERO_EXTEND:
16934 case ISD::SIGN_EXTEND:
16935 case RISCVISD::VSEXT_VL:
16936 case RISCVISD::VZEXT_VL:
16937 case RISCVISD::FP_EXTEND_VL:
16938 return OrigOperand.getOperand(0);
16939 default:
16940 return OrigOperand;
16941 }
16942 }
16943
16944 /// Check if this instance represents a splat.
16945 bool isSplat() const {
16946 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
16947 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
16948 }
16949
16950 /// Get the extended opcode.
16951 unsigned getExtOpc(ExtKind SupportsExt) const {
16952 switch (SupportsExt) {
16953 case ExtKind::SExt:
16954 return RISCVISD::VSEXT_VL;
16955 case ExtKind::ZExt:
16956 return RISCVISD::VZEXT_VL;
16957 case ExtKind::FPExt:
16958 case ExtKind::BF16Ext:
16959 return RISCVISD::FP_EXTEND_VL;
16960 }
16961 llvm_unreachable("Unknown ExtKind enum");
16962 }
16963
16964 /// Get or create a value that can feed \p Root with the given extension \p
16965 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
16966 /// operand. \see ::getSource().
16967 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
16968 const RISCVSubtarget &Subtarget,
16969 std::optional<ExtKind> SupportsExt) const {
16970 if (!SupportsExt.has_value())
16971 return OrigOperand;
16972
16973 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
16974
16975 SDValue Source = getSource();
16976 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
16977 if (Source.getValueType() == NarrowVT)
16978 return Source;
16979
16980 unsigned ExtOpc = getExtOpc(*SupportsExt);
16981
16982 // If we need an extension, we should be changing the type.
16983 SDLoc DL(OrigOperand);
16984 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
16985 switch (OrigOperand.getOpcode()) {
16986 case ISD::ZERO_EXTEND:
16987 case ISD::SIGN_EXTEND:
16988 case RISCVISD::VSEXT_VL:
16989 case RISCVISD::VZEXT_VL:
16990 case RISCVISD::FP_EXTEND_VL:
16991 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
16992 case ISD::SPLAT_VECTOR:
16993 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
16994 case RISCVISD::VMV_V_X_VL:
16995 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
16996 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
16997 case RISCVISD::VFMV_V_F_VL:
16998 Source = Source.getOperand(1);
16999 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17000 Source = Source.getOperand(0);
17001 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17002 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17003 DAG.getUNDEF(NarrowVT), Source, VL);
17004 default:
17005 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17006 // and that operand should already have the right NarrowVT so no
17007 // extension should be required at this point.
17008 llvm_unreachable("Unsupported opcode");
17009 }
17010 }
17011
17012 /// Helper function to get the narrow type for \p Root.
17013 /// The narrow type is the type of \p Root where we divided the size of each
17014 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
17015 /// \pre Both the narrow type and the original type should be legal.
17016 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17017 MVT VT = Root->getSimpleValueType(0);
17018
17019 // Determine the narrow size.
17020 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17021
17022 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17023 : SupportsExt == ExtKind::FPExt
17024 ? MVT::getFloatingPointVT(NarrowSize)
17025 : MVT::getIntegerVT(NarrowSize);
17026
17027 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17028 "Trying to extend something we can't represent");
17029 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17030 return NarrowVT;
17031 }
17032
17033 /// Get the opcode to materialize:
17034 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17035 static unsigned getSExtOpcode(unsigned Opcode) {
17036 switch (Opcode) {
17037 case ISD::ADD:
17038 case RISCVISD::ADD_VL:
17039 case RISCVISD::VWADD_W_VL:
17040 case RISCVISD::VWADDU_W_VL:
17041 case ISD::OR:
17042 case RISCVISD::OR_VL:
17043 return RISCVISD::VWADD_VL;
17044 case ISD::SUB:
17045 case RISCVISD::SUB_VL:
17046 case RISCVISD::VWSUB_W_VL:
17047 case RISCVISD::VWSUBU_W_VL:
17048 return RISCVISD::VWSUB_VL;
17049 case ISD::MUL:
17050 case RISCVISD::MUL_VL:
17051 return RISCVISD::VWMUL_VL;
17052 default:
17053 llvm_unreachable("Unexpected opcode");
17054 }
17055 }
17056
17057 /// Get the opcode to materialize:
17058 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17059 static unsigned getZExtOpcode(unsigned Opcode) {
17060 switch (Opcode) {
17061 case ISD::ADD:
17062 case RISCVISD::ADD_VL:
17063 case RISCVISD::VWADD_W_VL:
17064 case RISCVISD::VWADDU_W_VL:
17065 case ISD::OR:
17066 case RISCVISD::OR_VL:
17067 return RISCVISD::VWADDU_VL;
17068 case ISD::SUB:
17069 case RISCVISD::SUB_VL:
17070 case RISCVISD::VWSUB_W_VL:
17071 case RISCVISD::VWSUBU_W_VL:
17072 return RISCVISD::VWSUBU_VL;
17073 case ISD::MUL:
17074 case RISCVISD::MUL_VL:
17075 return RISCVISD::VWMULU_VL;
17076 case ISD::SHL:
17077 case RISCVISD::SHL_VL:
17078 return RISCVISD::VWSLL_VL;
17079 default:
17080 llvm_unreachable("Unexpected opcode");
17081 }
17082 }
17083
17084 /// Get the opcode to materialize:
17085 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17086 static unsigned getFPExtOpcode(unsigned Opcode) {
17087 switch (Opcode) {
17088 case RISCVISD::FADD_VL:
17089 case RISCVISD::VFWADD_W_VL:
17090 return RISCVISD::VFWADD_VL;
17091 case RISCVISD::FSUB_VL:
17092 case RISCVISD::VFWSUB_W_VL:
17093 return RISCVISD::VFWSUB_VL;
17094 case RISCVISD::FMUL_VL:
17095 return RISCVISD::VFWMUL_VL;
17096 case RISCVISD::VFMADD_VL:
17097 return RISCVISD::VFWMADD_VL;
17098 case RISCVISD::VFMSUB_VL:
17099 return RISCVISD::VFWMSUB_VL;
17100 case RISCVISD::VFNMADD_VL:
17101 return RISCVISD::VFWNMADD_VL;
17102 case RISCVISD::VFNMSUB_VL:
17103 return RISCVISD::VFWNMSUB_VL;
17104 default:
17105 llvm_unreachable("Unexpected opcode");
17106 }
17107 }
17108
17109 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17110 /// newOpcode(a, b).
17111 static unsigned getSUOpcode(unsigned Opcode) {
17112 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17113 "SU is only supported for MUL");
17114 return RISCVISD::VWMULSU_VL;
17115 }
17116
17117 /// Get the opcode to materialize
17118 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17119 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17120 switch (Opcode) {
17121 case ISD::ADD:
17122 case RISCVISD::ADD_VL:
17123 case ISD::OR:
17124 case RISCVISD::OR_VL:
17125 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17126 : RISCVISD::VWADDU_W_VL;
17127 case ISD::SUB:
17128 case RISCVISD::SUB_VL:
17129 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17130 : RISCVISD::VWSUBU_W_VL;
17131 case RISCVISD::FADD_VL:
17132 return RISCVISD::VFWADD_W_VL;
17133 case RISCVISD::FSUB_VL:
17134 return RISCVISD::VFWSUB_W_VL;
17135 default:
17136 llvm_unreachable("Unexpected opcode");
17137 }
17138 }
17139
17140 using CombineToTry = std::function<std::optional<CombineResult>(
17141 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17142 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17143 const RISCVSubtarget &)>;
17144
17145 /// Check if this node needs to be fully folded or extended for all users.
17146 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17147
17148 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17149 const RISCVSubtarget &Subtarget) {
17150 unsigned Opc = OrigOperand.getOpcode();
17151 MVT VT = OrigOperand.getSimpleValueType();
17152
17153 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17154 "Unexpected Opcode");
17155
17156 // The passthru must be undef for tail agnostic.
17157 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17158 return;
17159
17160 // Get the scalar value.
17161 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17162 : OrigOperand.getOperand(1);
17163
17164 // See if we have enough sign bits or zero bits in the scalar to use a
17165 // widening opcode by splatting to smaller element size.
17166 unsigned EltBits = VT.getScalarSizeInBits();
17167 unsigned ScalarBits = Op.getValueSizeInBits();
17168 // If we're not getting all bits from the element, we need special handling.
17169 if (ScalarBits < EltBits) {
17170 // This should only occur on RV32.
17171 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17172 !Subtarget.is64Bit() && "Unexpected splat");
17173 // vmv.v.x sign extends narrow inputs.
17174 SupportsSExt = true;
17175
17176 // If the input is positive, then sign extend is also zero extend.
17177 if (DAG.SignBitIsZero(Op))
17178 SupportsZExt = true;
17179
17180 EnforceOneUse = false;
17181 return;
17182 }
17183
17184 unsigned NarrowSize = EltBits / 2;
17185 // If the narrow type cannot be expressed with a legal VMV,
17186 // this is not a valid candidate.
17187 if (NarrowSize < 8)
17188 return;
17189
17190 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17191 SupportsSExt = true;
17192
17193 if (DAG.MaskedValueIsZero(Op,
17194 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17195 SupportsZExt = true;
17196
17197 EnforceOneUse = false;
17198 }
17199
17200 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17201 return (NarrowEltVT == MVT::f32 ||
17202 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17203 }
17204
17205 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17206 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17207 }
17208
17209 /// Helper method to set the various fields of this struct based on the
17210 /// type of \p Root.
17211 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17212 const RISCVSubtarget &Subtarget) {
17213 SupportsZExt = false;
17214 SupportsSExt = false;
17215 SupportsFPExt = false;
17216 SupportsBF16Ext = false;
17217 EnforceOneUse = true;
17218 unsigned Opc = OrigOperand.getOpcode();
17219 // For the nodes we handle below, we end up using their inputs directly: see
17220 // getSource(). However since they either don't have a passthru or we check
17221 // that their passthru is undef, we can safely ignore their mask and VL.
17222 switch (Opc) {
17223 case ISD::ZERO_EXTEND:
17224 case ISD::SIGN_EXTEND: {
17225 MVT VT = OrigOperand.getSimpleValueType();
17226 if (!VT.isVector())
17227 break;
17228
17229 SDValue NarrowElt = OrigOperand.getOperand(0);
17230 MVT NarrowVT = NarrowElt.getSimpleValueType();
17231 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17232 if (NarrowVT.getVectorElementType() == MVT::i1)
17233 break;
17234
17235 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17236 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17237 break;
17238 }
17239 case RISCVISD::VZEXT_VL:
17240 SupportsZExt = true;
17241 break;
17242 case RISCVISD::VSEXT_VL:
17243 SupportsSExt = true;
17244 break;
17245 case RISCVISD::FP_EXTEND_VL: {
17246 MVT NarrowEltVT =
17247 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17248 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17249 SupportsFPExt = true;
17250 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17251 SupportsBF16Ext = true;
17252
17253 break;
17254 }
17255 case ISD::SPLAT_VECTOR:
17256 case RISCVISD::VMV_V_X_VL:
17257 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17258 break;
17259 case RISCVISD::VFMV_V_F_VL: {
17260 MVT VT = OrigOperand.getSimpleValueType();
17261
17262 if (!OrigOperand.getOperand(0).isUndef())
17263 break;
17264
17265 SDValue Op = OrigOperand.getOperand(1);
17266 if (Op.getOpcode() != ISD::FP_EXTEND)
17267 break;
17268
17269 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17270 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17271 if (NarrowSize != ScalarBits)
17272 break;
17273
17274 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17275 SupportsFPExt = true;
17276 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17277 Subtarget))
17278 SupportsBF16Ext = true;
17279 break;
17280 }
17281 default:
17282 break;
17283 }
17284 }
17285
17286 /// Check if \p Root supports any extension folding combines.
17287 static bool isSupportedRoot(const SDNode *Root,
17288 const RISCVSubtarget &Subtarget) {
17289 switch (Root->getOpcode()) {
17290 case ISD::ADD:
17291 case ISD::SUB:
17292 case ISD::MUL: {
17293 return Root->getValueType(0).isScalableVector();
17294 }
17295 case ISD::OR: {
17296 return Root->getValueType(0).isScalableVector() &&
17297 Root->getFlags().hasDisjoint();
17298 }
17299 // Vector Widening Integer Add/Sub/Mul Instructions
17300 case RISCVISD::ADD_VL:
17301 case RISCVISD::MUL_VL:
17302 case RISCVISD::VWADD_W_VL:
17303 case RISCVISD::VWADDU_W_VL:
17304 case RISCVISD::SUB_VL:
17305 case RISCVISD::VWSUB_W_VL:
17306 case RISCVISD::VWSUBU_W_VL:
17307 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17308 case RISCVISD::FADD_VL:
17309 case RISCVISD::FSUB_VL:
17310 case RISCVISD::FMUL_VL:
17311 case RISCVISD::VFWADD_W_VL:
17312 case RISCVISD::VFWSUB_W_VL:
17313 return true;
17314 case RISCVISD::OR_VL:
17315 return Root->getFlags().hasDisjoint();
17316 case ISD::SHL:
17317 return Root->getValueType(0).isScalableVector() &&
17318 Subtarget.hasStdExtZvbb();
17319 case RISCVISD::SHL_VL:
17320 return Subtarget.hasStdExtZvbb();
17321 case RISCVISD::VFMADD_VL:
17322 case RISCVISD::VFNMSUB_VL:
17323 case RISCVISD::VFNMADD_VL:
17324 case RISCVISD::VFMSUB_VL:
17325 return true;
17326 default:
17327 return false;
17328 }
17329 }
17330
17331 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17332 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17333 const RISCVSubtarget &Subtarget) {
17334 assert(isSupportedRoot(Root, Subtarget) &&
17335 "Trying to build a helper with an "
17336 "unsupported root");
17337 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17339 OrigOperand = Root->getOperand(OperandIdx);
17340
17341 unsigned Opc = Root->getOpcode();
17342 switch (Opc) {
17343 // We consider
17344 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17345 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17346 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17347 case RISCVISD::VWADD_W_VL:
17348 case RISCVISD::VWADDU_W_VL:
17349 case RISCVISD::VWSUB_W_VL:
17350 case RISCVISD::VWSUBU_W_VL:
17351 case RISCVISD::VFWADD_W_VL:
17352 case RISCVISD::VFWSUB_W_VL:
17353 if (OperandIdx == 1) {
17354 SupportsZExt =
17355 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
17356 SupportsSExt =
17357 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
17358 SupportsFPExt =
17359 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
17360 // There's no existing extension here, so we don't have to worry about
17361 // making sure it gets removed.
17362 EnforceOneUse = false;
17363 break;
17364 }
17365 [[fallthrough]];
17366 default:
17367 fillUpExtensionSupport(Root, DAG, Subtarget);
17368 break;
17369 }
17370 }
17371
17372 /// Helper function to get the Mask and VL from \p Root.
17373 static std::pair<SDValue, SDValue>
17374 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17375 const RISCVSubtarget &Subtarget) {
17376 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17377 switch (Root->getOpcode()) {
17378 case ISD::ADD:
17379 case ISD::SUB:
17380 case ISD::MUL:
17381 case ISD::OR:
17382 case ISD::SHL: {
17383 SDLoc DL(Root);
17384 MVT VT = Root->getSimpleValueType(0);
17385 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17386 }
17387 default:
17388 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17389 }
17390 }
17391
17392 /// Helper function to check if \p N is commutative with respect to the
17393 /// foldings that are supported by this class.
17394 static bool isCommutative(const SDNode *N) {
17395 switch (N->getOpcode()) {
17396 case ISD::ADD:
17397 case ISD::MUL:
17398 case ISD::OR:
17399 case RISCVISD::ADD_VL:
17400 case RISCVISD::MUL_VL:
17401 case RISCVISD::OR_VL:
17402 case RISCVISD::VWADD_W_VL:
17403 case RISCVISD::VWADDU_W_VL:
17404 case RISCVISD::FADD_VL:
17405 case RISCVISD::FMUL_VL:
17406 case RISCVISD::VFWADD_W_VL:
17407 case RISCVISD::VFMADD_VL:
17408 case RISCVISD::VFNMSUB_VL:
17409 case RISCVISD::VFNMADD_VL:
17410 case RISCVISD::VFMSUB_VL:
17411 return true;
17412 case ISD::SUB:
17413 case RISCVISD::SUB_VL:
17414 case RISCVISD::VWSUB_W_VL:
17415 case RISCVISD::VWSUBU_W_VL:
17416 case RISCVISD::FSUB_VL:
17417 case RISCVISD::VFWSUB_W_VL:
17418 case ISD::SHL:
17419 case RISCVISD::SHL_VL:
17420 return false;
17421 default:
17422 llvm_unreachable("Unexpected opcode");
17423 }
17424 }
17425
17426 /// Get a list of combine to try for folding extensions in \p Root.
17427 /// Note that each returned CombineToTry function doesn't actually modify
17428 /// anything. Instead they produce an optional CombineResult that if not None,
17429 /// need to be materialized for the combine to be applied.
17430 /// \see CombineResult::materialize.
17431 /// If the related CombineToTry function returns std::nullopt, that means the
17432 /// combine didn't match.
17433 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17434};
17435
17436/// Helper structure that holds all the necessary information to materialize a
17437/// combine that does some extension folding.
17438struct CombineResult {
17439 /// Opcode to be generated when materializing the combine.
17440 unsigned TargetOpcode;
17441 // No value means no extension is needed.
17442 std::optional<ExtKind> LHSExt;
17443 std::optional<ExtKind> RHSExt;
17444 /// Root of the combine.
17445 SDNode *Root;
17446 /// LHS of the TargetOpcode.
17447 NodeExtensionHelper LHS;
17448 /// RHS of the TargetOpcode.
17449 NodeExtensionHelper RHS;
17450
17451 CombineResult(unsigned TargetOpcode, SDNode *Root,
17452 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17453 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17454 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17455 LHS(LHS), RHS(RHS) {}
17456
17457 /// Return a value that uses TargetOpcode and that can be used to replace
17458 /// Root.
17459 /// The actual replacement is *not* done in that method.
17460 SDValue materialize(SelectionDAG &DAG,
17461 const RISCVSubtarget &Subtarget) const {
17462 SDValue Mask, VL, Passthru;
17463 std::tie(Mask, VL) =
17464 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17465 switch (Root->getOpcode()) {
17466 default:
17467 Passthru = Root->getOperand(2);
17468 break;
17469 case ISD::ADD:
17470 case ISD::SUB:
17471 case ISD::MUL:
17472 case ISD::OR:
17473 case ISD::SHL:
17474 Passthru = DAG.getUNDEF(Root->getValueType(0));
17475 break;
17476 }
17477 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17478 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17479 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17480 Passthru, Mask, VL);
17481 }
17482};
17483
17484/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17485/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17486/// are zext) and LHS and RHS can be folded into Root.
17487/// AllowExtMask define which form `ext` can take in this pattern.
17488///
17489/// \note If the pattern can match with both zext and sext, the returned
17490/// CombineResult will feature the zext result.
17491///
17492/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17493/// can be used to apply the pattern.
17494static std::optional<CombineResult>
17495canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17496 const NodeExtensionHelper &RHS,
17497 uint8_t AllowExtMask, SelectionDAG &DAG,
17498 const RISCVSubtarget &Subtarget) {
17499 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17500 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17501 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17502 /*RHSExt=*/{ExtKind::ZExt});
17503 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17504 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17505 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17506 /*RHSExt=*/{ExtKind::SExt});
17507 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17508 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17509 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17510 /*RHSExt=*/{ExtKind::FPExt});
17511 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17512 RHS.SupportsBF16Ext)
17513 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17514 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17515 /*RHSExt=*/{ExtKind::BF16Ext});
17516 return std::nullopt;
17517}
17518
17519/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17520/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17521/// are zext) and LHS and RHS can be folded into Root.
17522///
17523/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17524/// can be used to apply the pattern.
17525static std::optional<CombineResult>
17526canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17527 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17528 const RISCVSubtarget &Subtarget) {
17529 return canFoldToVWWithSameExtensionImpl(
17530 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17531 Subtarget);
17532}
17533
17534/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17535///
17536/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17537/// can be used to apply the pattern.
17538static std::optional<CombineResult>
17539canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17540 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17541 const RISCVSubtarget &Subtarget) {
17542 if (RHS.SupportsFPExt)
17543 return CombineResult(
17544 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17545 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17546
17547 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17548 // sext/zext?
17549 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17550 // purposes.
17551 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17552 return CombineResult(
17553 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17554 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17555 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17556 return CombineResult(
17557 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17558 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17559 return std::nullopt;
17560}
17561
17562/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
17563///
17564/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17565/// can be used to apply the pattern.
17566static std::optional<CombineResult>
17567canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17568 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17569 const RISCVSubtarget &Subtarget) {
17570 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
17571 Subtarget);
17572}
17573
17574/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17575///
17576/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17577/// can be used to apply the pattern.
17578static std::optional<CombineResult>
17579canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17580 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17581 const RISCVSubtarget &Subtarget) {
17582 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17583 Subtarget);
17584}
17585
17586/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
17587///
17588/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17589/// can be used to apply the pattern.
17590static std::optional<CombineResult>
17591canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17592 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17593 const RISCVSubtarget &Subtarget) {
17594 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
17595 Subtarget);
17596}
17597
17598/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17599///
17600/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17601/// can be used to apply the pattern.
17602static std::optional<CombineResult>
17603canFoldToVWWithBF16EXT(SDNode *Root, const NodeExtensionHelper &LHS,
17604 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17605 const RISCVSubtarget &Subtarget) {
17606 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17607 Subtarget);
17608}
17609
17610/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17611///
17612/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17613/// can be used to apply the pattern.
17614static std::optional<CombineResult>
17615canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17616 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17617 const RISCVSubtarget &Subtarget) {
17618
17619 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17620 return std::nullopt;
17621 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17622 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17623 /*RHSExt=*/{ExtKind::ZExt});
17624}
17625
17626 SmallVector<NodeExtensionHelper::CombineToTry>
17627 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17628 SmallVector<CombineToTry> Strategies;
17629 switch (Root->getOpcode()) {
17630 case ISD::ADD:
17631 case ISD::SUB:
17632 case ISD::OR:
17633 case RISCVISD::ADD_VL:
17634 case RISCVISD::SUB_VL:
17635 case RISCVISD::OR_VL:
17636 case RISCVISD::FADD_VL:
17637 case RISCVISD::FSUB_VL:
17638 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17639 Strategies.push_back(canFoldToVWWithSameExtension);
17640 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17641 Strategies.push_back(canFoldToVW_W);
17642 break;
17643 case RISCVISD::FMUL_VL:
17644 case RISCVISD::VFMADD_VL:
17645 case RISCVISD::VFMSUB_VL:
17646 case RISCVISD::VFNMADD_VL:
17647 case RISCVISD::VFNMSUB_VL:
17648 Strategies.push_back(canFoldToVWWithSameExtension);
17649 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17650 Strategies.push_back(canFoldToVWWithBF16EXT);
17651 break;
17652 case ISD::MUL:
17653 case RISCVISD::MUL_VL:
17654 // mul -> vwmul(u)
17655 Strategies.push_back(canFoldToVWWithSameExtension);
17656 // mul -> vwmulsu
17657 Strategies.push_back(canFoldToVW_SU);
17658 break;
17659 case ISD::SHL:
17660 case RISCVISD::SHL_VL:
17661 // shl -> vwsll
17662 Strategies.push_back(canFoldToVWWithZEXT);
17663 break;
17664 case RISCVISD::VWADD_W_VL:
17665 case RISCVISD::VWSUB_W_VL:
17666 // vwadd_w|vwsub_w -> vwadd|vwsub
17667 Strategies.push_back(canFoldToVWWithSEXT);
17668 break;
17669 case RISCVISD::VWADDU_W_VL:
17670 case RISCVISD::VWSUBU_W_VL:
17671 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17672 Strategies.push_back(canFoldToVWWithZEXT);
17673 break;
17674 case RISCVISD::VFWADD_W_VL:
17675 case RISCVISD::VFWSUB_W_VL:
17676 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17677 Strategies.push_back(canFoldToVWWithFPEXT);
17678 break;
17679 default:
17680 llvm_unreachable("Unexpected opcode");
17681 }
17682 return Strategies;
17683}
17684} // End anonymous namespace.
17685
17687 // TODO: Extend this to other binops using generic identity logic
17688 assert(N->getOpcode() == RISCVISD::ADD_VL);
17689 SDValue A = N->getOperand(0);
17690 SDValue B = N->getOperand(1);
17691 SDValue Passthru = N->getOperand(2);
17692 if (!Passthru.isUndef())
17693 // TODO: This could be a vmerge instead
17694 return SDValue();
17695 ;
17696 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17697 return A;
17698 // Peek through fixed to scalable
17699 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17700 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17701 return A;
17702 return SDValue();
17703}
17704
17705/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17706/// The supported combines are:
17707/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17708/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17709/// mul | mul_vl -> vwmul(u) | vwmul_su
17710/// shl | shl_vl -> vwsll
17711/// fadd_vl -> vfwadd | vfwadd_w
17712/// fsub_vl -> vfwsub | vfwsub_w
17713/// fmul_vl -> vfwmul
17714/// vwadd_w(u) -> vwadd(u)
17715/// vwsub_w(u) -> vwsub(u)
17716/// vfwadd_w -> vfwadd
17717/// vfwsub_w -> vfwsub
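/// For example, (add_vl (vsext_vl a), (vsext_vl b)) becomes (vwadd_vl a, b),
/// while (add_vl x, (vsext_vl b)) with an already-wide x becomes
/// (vwadd_w_vl x, b).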
17718 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17719 TargetLowering::DAGCombinerInfo &DCI,
17720 const RISCVSubtarget &Subtarget) {
17721 SelectionDAG &DAG = DCI.DAG;
17722 if (DCI.isBeforeLegalize())
17723 return SDValue();
17724
17725 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17726 return SDValue();
17727
17728 SmallVector<SDNode *> Worklist;
17729 SmallPtrSet<SDNode *, 8> Inserted;
17730 Worklist.push_back(N);
17731 Inserted.insert(N);
17732 SmallVector<CombineResult> CombinesToApply;
17733
17734 while (!Worklist.empty()) {
17735 SDNode *Root = Worklist.pop_back_val();
17736
17737 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17738 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17739 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17740 &Inserted](const NodeExtensionHelper &Op) {
17741 if (Op.needToPromoteOtherUsers()) {
17742 for (SDUse &Use : Op.OrigOperand->uses()) {
17743 SDNode *TheUser = Use.getUser();
17744 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17745 return false;
17746 // We only support the first 2 operands of FMA.
17747 if (Use.getOperandNo() >= 2)
17748 return false;
17749 if (Inserted.insert(TheUser).second)
17750 Worklist.push_back(TheUser);
17751 }
17752 }
17753 return true;
17754 };
17755
17756 // Control the compile time by limiting the number of node we look at in
17757 // total.
17758 if (Inserted.size() > ExtensionMaxWebSize)
17759 return SDValue();
17760
17761 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17762 NodeExtensionHelper::getSupportedFoldings(Root);
17763
17764 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17765 bool Matched = false;
17766 for (int Attempt = 0;
17767 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17768 ++Attempt) {
17769
17770 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17771 FoldingStrategies) {
17772 std::optional<CombineResult> Res =
17773 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17774 if (Res) {
17775 Matched = true;
17776 CombinesToApply.push_back(*Res);
17777 // All the inputs that are extended need to be folded, otherwise
17778 // we would be leaving the old input (since it may still be used),
17779 // and the new one.
17780 if (Res->LHSExt.has_value())
17781 if (!AppendUsersIfNeeded(LHS))
17782 return SDValue();
17783 if (Res->RHSExt.has_value())
17784 if (!AppendUsersIfNeeded(RHS))
17785 return SDValue();
17786 break;
17787 }
17788 }
17789 std::swap(LHS, RHS);
17790 }
17791 // Right now we do an all or nothing approach.
17792 if (!Matched)
17793 return SDValue();
17794 }
17795 // Store the value for the replacement of the input node separately.
17796 SDValue InputRootReplacement;
17797 // We do the RAUW after we materialize all the combines, because some replaced
17798 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17799 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17800 // yet-to-be-visited CombinesToApply roots.
17801 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17802 ValuesToReplace.reserve(CombinesToApply.size());
17803 for (CombineResult Res : CombinesToApply) {
17804 SDValue NewValue = Res.materialize(DAG, Subtarget);
17805 if (!InputRootReplacement) {
17806 assert(Res.Root == N &&
17807 "First element is expected to be the current node");
17808 InputRootReplacement = NewValue;
17809 } else {
17810 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17811 }
17812 }
17813 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17814 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17815 DCI.AddToWorklist(OldNewValues.second.getNode());
17816 }
17817 return InputRootReplacement;
17818}
17819
17820// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17821// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17822// y will be the Passthru and cond will be the Mask.
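// This is sound because the merge's all-zeros false operand makes the
// masked-off lanes add or subtract zero, i.e. they keep the value of y, which
// is exactly what passing y as the passthru of the masked widening op does.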
17823 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17824 unsigned Opc = N->getOpcode();
17825 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17826 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17827
17828 SDValue Y = N->getOperand(0);
17829 SDValue MergeOp = N->getOperand(1);
17830 unsigned MergeOpc = MergeOp.getOpcode();
17831
17832 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17833 return SDValue();
17834
17835 SDValue X = MergeOp->getOperand(1);
17836
17837 if (!MergeOp.hasOneUse())
17838 return SDValue();
17839
17840 // Passthru should be undef
17841 SDValue Passthru = N->getOperand(2);
17842 if (!Passthru.isUndef())
17843 return SDValue();
17844
17845 // Mask should be all ones
17846 SDValue Mask = N->getOperand(3);
17847 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17848 return SDValue();
17849
17850 // False value of MergeOp should be all zeros
17851 SDValue Z = MergeOp->getOperand(2);
17852
17853 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17854 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17855 Z = Z.getOperand(1);
17856
17857 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17858 return SDValue();
17859
17860 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17861 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17862 N->getFlags());
17863}
17864
17865 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17866 TargetLowering::DAGCombinerInfo &DCI,
17867 const RISCVSubtarget &Subtarget) {
17868 [[maybe_unused]] unsigned Opc = N->getOpcode();
17869 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17870 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17871
17872 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17873 return V;
17874
17875 return combineVWADDSUBWSelect(N, DCI.DAG);
17876}
17877
17878// Helper function for performMemPairCombine.
17879// Try to combine the memory loads/stores LSNode1 and LSNode2
17880// into a single memory pair operation.
17881 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
17882 LSBaseSDNode *LSNode2, SDValue BasePtr,
17883 uint64_t Imm) {
17884 SmallPtrSet<const SDNode *, 32> Visited;
17885 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
17886
17887 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
17888 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
17889 return SDValue();
17890
17891 MachineFunction &MF = DAG.getMachineFunction();
17892 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17893
17894 // The new operation has twice the width.
17895 MVT XLenVT = Subtarget.getXLenVT();
17896 EVT MemVT = LSNode1->getMemoryVT();
17897 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
17898 MachineMemOperand *MMO = LSNode1->getMemOperand();
17899 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
17900 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
17901
17902 if (LSNode1->getOpcode() == ISD::LOAD) {
17903 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
17904 unsigned Opcode;
17905 if (MemVT == MVT::i32)
17906 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
17907 else
17908 Opcode = RISCVISD::TH_LDD;
17909
17910 SDValue Res = DAG.getMemIntrinsicNode(
17911 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
17912 {LSNode1->getChain(), BasePtr,
17913 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17914 NewMemVT, NewMMO);
17915
17916 SDValue Node1 =
17917 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
17918 SDValue Node2 =
17919 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
17920
17921 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
17922 return Node1;
17923 } else {
17924 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
17925
17926 SDValue Res = DAG.getMemIntrinsicNode(
17927 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
17928 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
17929 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
17930 NewMemVT, NewMMO);
17931
17932 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
17933 return Res;
17934 }
17935}
17936
17937// Try to combine two adjacent loads/stores to a single pair instruction from
17938// the XTHeadMemPair vendor extension.
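// For example, two adjacent i64 loads from Base and Base+8 (with an offset
// that fits the encoding checked below) can be merged into a single th.ldd,
// and matching i32/i64 store pairs into th.swd/th.sdd.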
17939 static SDValue performMemPairCombine(SDNode *N,
17940 TargetLowering::DAGCombinerInfo &DCI) {
17941 SelectionDAG &DAG = DCI.DAG;
17942 MachineFunction &MF = DAG.getMachineFunction();
17943 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
17944
17945 // Target does not support load/store pair.
17946 if (!Subtarget.hasVendorXTHeadMemPair())
17947 return SDValue();
17948
17949 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
17950 EVT MemVT = LSNode1->getMemoryVT();
17951 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
17952
17953 // No volatile, indexed or atomic loads/stores.
17954 if (!LSNode1->isSimple() || LSNode1->isIndexed())
17955 return SDValue();
17956
17957 // Function to get a base + constant representation from a memory value.
17958 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
17959 if (Ptr->getOpcode() == ISD::ADD)
17960 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
17961 return {Ptr->getOperand(0), C1->getZExtValue()};
17962 return {Ptr, 0};
17963 };
17964
17965 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
17966
17967 SDValue Chain = N->getOperand(0);
17968 for (SDUse &Use : Chain->uses()) {
17969 if (Use.getUser() != N && Use.getResNo() == 0 &&
17970 Use.getUser()->getOpcode() == N->getOpcode()) {
17971 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
17972
17973 // No volatile, indexed or atomic loads/stores.
17974 if (!LSNode2->isSimple() || LSNode2->isIndexed())
17975 continue;
17976
17977 // Check if LSNode1 and LSNode2 have the same type and extension.
17978 if (LSNode1->getOpcode() == ISD::LOAD)
17979 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
17980 cast<LoadSDNode>(LSNode1)->getExtensionType())
17981 continue;
17982
17983 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
17984 continue;
17985
17986 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
17987
17988 // Check if the base pointer is the same for both instructions.
17989 if (Base1 != Base2)
17990 continue;
17991
17992 // Check if the offsets match the XTHeadMemPair encoding constraints.
17993 bool Valid = false;
17994 if (MemVT == MVT::i32) {
17995 // Check for adjacent i32 values and a 2-bit index.
17996 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
17997 Valid = true;
17998 } else if (MemVT == MVT::i64) {
17999 // Check for adjacent i64 values and a 2-bit index.
18000 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18001 Valid = true;
18002 }
18003
18004 if (!Valid)
18005 continue;
18006
18007 // Try to combine.
18008 if (SDValue Res =
18009 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18010 return Res;
18011 }
18012 }
18013
18014 return SDValue();
18015}
18016
18017// Fold
18018// (fp_to_int (froundeven X)) -> fcvt X, rne
18019// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18020// (fp_to_int (ffloor X)) -> fcvt X, rdn
18021// (fp_to_int (fceil X)) -> fcvt X, rup
18022// (fp_to_int (fround X)) -> fcvt X, rmm
18023// (fp_to_int (frint X)) -> fcvt X
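// For example, (fp_to_sint (ffloor X)) with an f64 source and i64 result can
// be selected as a single fcvt.l.d with the rdn static rounding mode, instead
// of rounding the FP value first and then converting it.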
18024 static SDValue performFP_TO_INTCombine(SDNode *N,
18025 TargetLowering::DAGCombinerInfo &DCI,
18026 const RISCVSubtarget &Subtarget) {
18027 SelectionDAG &DAG = DCI.DAG;
18028 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18029 MVT XLenVT = Subtarget.getXLenVT();
18030
18031 SDValue Src = N->getOperand(0);
18032
18033 // Don't do this for strict-fp Src.
18034 if (Src->isStrictFPOpcode())
18035 return SDValue();
18036
18037 // Ensure the FP type is legal.
18038 if (!TLI.isTypeLegal(Src.getValueType()))
18039 return SDValue();
18040
18041 // Don't do this for f16 with Zfhmin and not Zfh.
18042 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18043 return SDValue();
18044
18045 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18046 // If the result is invalid, we didn't find a foldable instruction.
18047 if (FRM == RISCVFPRndMode::Invalid)
18048 return SDValue();
18049
18050 SDLoc DL(N);
18051 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18052 EVT VT = N->getValueType(0);
18053
18054 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18055 MVT SrcVT = Src.getSimpleValueType();
18056 MVT SrcContainerVT = SrcVT;
18057 MVT ContainerVT = VT.getSimpleVT();
18058 SDValue XVal = Src.getOperand(0);
18059
18060 // For widening and narrowing conversions we just combine it into a
18061 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18062 // end up getting lowered to their appropriate pseudo instructions based on
18063 // their operand types
18064 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18065 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18066 return SDValue();
18067
18068 // Make fixed-length vectors scalable first
18069 if (SrcVT.isFixedLengthVector()) {
18070 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18071 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18072 ContainerVT =
18073 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18074 }
18075
18076 auto [Mask, VL] =
18077 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18078
18079 SDValue FpToInt;
18080 if (FRM == RISCVFPRndMode::RTZ) {
18081 // Use the dedicated trunc static rounding mode if we're truncating so we
18082 // don't need to generate calls to fsrmi/fsrm
18083 unsigned Opc =
18084 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18085 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18086 } else {
18087 unsigned Opc =
18088 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18089 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18090 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18091 }
18092
18093 // If converted from fixed-length to scalable, convert back
18094 if (VT.isFixedLengthVector())
18095 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18096
18097 return FpToInt;
18098 }
18099
18100 // Only handle XLen or i32 types. Other types narrower than XLen will
18101 // eventually be legalized to XLenVT.
18102 if (VT != MVT::i32 && VT != XLenVT)
18103 return SDValue();
18104
18105 unsigned Opc;
18106 if (VT == XLenVT)
18107 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18108 else
18109 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18110
18111 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18112 DAG.getTargetConstant(FRM, DL, XLenVT));
18113 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18114}
18115
18116// Fold
18117// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18118// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18119// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18120// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18121// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18122// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
18123 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18124 TargetLowering::DAGCombinerInfo &DCI,
18125 const RISCVSubtarget &Subtarget) {
18126 SelectionDAG &DAG = DCI.DAG;
18127 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18128 MVT XLenVT = Subtarget.getXLenVT();
18129
18130 // Only handle XLen types. Other types narrower than XLen will eventually be
18131 // legalized to XLenVT.
18132 EVT DstVT = N->getValueType(0);
18133 if (DstVT != XLenVT)
18134 return SDValue();
18135
18136 SDValue Src = N->getOperand(0);
18137
18138 // Don't do this for strict-fp Src.
18139 if (Src->isStrictFPOpcode())
18140 return SDValue();
18141
18142 // Ensure the FP type is also legal.
18143 if (!TLI.isTypeLegal(Src.getValueType()))
18144 return SDValue();
18145
18146 // Don't do this for f16 with Zfhmin and not Zfh.
18147 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18148 return SDValue();
18149
18150 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18151
18152 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18153 if (FRM == RISCVFPRndMode::Invalid)
18154 return SDValue();
18155
18156 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18157
18158 unsigned Opc;
18159 if (SatVT == DstVT)
18160 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18161 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18162 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18163 else
18164 return SDValue();
18165 // FIXME: Support other SatVTs by clamping before or after the conversion.
18166
18167 Src = Src.getOperand(0);
18168
18169 SDLoc DL(N);
18170 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18171 DAG.getTargetConstant(FRM, DL, XLenVT));
18172
18173 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18174 // extend.
18175 if (Opc == RISCVISD::FCVT_WU_RV64)
18176 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18177
18178 // RISC-V FP-to-int conversions saturate to the destination register size, but
18179 // don't produce 0 for nan.
18180 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18181 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18182}
18183
18184// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18185// smaller than XLenVT.
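// For example, for an i16 value, bswap swaps the two bytes and bitreverse then
// reverses all 16 bits; the combined effect is to reverse the bits within each
// byte, which is exactly what BREV8 computes.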
18187 const RISCVSubtarget &Subtarget) {
18188 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18189
18190 SDValue Src = N->getOperand(0);
18191 if (Src.getOpcode() != ISD::BSWAP)
18192 return SDValue();
18193
18194 EVT VT = N->getValueType(0);
18195   if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18196       !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18197     return SDValue();
18198
18199 SDLoc DL(N);
18200 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18201}
18202
18204 const RISCVSubtarget &Subtarget) {
18205 // Fold:
18206 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
18207
18208 // Check if its first operand is a vp.load.
18209 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18210 if (!VPLoad)
18211 return SDValue();
18212
18213 EVT LoadVT = VPLoad->getValueType(0);
18214 // We do not have a strided_load version for masks, and the evl of vp.reverse
18215 // and vp.load should always be the same.
18216 if (!LoadVT.getVectorElementType().isByteSized() ||
18217 N->getOperand(2) != VPLoad->getVectorLength() ||
18218 !N->getOperand(0).hasOneUse())
18219 return SDValue();
18220
18221   // Check if the mask of the outer vp.reverse is all ones.
18222 if (!isOneOrOneSplat(N->getOperand(1)))
18223 return SDValue();
18224
18225 SDValue LoadMask = VPLoad->getMask();
18226   // If the mask is all ones, then the load is unmasked and can be reversed.
18227 if (!isOneOrOneSplat(LoadMask)) {
18228 // If the mask is not all ones, we can reverse the load if the mask was also
18229 // reversed by an unmasked vp.reverse with the same EVL.
18230 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18231 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18232 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18233 return SDValue();
18234 LoadMask = LoadMask.getOperand(0);
18235 }
18236
18237 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18238 SDLoc DL(N);
18239 MVT XLenVT = Subtarget.getXLenVT();
18240 SDValue NumElem = VPLoad->getVectorLength();
18241 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18242
18243 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18244 DAG.getConstant(1, DL, XLenVT));
18245 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18246 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18247 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18248 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18249
18251   MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18252   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18253       PtrInfo, VPLoad->getMemOperand()->getFlags(),
18254       LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18255
18256 SDValue Ret = DAG.getStridedLoadVP(
18257 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18258 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18259
18260 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18261
18262 return Ret;
18263}
18264
18266 const RISCVSubtarget &Subtarget) {
18267 // Fold:
18268 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18269 // -1, MASK)
18270 auto *VPStore = cast<VPStoreSDNode>(N);
18271
18272 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18273 return SDValue();
18274
18275 SDValue VPReverse = VPStore->getValue();
18276 EVT ReverseVT = VPReverse->getValueType(0);
18277
18278 // We do not have a strided_store version for masks, and the evl of vp.reverse
18279 // and vp.store should always be the same.
18280 if (!ReverseVT.getVectorElementType().isByteSized() ||
18281 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18282 !VPReverse.hasOneUse())
18283 return SDValue();
18284
18285 SDValue StoreMask = VPStore->getMask();
18286   // If the mask is all ones, then the store is unmasked and can be reversed.
18287 if (!isOneOrOneSplat(StoreMask)) {
18288 // If the mask is not all ones, we can reverse the store if the mask was
18289 // also reversed by an unmasked vp.reverse with the same EVL.
18290 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18291 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18292 StoreMask.getOperand(2) != VPStore->getVectorLength())
18293 return SDValue();
18294 StoreMask = StoreMask.getOperand(0);
18295 }
18296
18297 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18298 SDLoc DL(N);
18299 MVT XLenVT = Subtarget.getXLenVT();
18300 SDValue NumElem = VPStore->getVectorLength();
18301 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18302
18303 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18304 DAG.getConstant(1, DL, XLenVT));
18305 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18306 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18307 SDValue Base =
18308 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18309 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18310
18312   MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18313   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18314       PtrInfo, VPStore->getMemOperand()->getFlags(),
18315       LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18316
18317 return DAG.getStridedStoreVP(
18318 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18319 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18320 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18321 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18322}
18323
18324// Peephole avgceil pattern.
18325// %1 = zext <N x i8> %a to <N x i32>
18326// %2 = zext <N x i8> %b to <N x i32>
18327// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18328// %4 = add nuw nsw <N x i32> %3, %2
18329// %5 = lshr <N x i32> %4, splat (i32 1)
18330// %6 = trunc <N x i32> %5 to <N x i8>
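// Taken together, the IR above computes trunc((zext(a) + zext(b) + 1) >> 1),
// i.e. an unsigned rounding-up average of a and b, which is what the
// AVGCEILU_VL node built at the end of this combine represents.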
18332 const RISCVSubtarget &Subtarget) {
18333 EVT VT = N->getValueType(0);
18334
18335 // Ignore fixed vectors.
18336 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18337 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18338 return SDValue();
18339
18340 SDValue In = N->getOperand(0);
18341 SDValue Mask = N->getOperand(1);
18342 SDValue VL = N->getOperand(2);
18343
18344 // Input should be a vp_srl with same mask and VL.
18345 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18346 In.getOperand(3) != VL)
18347 return SDValue();
18348
18349 // Shift amount should be 1.
18350 if (!isOneOrOneSplat(In.getOperand(1)))
18351 return SDValue();
18352
18353 // Shifted value should be a vp_add with same mask and VL.
18354 SDValue LHS = In.getOperand(0);
18355 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18356 LHS.getOperand(3) != VL)
18357 return SDValue();
18358
18359 SDValue Operands[3];
18360
18361 // Matches another VP_ADD with same VL and Mask.
18362 auto FindAdd = [&](SDValue V, SDValue Other) {
18363 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18364 V.getOperand(3) != VL)
18365 return false;
18366
18367 Operands[0] = Other;
18368 Operands[1] = V.getOperand(1);
18369 Operands[2] = V.getOperand(0);
18370 return true;
18371 };
18372
18373 // We need to find another VP_ADD in one of the operands.
18374 SDValue LHS0 = LHS.getOperand(0);
18375 SDValue LHS1 = LHS.getOperand(1);
18376 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18377 return SDValue();
18378
18379 // Now we have three operands of two additions. Check that one of them is a
18380 // constant vector with ones.
18381 auto I = llvm::find_if(Operands,
18382 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18383 if (I == std::end(Operands))
18384 return SDValue();
18385 // We found a vector with ones, move if it to the end of the Operands array.
18386 std::swap(*I, Operands[2]);
18387
18388 // Make sure the other 2 operands can be promoted from the result type.
18389 for (SDValue Op : drop_end(Operands)) {
18390 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18391 Op.getOperand(2) != VL)
18392 return SDValue();
18393 // Input must be the same size or smaller than our result.
18394 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18395 return SDValue();
18396 }
18397
18398 // Pattern is detected.
18399 // Rebuild the zero extends in case the inputs are smaller than our result.
18400 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18401 Operands[0].getOperand(0), Mask, VL);
18402 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18403 Operands[1].getOperand(0), Mask, VL);
18404   // Build an AVGCEILU_VL, which will be selected as a VAADDU with RNU rounding
18405   // mode.
18406 SDLoc DL(N);
18407 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18408 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18409}
18410
18411// Convert from one FMA opcode to another based on whether we are negating the
18412// multiply result and/or the accumulator.
18413// NOTE: Only supports RVV operations with VL.
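// For example, VFMADD_VL computes (a * b) + c. Negating the multiply result
// yields -(a * b) + c, which is VFNMSUB_VL; negating only the accumulator
// yields (a * b) - c, which is VFMSUB_VL.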
18414static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18415 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18416 if (NegMul) {
18417 // clang-format off
18418 switch (Opcode) {
18419 default: llvm_unreachable("Unexpected opcode");
18420 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18421 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18422 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18423 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18424 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18425 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18426 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18427 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18428 }
18429 // clang-format on
18430 }
18431
18432 // Negating the accumulator changes ADD<->SUB.
18433 if (NegAcc) {
18434 // clang-format off
18435 switch (Opcode) {
18436 default: llvm_unreachable("Unexpected opcode");
18437 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18438 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18439 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18440 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18441 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18442 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18443 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18444 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18445 }
18446 // clang-format on
18447 }
18448
18449 return Opcode;
18450}
18451
18452 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18453   // Fold FNEG_VL into FMA opcodes.
18454 // The first operand of strict-fp is chain.
18455 bool IsStrict =
18456 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18457 unsigned Offset = IsStrict ? 1 : 0;
18458 SDValue A = N->getOperand(0 + Offset);
18459 SDValue B = N->getOperand(1 + Offset);
18460 SDValue C = N->getOperand(2 + Offset);
18461 SDValue Mask = N->getOperand(3 + Offset);
18462 SDValue VL = N->getOperand(4 + Offset);
18463
18464 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18465 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18466 V.getOperand(2) == VL) {
18467 // Return the negated input.
18468 V = V.getOperand(0);
18469 return true;
18470 }
18471
18472 return false;
18473 };
18474
18475 bool NegA = invertIfNegative(A);
18476 bool NegB = invertIfNegative(B);
18477 bool NegC = invertIfNegative(C);
18478
18479 // If no operands are negated, we're done.
18480 if (!NegA && !NegB && !NegC)
18481 return SDValue();
18482
18483 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18484 if (IsStrict)
18485 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18486 {N->getOperand(0), A, B, C, Mask, VL});
18487 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18488 VL);
18489}
18490
18493 const RISCVSubtarget &Subtarget) {
18494 SelectionDAG &DAG = DCI.DAG;
18495
18496   if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18497     return V;
18498
18499 // FIXME: Ignore strict opcodes for now.
18500 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18501 return SDValue();
18502
18503 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18504}
18505
18507 const RISCVSubtarget &Subtarget) {
18508 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18509
18510 EVT VT = N->getValueType(0);
18511
18512 if (VT != Subtarget.getXLenVT())
18513 return SDValue();
18514
18515 if (!isa<ConstantSDNode>(N->getOperand(1)))
18516 return SDValue();
18517 uint64_t ShAmt = N->getConstantOperandVal(1);
18518
18519 SDValue N0 = N->getOperand(0);
18520
18521 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18522 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18523 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18524 unsigned ExtSize =
18525 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18526     if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18527         N0.getOperand(0).hasOneUse() &&
18528         isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18529       uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18530 if (LShAmt < ExtSize) {
18531 unsigned Size = VT.getSizeInBits();
18532 SDLoc ShlDL(N0.getOperand(0));
18533 SDValue Shl =
18534 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18535 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18536 SDLoc DL(N);
18537 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18538 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18539 }
18540 }
18541 }
18542
18543 if (ShAmt > 32 || VT != MVT::i64)
18544 return SDValue();
18545
18546 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18547 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18548 //
18549 // Also try these folds where an add or sub is in the middle.
18550 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
18551 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
18552 SDValue Shl;
18553 ConstantSDNode *AddC = nullptr;
18554
18555 // We might have an ADD or SUB between the SRA and SHL.
18556 bool IsAdd = N0.getOpcode() == ISD::ADD;
18557 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18558 // Other operand needs to be a constant we can modify.
18559 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18560 if (!AddC)
18561 return SDValue();
18562
18563 // AddC needs to have at least 32 trailing zeros.
18564 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18565 return SDValue();
18566
18567 // All users should be a shift by constant less than or equal to 32. This
18568 // ensures we'll do this optimization for each of them to produce an
18569 // add/sub+sext_inreg they can all share.
18570 for (SDNode *U : N0->users()) {
18571 if (U->getOpcode() != ISD::SRA ||
18572 !isa<ConstantSDNode>(U->getOperand(1)) ||
18573 U->getConstantOperandVal(1) > 32)
18574 return SDValue();
18575 }
18576
18577 Shl = N0.getOperand(IsAdd ? 0 : 1);
18578 } else {
18579 // Not an ADD or SUB.
18580 Shl = N0;
18581 }
18582
18583 // Look for a shift left by 32.
18584 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18585 Shl.getConstantOperandVal(1) != 32)
18586 return SDValue();
18587
18588   // If we didn't look through an add/sub, then the shl should have one use.
18589   // If we did look through an add/sub, the sext_inreg we create is free, so
18590   // we're only creating 2 new instructions. It's enough to only remove the
18591   // original sra+add/sub.
18592 if (!AddC && !Shl.hasOneUse())
18593 return SDValue();
18594
18595 SDLoc DL(N);
18596 SDValue In = Shl.getOperand(0);
18597
18598 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18599 // constant.
18600 if (AddC) {
18601 SDValue ShiftedAddC =
18602 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18603 if (IsAdd)
18604 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18605 else
18606 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18607 }
18608
18609 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18610 DAG.getValueType(MVT::i32));
18611 if (ShAmt == 32)
18612 return SExt;
18613
18614 return DAG.getNode(
18615 ISD::SHL, DL, MVT::i64, SExt,
18616 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18617}
18618
18619 // Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
18620// the result is used as the condition of a br_cc or select_cc we can invert,
18621// inverting the setcc is free, and Z is 0/1. Caller will invert the
18622// br_cc/select_cc.
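// For example, with Z known to be 0/1:
//   (and (setcc X, Y, eq), (xor Z, 1)) -> (or (setcc X, Y, ne), Z)
// which is the inverse of the original condition; the caller compensates by
// inverting the br_cc/select_cc that consumes it.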
18623 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18624   bool IsAnd = Cond.getOpcode() == ISD::AND;
18625 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18626 return SDValue();
18627
18628 if (!Cond.hasOneUse())
18629 return SDValue();
18630
18631 SDValue Setcc = Cond.getOperand(0);
18632 SDValue Xor = Cond.getOperand(1);
18633 // Canonicalize setcc to LHS.
18634 if (Setcc.getOpcode() != ISD::SETCC)
18635 std::swap(Setcc, Xor);
18636 // LHS should be a setcc and RHS should be an xor.
18637 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18638 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18639 return SDValue();
18640
18641 // If the condition is an And, SimplifyDemandedBits may have changed
18642 // (xor Z, 1) to (not Z).
18643 SDValue Xor1 = Xor.getOperand(1);
18644 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18645 return SDValue();
18646
18647 EVT VT = Cond.getValueType();
18648 SDValue Xor0 = Xor.getOperand(0);
18649
18650   // The LHS of the xor needs to be 0/1.
18651   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18652   if (!DAG.MaskedValueIsZero(Xor0, Mask))
18653 return SDValue();
18654
18655 // We can only invert integer setccs.
18656 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18657 if (!SetCCOpVT.isScalarInteger())
18658 return SDValue();
18659
18660 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18661 if (ISD::isIntEqualitySetCC(CCVal)) {
18662 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18663 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18664 Setcc.getOperand(1), CCVal);
18665 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18666 // Invert (setlt 0, X) by converting to (setlt X, 1).
18667 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18668 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18669 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18670     // Invert (setlt X, 1) by converting to (setlt 0, X).
18671 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18672 DAG.getConstant(0, SDLoc(Setcc), VT),
18673 Setcc.getOperand(0), CCVal);
18674 } else
18675 return SDValue();
18676
18677 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18678 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18679}
18680
18681// Perform common combines for BR_CC and SELECT_CC conditions.
18682static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18683 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18684 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18685
18686   // Since an arithmetic right shift always preserves the sign bit,
18687   // the shift can be omitted.
18688 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18689 // setge (sra X, N), 0 -> setge X, 0
18690 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18691 LHS.getOpcode() == ISD::SRA) {
18692 LHS = LHS.getOperand(0);
18693 return true;
18694 }
18695
18696 if (!ISD::isIntEqualitySetCC(CCVal))
18697 return false;
18698
18699 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18700 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18701 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18702 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18703 // If we're looking for eq 0 instead of ne 0, we need to invert the
18704 // condition.
18705 bool Invert = CCVal == ISD::SETEQ;
18706 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18707 if (Invert)
18708 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18709
18710 RHS = LHS.getOperand(1);
18711 LHS = LHS.getOperand(0);
18712 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18713
18714 CC = DAG.getCondCode(CCVal);
18715 return true;
18716 }
18717
18718 // If XOR is reused and has an immediate that will fit in XORI,
18719 // do not fold.
18720 auto isXorImmediate = [](const SDValue &Op) -> bool {
18721 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18722 return isInt<12>(XorCnst->getSExtValue());
18723 return false;
18724 };
18725 // Fold (X(i1) ^ 1) == 0 -> X != 0
18726 auto singleBitOp = [&DAG](const SDValue &VarOp,
18727 const SDValue &ConstOp) -> bool {
18728 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18729 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18730 return (XorCnst->getSExtValue() == 1) &&
18731 DAG.MaskedValueIsZero(VarOp, Mask);
18732 }
18733 return false;
18734 };
18735 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18736 for (const SDNode *UserNode : Op->users()) {
18737 const unsigned Opcode = UserNode->getOpcode();
18738 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18739 return false;
18740 }
18741 return true;
18742 };
18743 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18744 const SDValue &LHS, const SDValue &RHS) -> bool {
18745 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18746 (!isXorImmediate(LHS.getOperand(1)) ||
18747 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18748 onlyUsedBySelectOrBR(LHS));
18749 };
18750 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18751 if (isFoldableXorEq(LHS, RHS)) {
18752 RHS = LHS.getOperand(1);
18753 LHS = LHS.getOperand(0);
18754 return true;
18755 }
18756   // Fold ((sext (xor X, C)), 0, eq/ne) -> (sext(X), sext(C), eq/ne)
18757 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18758 const SDValue LHS0 = LHS.getOperand(0);
18759 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18760       // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
18761 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18762 LHS0.getOperand(1), LHS.getOperand(1));
18763 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18764 LHS0.getOperand(0), LHS.getOperand(1));
18765 return true;
18766 }
18767 }
18768
18769 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18770 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18771 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18772 SDValue LHS0 = LHS.getOperand(0);
18773 if (LHS0.getOpcode() == ISD::AND &&
18774 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18775 uint64_t Mask = LHS0.getConstantOperandVal(1);
18776 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18777 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18778 // XAndesPerf supports branch on test bit.
18779 if (Subtarget.hasVendorXAndesPerf()) {
18780 LHS =
18781 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18782 DAG.getConstant(Mask, DL, LHS.getValueType()));
18783 return true;
18784 }
18785
18786 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18787 CC = DAG.getCondCode(CCVal);
18788
18789 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18790 LHS = LHS0.getOperand(0);
18791 if (ShAmt != 0)
18792 LHS =
18793 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18794 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18795 return true;
18796 }
18797 }
18798 }
18799
18800   // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18801 // This can occur when legalizing some floating point comparisons.
18802 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18803 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18804 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18805 CC = DAG.getCondCode(CCVal);
18806 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18807 return true;
18808 }
18809
18810 if (isNullConstant(RHS)) {
18811 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18812 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18813 CC = DAG.getCondCode(CCVal);
18814 LHS = NewCond;
18815 return true;
18816 }
18817 }
18818
18819 return false;
18820}
18821
18822// Fold
18823// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18824// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18825// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18826// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18827// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18828// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
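// For example, (select C, (add Y, X), Y) becomes (add Y, (select C, X, 0)):
// when C is false the inner select yields the identity value 0, so the add
// still produces Y, and the outer select over the binop disappears.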
18829 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18830                                    SDValue TrueVal, SDValue FalseVal,
18831 bool Swapped) {
18832 bool Commutative = true;
18833 unsigned Opc = TrueVal.getOpcode();
18834 switch (Opc) {
18835 default:
18836 return SDValue();
18837 case ISD::SHL:
18838 case ISD::SRA:
18839 case ISD::SRL:
18840 case ISD::SUB:
18841 case ISD::ROTL:
18842 case ISD::ROTR:
18843 Commutative = false;
18844 break;
18845 case ISD::ADD:
18846 case ISD::OR:
18847 case ISD::XOR:
18848 case ISD::UMIN:
18849 case ISD::UMAX:
18850 break;
18851 }
18852
18853 if (!TrueVal.hasOneUse())
18854 return SDValue();
18855
18856 unsigned OpToFold;
18857 if (FalseVal == TrueVal.getOperand(0))
18858 OpToFold = 0;
18859 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18860 OpToFold = 1;
18861 else
18862 return SDValue();
18863
18864 EVT VT = N->getValueType(0);
18865 SDLoc DL(N);
18866 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18867 EVT OtherOpVT = OtherOp.getValueType();
18868 SDValue IdentityOperand =
18869 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18870 if (!Commutative)
18871 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18872 assert(IdentityOperand && "No identity operand!");
18873
18874 if (Swapped)
18875 std::swap(OtherOp, IdentityOperand);
18876 SDValue NewSel =
18877 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
18878 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
18879}
18880
18881 // This tries to get rid of the `select` and `icmp` that are used to handle
18882 // targets that do not support `cttz(0)`/`ctlz(0)`.
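// For example, (select (icmp eq X, 0), 0, (cttz X)) becomes
// (and (cttz X), BitWidth - 1): cttz returns BitWidth for a zero input, and
// masking with BitWidth - 1 maps that case back to 0 when BitWidth is a power
// of two.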
18883 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
18884   SDValue Cond = N->getOperand(0);
18885
18886 // This represents either CTTZ or CTLZ instruction.
18887 SDValue CountZeroes;
18888
18889 SDValue ValOnZero;
18890
18891 if (Cond.getOpcode() != ISD::SETCC)
18892 return SDValue();
18893
18894 if (!isNullConstant(Cond->getOperand(1)))
18895 return SDValue();
18896
18897 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
18898 if (CCVal == ISD::CondCode::SETEQ) {
18899 CountZeroes = N->getOperand(2);
18900 ValOnZero = N->getOperand(1);
18901 } else if (CCVal == ISD::CondCode::SETNE) {
18902 CountZeroes = N->getOperand(1);
18903 ValOnZero = N->getOperand(2);
18904 } else {
18905 return SDValue();
18906 }
18907
18908 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
18909 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
18910 CountZeroes = CountZeroes.getOperand(0);
18911
18912 if (CountZeroes.getOpcode() != ISD::CTTZ &&
18913 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
18914 CountZeroes.getOpcode() != ISD::CTLZ &&
18915 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
18916 return SDValue();
18917
18918 if (!isNullConstant(ValOnZero))
18919 return SDValue();
18920
18921 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
18922 if (Cond->getOperand(0) != CountZeroesArgument)
18923 return SDValue();
18924
18925 unsigned BitWidth = CountZeroes.getValueSizeInBits();
18926 if (!isPowerOf2_32(BitWidth))
18927 return SDValue();
18928
18929 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
18930 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
18931 CountZeroes.getValueType(), CountZeroesArgument);
18932 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
18933 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
18934 CountZeroes.getValueType(), CountZeroesArgument);
18935 }
18936
18937 SDValue BitWidthMinusOne =
18938 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
18939
18940 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
18941 CountZeroes, BitWidthMinusOne);
18942 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
18943}
18944
18945 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
18946                                 const RISCVSubtarget &Subtarget) {
18947 SDValue Cond = N->getOperand(0);
18948 SDValue True = N->getOperand(1);
18949 SDValue False = N->getOperand(2);
18950 SDLoc DL(N);
18951 EVT VT = N->getValueType(0);
18952 EVT CondVT = Cond.getValueType();
18953
18954 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
18955 return SDValue();
18956
18957   // Replace (setcc eq (and x, C), 0) with (setcc ne (and x, C), 0) to
18958   // generate BEXTI, where C is a power of 2.
18959 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
18960 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
18961 SDValue LHS = Cond.getOperand(0);
18962 SDValue RHS = Cond.getOperand(1);
18963 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
18964 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
18965 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
18966 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
18967 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
18968 return DAG.getSelect(DL, VT,
18969 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
18970 False, True);
18971 }
18972 }
18973 return SDValue();
18974}
18975
18976static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
18977 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
18978 return false;
18979
18980 SwapCC = false;
18981 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
18982 std::swap(TrueVal, FalseVal);
18983 SwapCC = true;
18984 }
18985
18986 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
18987 return false;
18988
18989 SDValue A = FalseVal.getOperand(0);
18990 SDValue B = FalseVal.getOperand(1);
18991 // Add is commutative, so check both orders
18992 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
18993 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
18994}
18995
18996/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
18997/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
18998/// register pressure over the add followed by masked vsub sequence.
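/// For example, (vselect CC, (add a, b), (sub a, b)) becomes
/// (add a, (vselect CC, b, -b)), where the inner vselect is the part that can
/// be matched as a masked vrsub.vi with immediate 0.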
19000 SDLoc DL(N);
19001 EVT VT = N->getValueType(0);
19002 SDValue CC = N->getOperand(0);
19003 SDValue TrueVal = N->getOperand(1);
19004 SDValue FalseVal = N->getOperand(2);
19005
19006 bool SwapCC;
19007 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19008 return SDValue();
19009
19010 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19011 SDValue A = Sub.getOperand(0);
19012 SDValue B = Sub.getOperand(1);
19013
19014 // Arrange the select such that we can match a masked
19015 // vrsub.vi to perform the conditional negate
19016 SDValue NegB = DAG.getNegative(B, DL, VT);
19017 if (!SwapCC)
19018 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19019 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19020 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19021}
19022
19024 const RISCVSubtarget &Subtarget) {
19025 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19026 return Folded;
19027
19028 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19029 return V;
19030
19031 if (Subtarget.hasConditionalMoveFusion())
19032 return SDValue();
19033
19034 SDValue TrueVal = N->getOperand(1);
19035 SDValue FalseVal = N->getOperand(2);
19036 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19037 return V;
19038 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19039}
19040
19041/// If we have a build_vector where each lane is binop X, C, where C
19042/// is a constant (but not necessarily the same constant on all lanes),
19043/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19044/// We assume that materializing a constant build vector will be no more
19045 /// expensive than performing O(n) binops.
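/// For example, build_vector (add X0, 1), (add X1, 2), (add X2, 3), (add X3, 4)
/// becomes (add (build_vector X0, X1, X2, X3), (build_vector 1, 2, 3, 4)),
/// trading N scalar binops for one vector binop and a constant materialization.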
19047 const RISCVSubtarget &Subtarget,
19048 const RISCVTargetLowering &TLI) {
19049 SDLoc DL(N);
19050 EVT VT = N->getValueType(0);
19051
19052 assert(!VT.isScalableVector() && "unexpected build vector");
19053
19054 if (VT.getVectorNumElements() == 1)
19055 return SDValue();
19056
19057 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19058 if (!TLI.isBinOp(Opcode))
19059 return SDValue();
19060
19061 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19062 return SDValue();
19063
19064 // This BUILD_VECTOR involves an implicit truncation, and sinking
19065 // truncates through binops is non-trivial.
19066 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19067 return SDValue();
19068
19069 SmallVector<SDValue> LHSOps;
19070 SmallVector<SDValue> RHSOps;
19071 for (SDValue Op : N->ops()) {
19072 if (Op.isUndef()) {
19073 // We can't form a divide or remainder from undef.
19074 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19075 return SDValue();
19076
19077 LHSOps.push_back(Op);
19078 RHSOps.push_back(Op);
19079 continue;
19080 }
19081
19082     // TODO: We can handle operations which have a neutral rhs value
19083 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19084 // of profit in a more explicit manner.
19085 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19086 return SDValue();
19087
19088 LHSOps.push_back(Op.getOperand(0));
19089 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19090 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19091 return SDValue();
19092 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19093 // have different LHS and RHS types.
19094 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19095 return SDValue();
19096
19097 RHSOps.push_back(Op.getOperand(1));
19098 }
19099
19100 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19101 DAG.getBuildVector(VT, DL, RHSOps));
19102}
19103
19104 static MVT getQDOTXResultType(MVT OpVT) {
19105   ElementCount OpEC = OpVT.getVectorElementCount();
19106 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19107 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19108}
19109
19110/// Given fixed length vectors A and B with equal element types, but possibly
19111/// different number of elements, return A + B where either A or B is zero
19112/// padded to the larger number of elements.
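/// For example, adding a v2i32 A into a v8i32 B extracts the low v2i32 of B,
/// adds it to A, and inserts the sum back into B at element 0; the upper lanes
/// of B pass through unchanged, as if A had been zero padded to v8i32.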
19113 static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19114                                 SelectionDAG &DAG) {
19115 // NOTE: Manually doing the extract/add/insert scheme produces
19116 // significantly better codegen than the naive pad with zeros
19117 // and add scheme.
19118 EVT AVT = A.getValueType();
19119   EVT BVT = B.getValueType();
19120   assert(AVT.getVectorElementType() == BVT.getVectorElementType());
19121   if (AVT.getVectorMinNumElements() > BVT.getVectorMinNumElements()) {
19122     std::swap(A, B);
19123     std::swap(AVT, BVT);
19124   }
19125
19126 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19127 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19128 return DAG.getInsertSubvector(DL, B, Res, 0);
19129}
19130
19131 static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19132                                          SelectionDAG &DAG,
19133 const RISCVSubtarget &Subtarget,
19134 const RISCVTargetLowering &TLI) {
19135 using namespace SDPatternMatch;
19136 // Note: We intentionally do not check the legality of the reduction type.
19137 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19138 // intermediate types flow through here.
19139 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19141 return SDValue();
19142
19143 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19144 // form).
19145 SDValue A, B;
19146 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19147 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19148 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19149 if (AOpt || BOpt) {
19150 if (AOpt)
19151 A = AOpt;
19152 if (BOpt)
19153 B = BOpt;
19154 // From here, we're doing A + B with mixed types, implicitly zero
19155 // padded to the wider type. Note that we *don't* need the result
19156 // type to be the original VT, and in fact prefer narrower ones
19157 // if possible.
19158 return getZeroPaddedAdd(DL, A, B, DAG);
19159 }
19160 }
19161
19162 // zext a <--> partial_reduce_umla 0, a, 1
19163 // sext a <--> partial_reduce_smla 0, a, 1
19164 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19165 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19166 SDValue A = InVec.getOperand(0);
19167 EVT OpVT = A.getValueType();
19168 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19169 return SDValue();
19170
19171 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19172 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19173 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19174 unsigned Opc =
19175 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19176 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19177 }
19178
19179 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19180 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19181 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19182 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19183 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19184 return SDValue();
19185
19186 if (!ISD::isExtOpcode(A.getOpcode()))
19187 return SDValue();
19188
19189 EVT OpVT = A.getOperand(0).getValueType();
19190 if (OpVT.getVectorElementType() != MVT::i8 ||
19191 OpVT != B.getOperand(0).getValueType() ||
19192 !TLI.isTypeLegal(A.getValueType()))
19193 return SDValue();
19194
19195 unsigned Opc;
19196 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19197 Opc = ISD::PARTIAL_REDUCE_SMLA;
19198 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19199 B.getOpcode() == ISD::ZERO_EXTEND)
19200 Opc = ISD::PARTIAL_REDUCE_UMLA;
19201 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19202 B.getOpcode() == ISD::ZERO_EXTEND)
19203 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19204 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19205 B.getOpcode() == ISD::SIGN_EXTEND) {
19206 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19207 std::swap(A, B);
19208 } else
19209 return SDValue();
19210
19211 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19212 return DAG.getNode(
19213 Opc, DL, ResVT,
19214 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19215}
19216
19218 const RISCVSubtarget &Subtarget,
19219 const RISCVTargetLowering &TLI) {
19220 if (!Subtarget.hasStdExtZvqdotq())
19221 return SDValue();
19222
19223 SDLoc DL(N);
19224 EVT VT = N->getValueType(0);
19225 SDValue InVec = N->getOperand(0);
19226 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19227 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19228 return SDValue();
19229}
19230
19232 const RISCVSubtarget &Subtarget,
19233 const RISCVTargetLowering &TLI) {
19234 SDValue InVec = N->getOperand(0);
19235 SDValue InVal = N->getOperand(1);
19236 SDValue EltNo = N->getOperand(2);
19237 SDLoc DL(N);
19238
19239 EVT VT = InVec.getValueType();
19240 if (VT.isScalableVector())
19241 return SDValue();
19242
19243 if (!InVec.hasOneUse())
19244 return SDValue();
19245
19246 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19247 // move the insert_vector_elts into the arms of the binop. Note that
19248 // the new RHS must be a constant.
19249 const unsigned InVecOpcode = InVec->getOpcode();
19250 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19251 InVal.hasOneUse()) {
19252 SDValue InVecLHS = InVec->getOperand(0);
19253 SDValue InVecRHS = InVec->getOperand(1);
19254 SDValue InValLHS = InVal->getOperand(0);
19255 SDValue InValRHS = InVal->getOperand(1);
19256
19257     if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19258       return SDValue();
19259 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19260 return SDValue();
19261 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19262 // have different LHS and RHS types.
19263 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19264 return SDValue();
19265     SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19266                               InVecLHS, InValLHS, EltNo);
19267     SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19268                               InVecRHS, InValRHS, EltNo);
19269 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19270 }
19271
19272 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19273 // move the insert_vector_elt to the source operand of the concat_vector.
19274 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19275 return SDValue();
19276
19277 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19278 if (!IndexC)
19279 return SDValue();
19280 unsigned Elt = IndexC->getZExtValue();
19281
19282 EVT ConcatVT = InVec.getOperand(0).getValueType();
19283 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19284 return SDValue();
19285 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19286 unsigned NewIdx = Elt % ConcatNumElts;
19287
19288 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19289 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19290 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19291
19292 SmallVector<SDValue> ConcatOps(InVec->ops());
19293 ConcatOps[ConcatOpIdx] = ConcatOp;
19294 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19295}
19296
19297// If we're concatenating a series of vector loads like
19298// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19299// Then we can turn this into a strided load by widening the vector elements
19300// vlse32 p, stride=n
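// For example, four v4i8 loads from p, p+n, p+2*n and p+3*n become one
// strided load of v4i32 with stride n (each i32 element holding one original
// v4i8 chunk), which is then bitcast back to the original result type.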
19302 const RISCVSubtarget &Subtarget,
19303 const RISCVTargetLowering &TLI) {
19304 SDLoc DL(N);
19305 EVT VT = N->getValueType(0);
19306
19307 // Only perform this combine on legal MVTs.
19308 if (!TLI.isTypeLegal(VT))
19309 return SDValue();
19310
19311 // TODO: Potentially extend this to scalable vectors
19312 if (VT.isScalableVector())
19313 return SDValue();
19314
19315 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19316 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19317 !SDValue(BaseLd, 0).hasOneUse())
19318 return SDValue();
19319
19320 EVT BaseLdVT = BaseLd->getValueType(0);
19321
19322   // Go through the loads and check that they're strided.
19323   SmallVector<LoadSDNode *> Lds;
19324   Lds.push_back(BaseLd);
19325 Align Align = BaseLd->getAlign();
19326 for (SDValue Op : N->ops().drop_front()) {
19327 auto *Ld = dyn_cast<LoadSDNode>(Op);
19328 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19329 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19330 Ld->getValueType(0) != BaseLdVT)
19331 return SDValue();
19332
19333 Lds.push_back(Ld);
19334
19335 // The common alignment is the most restrictive (smallest) of all the loads
19336 Align = std::min(Align, Ld->getAlign());
19337 }
19338
19339 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19340 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19341 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19342 // If the load ptrs can be decomposed into a common (Base + Index) with a
19343 // common constant stride, then return the constant stride.
19344 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19345 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19346 if (BIO1.equalBaseIndex(BIO2, DAG))
19347 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19348
19349 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19350 SDValue P1 = Ld1->getBasePtr();
19351 SDValue P2 = Ld2->getBasePtr();
19352 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19353 return {{P2.getOperand(1), false}};
19354 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19355 return {{P1.getOperand(1), true}};
19356
19357 return std::nullopt;
19358 };
19359
19360 // Get the distance between the first and second loads
19361 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19362 if (!BaseDiff)
19363 return SDValue();
19364
19365 // Check all the loads are the same distance apart
19366 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19367 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19368 return SDValue();
19369
19370 // TODO: At this point, we've successfully matched a generalized gather
19371 // load. Maybe we should emit that, and then move the specialized
19372 // matchers above and below into a DAG combine?
19373
19374   // Get the widened scalar type, e.g. v4i8 -> i32
19375 unsigned WideScalarBitWidth =
19376 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19377 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19378
19379   // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
19380 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19381 if (!TLI.isTypeLegal(WideVecVT))
19382 return SDValue();
19383
19384 // Check that the operation is legal
19385 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19386 return SDValue();
19387
19388 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19389 SDValue Stride =
19390 std::holds_alternative<SDValue>(StrideVariant)
19391 ? std::get<SDValue>(StrideVariant)
19392 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19393 Lds[0]->getOffset().getValueType());
19394 if (MustNegateStride)
19395 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19396
19397 SDValue AllOneMask =
19398 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19399 DAG.getConstant(1, DL, MVT::i1));
19400
19401 uint64_t MemSize;
19402 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19403 ConstStride && ConstStride->getSExtValue() >= 0)
19404 // total size = (elsize * n) + (stride - elsize) * (n-1)
19405 // = elsize + stride * (n-1)
19406 MemSize = WideScalarVT.getSizeInBits() +
19407 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19408 else
19409 // If Stride isn't constant, then we can't know how much it will load
19410     MemSize = MemoryLocation::UnknownSize;
19411
19412   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19413       BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19414       Align);
19415
19416 SDValue StridedLoad = DAG.getStridedLoadVP(
19417 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19418 AllOneMask,
19419 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19420
19421 for (SDValue Ld : N->ops())
19422 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19423
19424 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19425}
19426
19428 const RISCVSubtarget &Subtarget,
19429 const RISCVTargetLowering &TLI) {
19430 SDLoc DL(N);
19431 EVT VT = N->getValueType(0);
19432 const unsigned ElementSize = VT.getScalarSizeInBits();
19433 const unsigned NumElts = VT.getVectorNumElements();
19434 SDValue V1 = N->getOperand(0);
19435 SDValue V2 = N->getOperand(1);
19436 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19437 MVT XLenVT = Subtarget.getXLenVT();
19438
19439   // Recognize a disguised select of add/sub.
19440 bool SwapCC;
19441 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19442 matchSelectAddSub(V1, V2, SwapCC)) {
19443 SDValue Sub = SwapCC ? V1 : V2;
19444 SDValue A = Sub.getOperand(0);
19445 SDValue B = Sub.getOperand(1);
19446
19447 SmallVector<SDValue> MaskVals;
19448 for (int MaskIndex : Mask) {
19449 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19450 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19451 }
19452 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19453 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19454 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19455
19456 // Arrange the select such that we can match a masked
19457 // vrsub.vi to perform the conditional negate
19458 SDValue NegB = DAG.getNegative(B, DL, VT);
19459 if (!SwapCC)
19460 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19461 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19462 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19463 }
19464
19465 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19466 // during the combine phase before type legalization, and relies on
19467 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19468 // for the source mask.
19469 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19470 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19471 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19472 return SDValue();
19473
19474 SmallVector<int, 8> NewMask;
19475 narrowShuffleMaskElts(2, Mask, NewMask);
19476
19477 LLVMContext &C = *DAG.getContext();
19478 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19479 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19480 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19481 DAG.getBitcast(NewVT, V2), NewMask);
19482 return DAG.getBitcast(VT, Res);
19483}
19484
19486 const RISCVSubtarget &Subtarget) {
19487 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19488
19489 if (N->getValueType(0).isFixedLengthVector())
19490 return SDValue();
19491
19492 SDValue Addend = N->getOperand(0);
19493 SDValue MulOp = N->getOperand(1);
19494
19495 if (N->getOpcode() == RISCVISD::ADD_VL) {
19496 SDValue AddPassthruOp = N->getOperand(2);
19497 if (!AddPassthruOp.isUndef())
19498 return SDValue();
19499 }
19500
19501 auto IsVWMulOpc = [](unsigned Opc) {
19502 switch (Opc) {
19503 case RISCVISD::VWMUL_VL:
19504 case RISCVISD::VWMULU_VL:
19505 case RISCVISD::VWMULSU_VL:
19506 return true;
19507 default:
19508 return false;
19509 }
19510 };
19511
19512 if (!IsVWMulOpc(MulOp.getOpcode()))
19513 std::swap(Addend, MulOp);
19514
19515 if (!IsVWMulOpc(MulOp.getOpcode()))
19516 return SDValue();
19517
19518 SDValue MulPassthruOp = MulOp.getOperand(2);
19519
19520 if (!MulPassthruOp.isUndef())
19521 return SDValue();
19522
19523 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19524 const RISCVSubtarget &Subtarget) {
19525 if (N->getOpcode() == ISD::ADD) {
19526 SDLoc DL(N);
19527 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19528 Subtarget);
19529 }
19530 return std::make_pair(N->getOperand(3), N->getOperand(4));
19531 }(N, DAG, Subtarget);
19532
19533 SDValue MulMask = MulOp.getOperand(3);
19534 SDValue MulVL = MulOp.getOperand(4);
19535
19536 if (AddMask != MulMask || AddVL != MulVL)
19537 return SDValue();
19538
19539 const auto &TSInfo =
19540 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19541 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19542
19543 SDLoc DL(N);
19544 EVT VT = N->getValueType(0);
19545 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19546 AddVL};
19547 return DAG.getNode(Opc, DL, VT, Ops);
19548}
19549
19551 const RISCVSubtarget &Subtarget) {
19552
19553 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19554
19555 if (!N->getValueType(0).isVector())
19556 return SDValue();
19557
19558 SDValue Addend = N->getOperand(0);
19559 SDValue DotOp = N->getOperand(1);
19560
19561 if (N->getOpcode() == RISCVISD::ADD_VL) {
19562 SDValue AddPassthruOp = N->getOperand(2);
19563 if (!AddPassthruOp.isUndef())
19564 return SDValue();
19565 }
19566
19567 auto IsVqdotqOpc = [](unsigned Opc) {
19568 switch (Opc) {
19569 case RISCVISD::VQDOT_VL:
19570 case RISCVISD::VQDOTU_VL:
19571 case RISCVISD::VQDOTSU_VL:
19572 return true;
19573 default:
19574 return false;
19575 }
19576 };
19577
19578 if (!IsVqdotqOpc(DotOp.getOpcode()))
19579 std::swap(Addend, DotOp);
19580
19581 if (!IsVqdotqOpc(DotOp.getOpcode()))
19582 return SDValue();
19583
19584 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19585 const RISCVSubtarget &Subtarget) {
19586 if (N->getOpcode() == ISD::ADD) {
19587 SDLoc DL(N);
19588 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19589 Subtarget);
19590 }
19591 return std::make_pair(N->getOperand(3), N->getOperand(4));
19592 }(N, DAG, Subtarget);
19593
19594 SDValue MulVL = DotOp.getOperand(4);
19595 if (AddVL != MulVL)
19596 return SDValue();
19597
19598 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19599 AddMask.getOperand(0) != MulVL)
19600 return SDValue();
19601
19602 SDValue AccumOp = DotOp.getOperand(2);
19603 SDLoc DL(N);
19604 EVT VT = N->getValueType(0);
19605 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19606 DAG.getUNDEF(VT), AddMask, AddVL);
19607
19608 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19609 DotOp.getOperand(3), DotOp->getOperand(4)};
19610 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19611}
19612
19613 static bool
19614 legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19615                                ISD::MemIndexType &IndexType,
19616                                RISCVTargetLowering::DAGCombinerInfo &DCI) {
19617   if (!DCI.isBeforeLegalize())
19618 return false;
19619
19620 SelectionDAG &DAG = DCI.DAG;
19621 const MVT XLenVT =
19622 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19623
19624 const EVT IndexVT = Index.getValueType();
19625
19626 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19627 // mode, so anything else must be manually legalized.
19628 if (!isIndexTypeSigned(IndexType))
19629 return false;
19630
19631 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19632 // Any index legalization should first promote to XLenVT, so we don't lose
19633 // bits when scaling. This may create an illegal index type so we let
19634 // LLVM's legalization take care of the splitting.
19635 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19636 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19637 IndexVT.changeVectorElementType(XLenVT), Index);
19638 }
19639 IndexType = ISD::UNSIGNED_SCALED;
19640 return true;
19641}
19642
19643/// Match the index vector of a scatter or gather node as the shuffle mask
19644/// which performs the rearrangement if possible. Will only match if
19645/// all lanes are touched, and thus replacing the scatter or gather with
19646/// a unit strided access and shuffle is legal.
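/// For example, an all-active gather of v4i32 with byte indices <12, 8, 4, 0>
/// touches every element of a 16-byte region, so it can be rewritten as a
/// unit-strided load followed by a shuffle with mask <3, 2, 1, 0>.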
19647static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19648 SmallVector<int> &ShuffleMask) {
19649 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19650 return false;
19651 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19652 return false;
19653
19654 const unsigned ElementSize = VT.getScalarStoreSize();
19655 const unsigned NumElems = VT.getVectorNumElements();
19656
19657 // Create the shuffle mask and check all bits active
19658 assert(ShuffleMask.empty());
19659 BitVector ActiveLanes(NumElems);
19660 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19661 // TODO: We've found an active bit of UB, and could be
19662 // more aggressive here if desired.
19663 if (Index->getOperand(i)->isUndef())
19664 return false;
19665 uint64_t C = Index->getConstantOperandVal(i);
19666 if (C % ElementSize != 0)
19667 return false;
19668 C = C / ElementSize;
19669 if (C >= NumElems)
19670 return false;
19671 ShuffleMask.push_back(C);
19672 ActiveLanes.set(C);
19673 }
19674 return ActiveLanes.all();
19675}
19676
19677/// Match the index of a gather or scatter operation as an operation
19678/// with twice the element width and half the number of elements. This is
19679/// generally profitable (if legal) because these operations are linear
19680/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19681/// come out ahead.
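/// Illustrative sketch (not part of the original source): an i32 gather with
/// constant byte offsets {0, 4, 16, 20} accesses two pairs of consecutive i32
/// elements starting at offsets 0 and 16, so it can instead be treated as an
/// i64 gather with offsets {0, 16}, halving the number of indexed elements.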
19682static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19683 Align BaseAlign, const RISCVSubtarget &ST) {
19684 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19685 return false;
19686 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19687 return false;
19688
19689  // Attempt a doubling. If we can use an element type 4x or 8x in
19690  // size, this will happen via multiple iterations of the transform.
19691 const unsigned NumElems = VT.getVectorNumElements();
19692 if (NumElems % 2 != 0)
19693 return false;
19694
19695 const unsigned ElementSize = VT.getScalarStoreSize();
19696 const unsigned WiderElementSize = ElementSize * 2;
19697 if (WiderElementSize > ST.getELen()/8)
19698 return false;
19699
19700 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19701 return false;
19702
19703 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19704 // TODO: We've found an active bit of UB, and could be
19705 // more aggressive here if desired.
19706 if (Index->getOperand(i)->isUndef())
19707 return false;
19708 // TODO: This offset check is too strict if we support fully
19709 // misaligned memory operations.
19710 uint64_t C = Index->getConstantOperandVal(i);
19711 if (i % 2 == 0) {
19712 if (C % WiderElementSize != 0)
19713 return false;
19714 continue;
19715 }
19716 uint64_t Last = Index->getConstantOperandVal(i-1);
19717 if (C != Last + ElementSize)
19718 return false;
19719 }
19720 return true;
19721}
19722
19723// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19724// This benefits cases where X and Y have the same low-precision vector value
19725// type. Since the truncate would be lowered into n levels of
19726// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such a
19727// pattern would otherwise be expanded into a series of "vsetvli" and "vnsrl"
19728// instructions by the time it reaches this point.
19729static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19730  SDValue Mask = N->getOperand(1);
19731 SDValue VL = N->getOperand(2);
19732
19733 bool IsVLMAX = isAllOnesConstant(VL) ||
19734 (isa<RegisterSDNode>(VL) &&
19735 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19736 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19737 Mask.getOperand(0) != VL)
19738 return SDValue();
19739
19740 auto IsTruncNode = [&](SDValue V) {
19741 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19742 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19743 };
19744
19745 SDValue Op = N->getOperand(0);
19746
19747  // We first need to find the innermost TRUNCATE_VECTOR_VL node in order
19748  // to recognize such a pattern.
19749 while (IsTruncNode(Op)) {
19750 if (!Op.hasOneUse())
19751 return SDValue();
19752 Op = Op.getOperand(0);
19753 }
19754
19755 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19756 return SDValue();
19757
19758 SDValue N0 = Op.getOperand(0);
19759 SDValue N1 = Op.getOperand(1);
19760 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19761 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19762 return SDValue();
19763
19764 SDValue N00 = N0.getOperand(0);
19765 SDValue N10 = N1.getOperand(0);
19766 if (!N00.getValueType().isVector() ||
19767 N00.getValueType() != N10.getValueType() ||
19768 N->getValueType(0) != N10.getValueType())
19769 return SDValue();
19770
19771 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19772 SDValue SMin =
19773 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19774 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19775 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19776}
19777
19778// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19779// maximum value for the truncated type.
19780// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19781// is the signed maximum value for the truncated type and C2 is the signed
19782// minimum value.
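// Illustrative sketch (not part of the original source): when truncating i16
// to i8,
//   (truncate_vector_vl (umin X, 255))              -> (vnclipu_vl X)
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> (vnclip_vl X)
// since 255 is the unsigned maximum and [-128, 127] the signed range of i8.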
19783static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19784                                      const RISCVSubtarget &Subtarget) {
19785 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19786
19787 MVT VT = N->getSimpleValueType(0);
19788
19789 SDValue Mask = N->getOperand(1);
19790 SDValue VL = N->getOperand(2);
19791
19792 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19793 APInt &SplatVal) {
19794 if (V.getOpcode() != Opc &&
19795 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19796 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19797 return SDValue();
19798
19799 SDValue Op = V.getOperand(1);
19800
19801 // Peek through conversion between fixed and scalable vectors.
19802 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19803 isNullConstant(Op.getOperand(2)) &&
19804 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19805 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19806 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19807 isNullConstant(Op.getOperand(1).getOperand(1)))
19808 Op = Op.getOperand(1).getOperand(0);
19809
19810 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19811 return V.getOperand(0);
19812
19813 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19814 Op.getOperand(2) == VL) {
19815 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19816 SplatVal =
19817 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19818 return V.getOperand(0);
19819 }
19820 }
19821
19822 return SDValue();
19823 };
19824
19825 SDLoc DL(N);
19826
19827 auto DetectUSatPattern = [&](SDValue V) {
19828 APInt LoC, HiC;
19829
19830 // Simple case, V is a UMIN.
19831 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19832 if (HiC.isMask(VT.getScalarSizeInBits()))
19833 return UMinOp;
19834
19835 // If we have an SMAX that removes negative numbers first, then we can match
19836 // SMIN instead of UMIN.
19837 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19838 if (SDValue SMaxOp =
19839 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19840 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19841 return SMinOp;
19842
19843 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19844 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19845 // first.
19846 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19847 if (SDValue SMinOp =
19848 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19849 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19850 HiC.uge(LoC))
19851 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19852 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19853 Mask, VL);
19854
19855 return SDValue();
19856 };
19857
19858 auto DetectSSatPattern = [&](SDValue V) {
19859 unsigned NumDstBits = VT.getScalarSizeInBits();
19860 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19861 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19862 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19863
19864 APInt HiC, LoC;
19865 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19866 if (SDValue SMaxOp =
19867 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19868 if (HiC == SignedMax && LoC == SignedMin)
19869 return SMaxOp;
19870
19871 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19872 if (SDValue SMinOp =
19873 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19874 if (HiC == SignedMax && LoC == SignedMin)
19875 return SMinOp;
19876
19877 return SDValue();
19878 };
19879
19880 SDValue Src = N->getOperand(0);
19881
19882 // Look through multiple layers of truncates.
19883 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19884 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
19885 Src.hasOneUse())
19886 Src = Src.getOperand(0);
19887
19888 SDValue Val;
19889 unsigned ClipOpc;
19890 if ((Val = DetectUSatPattern(Src)))
19891 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
19892 else if ((Val = DetectSSatPattern(Src)))
19893 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
19894 else
19895 return SDValue();
19896
19897 MVT ValVT = Val.getSimpleValueType();
19898
19899 do {
19900 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
19901 ValVT = ValVT.changeVectorElementType(ValEltVT);
19902 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
19903 } while (ValVT != VT);
19904
19905 return Val;
19906}
19907
19908// Convert
19909// (iX ctpop (bitcast (vXi1 A)))
19910// ->
19911// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19912// and
19913// (iN reduce.add (zext (vXi1 A to vXiN))
19914// ->
19915// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
19916// FIXME: It's complicated to match all the variations of this after type
19917// legalization so we only handle the pre-type legalization pattern, but that
19918// requires the fixed vector type to be legal.
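// Illustrative sketch (not part of the original source): for A : v8i1, both
//   (i32 ctpop (i8 bitcast A))
// and
//   (i32 vecreduce.add (zext A to v8i32))
// count the set bits of the mask, and both become
//   (zext (vcpop.m (insert_subvector undef, A, 0))).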
19919static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
19920                               const RISCVSubtarget &Subtarget) {
19921 unsigned Opc = N->getOpcode();
19922 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
19923 "Unexpected opcode");
19924 EVT VT = N->getValueType(0);
19925 if (!VT.isScalarInteger())
19926 return SDValue();
19927
19928 SDValue Src = N->getOperand(0);
19929
19930 if (Opc == ISD::CTPOP) {
19931 // Peek through zero_extend. It doesn't change the count.
19932 if (Src.getOpcode() == ISD::ZERO_EXTEND)
19933 Src = Src.getOperand(0);
19934
19935 if (Src.getOpcode() != ISD::BITCAST)
19936 return SDValue();
19937 Src = Src.getOperand(0);
19938 } else if (Opc == ISD::VECREDUCE_ADD) {
19939 if (Src.getOpcode() != ISD::ZERO_EXTEND)
19940 return SDValue();
19941 Src = Src.getOperand(0);
19942 }
19943
19944 EVT SrcEVT = Src.getValueType();
19945 if (!SrcEVT.isSimple())
19946 return SDValue();
19947
19948 MVT SrcMVT = SrcEVT.getSimpleVT();
19949 // Make sure the input is an i1 vector.
19950 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
19951 return SDValue();
19952
19953 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19954 if (!TLI.isTypeLegal(SrcMVT))
19955 return SDValue();
19956
19957 // Check that destination type is large enough to hold result without
19958 // overflow.
19959 if (Opc == ISD::VECREDUCE_ADD) {
19960 unsigned EltSize = SrcMVT.getScalarSizeInBits();
19961 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
19962 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
19963 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
19964 ? SrcMVT.getVectorNumElements()
19965                            : RISCVTargetLowering::computeVLMAX(
19966                                     VectorBitsMax, EltSize, MinSize);
19967 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
19968 return SDValue();
19969 }
19970
19971 MVT ContainerVT = SrcMVT;
19972 if (SrcMVT.isFixedLengthVector()) {
19973 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
19974 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
19975 }
19976
19977 SDLoc DL(N);
19978 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
19979
19980 MVT XLenVT = Subtarget.getXLenVT();
19981 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
19982 return DAG.getZExtOrTrunc(Pop, DL, VT);
19983}
19984
19987 const RISCVSubtarget &Subtarget) {
19988 // (shl (zext x), y) -> (vwsll x, y)
19989 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
19990 return V;
19991
19992 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
19993 // (shl (zext x), C) -> (vwmulu x, 1u << C)
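  // Illustrative sketch (not part of the original source): with x : nxv4i8 and
  // C = 3, (shl (zext x to nxv4i16), 3) becomes (vwmulu_vl x, 8), i.e. a single
  // widening multiply instead of a separate zero-extend and shift.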
19994
19995 if (!DCI.isAfterLegalizeDAG())
19996 return SDValue();
19997
19998 SDValue LHS = N->getOperand(0);
19999 if (!LHS.hasOneUse())
20000 return SDValue();
20001 unsigned Opcode;
20002 switch (LHS.getOpcode()) {
20003 case ISD::SIGN_EXTEND:
20004 case RISCVISD::VSEXT_VL:
20005 Opcode = RISCVISD::VWMULSU_VL;
20006 break;
20007 case ISD::ZERO_EXTEND:
20008 case RISCVISD::VZEXT_VL:
20009 Opcode = RISCVISD::VWMULU_VL;
20010 break;
20011 default:
20012 return SDValue();
20013 }
20014
20015 SDValue RHS = N->getOperand(1);
20016 APInt ShAmt;
20017 uint64_t ShAmtInt;
20018 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20019 ShAmtInt = ShAmt.getZExtValue();
20020 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20021 RHS.getOperand(1).getOpcode() == ISD::Constant)
20022 ShAmtInt = RHS.getConstantOperandVal(1);
20023 else
20024 return SDValue();
20025
20026 // Better foldings:
20027 // (shl (sext x), 1) -> (vwadd x, x)
20028 // (shl (zext x), 1) -> (vwaddu x, x)
20029 if (ShAmtInt <= 1)
20030 return SDValue();
20031
20032 SDValue NarrowOp = LHS.getOperand(0);
20033 MVT NarrowVT = NarrowOp.getSimpleValueType();
20034 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20035 if (ShAmtInt >= NarrowBits)
20036 return SDValue();
20037 MVT VT = N->getSimpleValueType(0);
20038 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20039 return SDValue();
20040
20041 SelectionDAG &DAG = DCI.DAG;
20042 SDLoc DL(N);
20043 SDValue Passthru, Mask, VL;
20044 switch (N->getOpcode()) {
20045 case ISD::SHL:
20046 Passthru = DAG.getUNDEF(VT);
20047 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20048 break;
20049 case RISCVISD::SHL_VL:
20050 Passthru = N->getOperand(2);
20051 Mask = N->getOperand(3);
20052 VL = N->getOperand(4);
20053 break;
20054 default:
20055 llvm_unreachable("Expected SHL");
20056 }
20057 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20058 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20059 Passthru, Mask, VL);
20060}
20061
20062SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20063                                               DAGCombinerInfo &DCI) const {
20064 SelectionDAG &DAG = DCI.DAG;
20065 const MVT XLenVT = Subtarget.getXLenVT();
20066 SDLoc DL(N);
20067
20068 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20069 // bits are demanded. N will be added to the Worklist if it was not deleted.
20070 // Caller should return SDValue(N, 0) if this returns true.
20071 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20072 SDValue Op = N->getOperand(OpNo);
20073 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20074 if (!SimplifyDemandedBits(Op, Mask, DCI))
20075 return false;
20076
20077 if (N->getOpcode() != ISD::DELETED_NODE)
20078 DCI.AddToWorklist(N);
20079 return true;
20080 };
20081
20082 switch (N->getOpcode()) {
20083 default:
20084 break;
20085 case RISCVISD::SplitF64: {
20086 SDValue Op0 = N->getOperand(0);
20087 // If the input to SplitF64 is just BuildPairF64 then the operation is
20088 // redundant. Instead, use BuildPairF64's operands directly.
20089 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20090 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20091
20092 if (Op0->isUndef()) {
20093 SDValue Lo = DAG.getUNDEF(MVT::i32);
20094 SDValue Hi = DAG.getUNDEF(MVT::i32);
20095 return DCI.CombineTo(N, Lo, Hi);
20096 }
20097
20098 // It's cheaper to materialise two 32-bit integers than to load a double
20099 // from the constant pool and transfer it to integer registers through the
20100 // stack.
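    // Illustrative sketch (not part of the original source): the f64 constant
    // 1.0 has bit pattern 0x3FF0000000000000, so it splits into Lo = 0x0 and
    // Hi = 0x3FF00000, each of which is cheap to materialise in a GPR.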
20101    if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20102      APInt V = C->getValueAPF().bitcastToAPInt();
20103 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20104 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20105 return DCI.CombineTo(N, Lo, Hi);
20106 }
20107
20108 // This is a target-specific version of a DAGCombine performed in
20109 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20110 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20111 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20112 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20113 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20114 break;
20115 SDValue NewSplitF64 =
20116 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20117 Op0.getOperand(0));
20118 SDValue Lo = NewSplitF64.getValue(0);
20119 SDValue Hi = NewSplitF64.getValue(1);
20120 APInt SignBit = APInt::getSignMask(32);
20121 if (Op0.getOpcode() == ISD::FNEG) {
20122 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20123 DAG.getConstant(SignBit, DL, MVT::i32));
20124 return DCI.CombineTo(N, Lo, NewHi);
20125 }
20126 assert(Op0.getOpcode() == ISD::FABS);
20127 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20128 DAG.getConstant(~SignBit, DL, MVT::i32));
20129 return DCI.CombineTo(N, Lo, NewHi);
20130 }
20131 case RISCVISD::SLLW:
20132 case RISCVISD::SRAW:
20133 case RISCVISD::SRLW:
20134 case RISCVISD::RORW:
20135 case RISCVISD::ROLW: {
20136 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20137 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20138 SimplifyDemandedLowBitsHelper(1, 5))
20139 return SDValue(N, 0);
20140
20141 break;
20142 }
20143 case RISCVISD::CLZW:
20144 case RISCVISD::CTZW: {
20145 // Only the lower 32 bits of the first operand are read
20146 if (SimplifyDemandedLowBitsHelper(0, 32))
20147 return SDValue(N, 0);
20148 break;
20149 }
20150 case RISCVISD::FMV_W_X_RV64: {
20151    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20152 // conversion is unnecessary and can be replaced with the
20153 // FMV_X_ANYEXTW_RV64 operand.
20154 SDValue Op0 = N->getOperand(0);
20155 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20156 return Op0.getOperand(0);
20157 break;
20158 }
20159 case RISCVISD::FMV_X_ANYEXTH:
20160 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20161 SDLoc DL(N);
20162 SDValue Op0 = N->getOperand(0);
20163 MVT VT = N->getSimpleValueType(0);
20164
20165 // Constant fold.
20166 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20167 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20168 return DAG.getConstant(Val, DL, VT);
20169 }
20170
20171 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20172 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20173 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20174 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20175 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20176 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20177 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20178 assert(Op0.getOperand(0).getValueType() == VT &&
20179 "Unexpected value type!");
20180 return Op0.getOperand(0);
20181 }
20182
20183 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20184 cast<LoadSDNode>(Op0)->isSimple()) {
20185      MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20186      auto *LN0 = cast<LoadSDNode>(Op0);
20187 SDValue Load =
20188 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20189 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20190 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20191 return Load;
20192 }
20193
20194 // This is a target-specific version of a DAGCombine performed in
20195 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20196 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20197 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20198 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20199 !Op0.getNode()->hasOneUse())
20200 break;
20201 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20202 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20203 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20204 if (Op0.getOpcode() == ISD::FNEG)
20205 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20206 DAG.getConstant(SignBit, DL, VT));
20207
20208 assert(Op0.getOpcode() == ISD::FABS);
20209 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20210 DAG.getConstant(~SignBit, DL, VT));
20211 }
20212 case ISD::ABS: {
20213 EVT VT = N->getValueType(0);
20214 SDValue N0 = N->getOperand(0);
20215 // abs (sext) -> zext (abs)
20216 // abs (zext) -> zext (handled elsewhere)
20217 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20218 SDValue Src = N0.getOperand(0);
20219 SDLoc DL(N);
20220 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20221 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20222 }
20223 break;
20224 }
20225 case ISD::ADD: {
20226 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20227 return V;
20228 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20229 return V;
20230 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20231 return V;
20232 return performADDCombine(N, DCI, Subtarget);
20233 }
20234 case ISD::SUB: {
20235 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20236 return V;
20237 return performSUBCombine(N, DAG, Subtarget);
20238 }
20239 case ISD::AND:
20240 return performANDCombine(N, DCI, Subtarget);
20241 case ISD::OR: {
20242 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20243 return V;
20244 return performORCombine(N, DCI, Subtarget);
20245 }
20246 case ISD::XOR:
20247 return performXORCombine(N, DAG, Subtarget);
20248 case ISD::MUL:
20249 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20250 return V;
20251 return performMULCombine(N, DAG, DCI, Subtarget);
20252 case ISD::SDIV:
20253 case ISD::UDIV:
20254 case ISD::SREM:
20255 case ISD::UREM:
20256 if (SDValue V = combineBinOpOfZExt(N, DAG))
20257 return V;
20258 break;
20259 case ISD::FMUL: {
20260 using namespace SDPatternMatch;
20261 SDLoc DL(N);
20262 EVT VT = N->getValueType(0);
20263 SDValue X, Y;
20264 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20265 // hoistFNegAboveFMulFDiv.
20266 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20267    if (sd_match(N, m_FMul(m_Value(X), m_FNeg(m_Value(Y)))))
20268      return DAG.getNode(ISD::FNEG, DL, VT,
20269 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20270
20271 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20272 SDValue N0 = N->getOperand(0);
20273 SDValue N1 = N->getOperand(1);
20274 if (N0->getOpcode() != ISD::FCOPYSIGN)
20275 std::swap(N0, N1);
20276 if (N0->getOpcode() != ISD::FCOPYSIGN)
20277 return SDValue();
20279 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20280 return SDValue();
20281 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20282 return SDValue();
20283 SDValue Sign = N0->getOperand(1);
20284 if (Sign.getValueType() != VT)
20285 return SDValue();
20286 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20287 }
20288 case ISD::FADD:
20289 case ISD::UMAX:
20290 case ISD::UMIN:
20291 case ISD::SMAX:
20292 case ISD::SMIN:
20293 case ISD::FMAXNUM:
20294 case ISD::FMINNUM: {
20295 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20296 return V;
20297 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20298 return V;
20299 return SDValue();
20300 }
20301 case ISD::SETCC:
20302 return performSETCCCombine(N, DCI, Subtarget);
20303  case ISD::SIGN_EXTEND_INREG:
20304    return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20305 case ISD::ZERO_EXTEND:
20306 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20307 // type legalization. This is safe because fp_to_uint produces poison if
20308 // it overflows.
20309 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20310 SDValue Src = N->getOperand(0);
20311 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20312 isTypeLegal(Src.getOperand(0).getValueType()))
20313 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20314 Src.getOperand(0));
20315 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20316 isTypeLegal(Src.getOperand(1).getValueType())) {
20317 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20318 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20319 Src.getOperand(0), Src.getOperand(1));
20320 DCI.CombineTo(N, Res);
20321 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20322 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20323 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20324 }
20325 }
20326 return SDValue();
20327 case RISCVISD::TRUNCATE_VECTOR_VL:
20328 if (SDValue V = combineTruncOfSraSext(N, DAG))
20329 return V;
20330 return combineTruncToVnclip(N, DAG, Subtarget);
20331 case ISD::VP_TRUNCATE:
20332 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20333 case ISD::TRUNCATE:
20334 return performTRUNCATECombine(N, DAG, Subtarget);
20335 case ISD::SELECT:
20336 return performSELECTCombine(N, DAG, Subtarget);
20337 case ISD::VSELECT:
20338 return performVSELECTCombine(N, DAG);
20339 case RISCVISD::CZERO_EQZ:
20340 case RISCVISD::CZERO_NEZ: {
20341 SDValue Val = N->getOperand(0);
20342 SDValue Cond = N->getOperand(1);
20343
20344 unsigned Opc = N->getOpcode();
20345
20346 // czero_eqz x, x -> x
20347 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20348 return Val;
20349
20350 unsigned InvOpc =
20351 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20352
20353 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20354 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20355 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20356 SDValue NewCond = Cond.getOperand(0);
20357 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20358 if (DAG.MaskedValueIsZero(NewCond, Mask))
20359 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20360 }
20361 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20362 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20363 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20364 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20365 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20366 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20367 if (ISD::isIntEqualitySetCC(CCVal))
20368 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20369 N->getValueType(0), Val, Cond.getOperand(0));
20370 }
20371 return SDValue();
20372 }
20373 case RISCVISD::SELECT_CC: {
20374 // Transform
20375 SDValue LHS = N->getOperand(0);
20376 SDValue RHS = N->getOperand(1);
20377 SDValue CC = N->getOperand(2);
20378 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20379 SDValue TrueV = N->getOperand(3);
20380 SDValue FalseV = N->getOperand(4);
20381 SDLoc DL(N);
20382 EVT VT = N->getValueType(0);
20383
20384 // If the True and False values are the same, we don't need a select_cc.
20385 if (TrueV == FalseV)
20386 return TrueV;
20387
20388 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20389 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
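    // Illustrative sketch (not part of the original source): on RV64,
    // (select (x < 0), 5, 3) becomes roughly
    //   srai t, x, 63      ; t = x < 0 ? -1 : 0
    //   andi t, t, 2       ; t = x < 0 ? (5 - 3) : 0
    //   addi res, t, 3     ; res = x < 0 ? 5 : 3
    // avoiding a branch entirely.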
20390 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20391 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20392 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20393 if (CCVal == ISD::CondCode::SETGE)
20394 std::swap(TrueV, FalseV);
20395
20396 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20397 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20398      // Only handle simm12; a constant outside this range would be treated
20399      // as a register operand instead.
20400 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20401 isInt<12>(TrueSImm - FalseSImm)) {
20402 SDValue SRA =
20403 DAG.getNode(ISD::SRA, DL, VT, LHS,
20404 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20405 SDValue AND =
20406 DAG.getNode(ISD::AND, DL, VT, SRA,
20407 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20408 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20409 }
20410
20411 if (CCVal == ISD::CondCode::SETGE)
20412 std::swap(TrueV, FalseV);
20413 }
20414
20415 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20416 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20417 {LHS, RHS, CC, TrueV, FalseV});
20418
20419 if (!Subtarget.hasConditionalMoveFusion()) {
20420 // (select c, -1, y) -> -c | y
20421 if (isAllOnesConstant(TrueV)) {
20422 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20423 SDValue Neg = DAG.getNegative(C, DL, VT);
20424 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20425 }
20426 // (select c, y, -1) -> -!c | y
20427 if (isAllOnesConstant(FalseV)) {
20428 SDValue C =
20429 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20430 SDValue Neg = DAG.getNegative(C, DL, VT);
20431 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20432 }
20433
20434 // (select c, 0, y) -> -!c & y
20435 if (isNullConstant(TrueV)) {
20436 SDValue C =
20437 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20438 SDValue Neg = DAG.getNegative(C, DL, VT);
20439 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20440 }
20441 // (select c, y, 0) -> -c & y
20442 if (isNullConstant(FalseV)) {
20443 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20444 SDValue Neg = DAG.getNegative(C, DL, VT);
20445 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20446 }
20447 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20448 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20449 if (((isOneConstant(FalseV) && LHS == TrueV &&
20450 CCVal == ISD::CondCode::SETNE) ||
20451 (isOneConstant(TrueV) && LHS == FalseV &&
20452 CCVal == ISD::CondCode::SETEQ)) &&
20453 isNullConstant(RHS)) {
20454 // freeze it to be safe.
20455 LHS = DAG.getFreeze(LHS);
20456 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20457 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20458 }
20459 }
20460
20461 // If both true/false are an xor with 1, pull through the select.
20462 // This can occur after op legalization if both operands are setccs that
20463 // require an xor to invert.
20464 // FIXME: Generalize to other binary ops with identical operand?
20465 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20466 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20467 isOneConstant(TrueV.getOperand(1)) &&
20468 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20469 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20470 TrueV.getOperand(0), FalseV.getOperand(0));
20471 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20472 }
20473
20474 return SDValue();
20475 }
20476 case RISCVISD::BR_CC: {
20477 SDValue LHS = N->getOperand(1);
20478 SDValue RHS = N->getOperand(2);
20479 SDValue CC = N->getOperand(3);
20480 SDLoc DL(N);
20481
20482 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20483 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20484 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20485
20486 return SDValue();
20487 }
20488 case ISD::BITREVERSE:
20489 return performBITREVERSECombine(N, DAG, Subtarget);
20490 case ISD::FP_TO_SINT:
20491 case ISD::FP_TO_UINT:
20492 return performFP_TO_INTCombine(N, DCI, Subtarget);
20493  case ISD::FP_TO_SINT_SAT:
20494  case ISD::FP_TO_UINT_SAT:
20495    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20496 case ISD::FCOPYSIGN: {
20497 EVT VT = N->getValueType(0);
20498 if (!VT.isVector())
20499 break;
20500 // There is a form of VFSGNJ which injects the negated sign of its second
20501 // operand. Try and bubble any FNEG up after the extend/round to produce
20502    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20503    // TRUNC=1.
20504 SDValue In2 = N->getOperand(1);
20505 // Avoid cases where the extend/round has multiple uses, as duplicating
20506 // those is typically more expensive than removing a fneg.
20507 if (!In2.hasOneUse())
20508 break;
20509 if (In2.getOpcode() != ISD::FP_EXTEND &&
20510 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20511 break;
20512 In2 = In2.getOperand(0);
20513 if (In2.getOpcode() != ISD::FNEG)
20514 break;
20515 SDLoc DL(N);
20516 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20517 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20518 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20519 }
20520 case ISD::MGATHER: {
20521 const auto *MGN = cast<MaskedGatherSDNode>(N);
20522 const EVT VT = N->getValueType(0);
20523 SDValue Index = MGN->getIndex();
20524 SDValue ScaleOp = MGN->getScale();
20525 ISD::MemIndexType IndexType = MGN->getIndexType();
20526 assert(!MGN->isIndexScaled() &&
20527 "Scaled gather/scatter should not be formed");
20528
20529 SDLoc DL(N);
20530 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20531 return DAG.getMaskedGather(
20532 N->getVTList(), MGN->getMemoryVT(), DL,
20533 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20534 MGN->getBasePtr(), Index, ScaleOp},
20535 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20536
20537 if (narrowIndex(Index, IndexType, DAG))
20538 return DAG.getMaskedGather(
20539 N->getVTList(), MGN->getMemoryVT(), DL,
20540 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20541 MGN->getBasePtr(), Index, ScaleOp},
20542 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20543
20544 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20545 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20546 // The sequence will be XLenVT, not the type of Index. Tell
20547 // isSimpleVIDSequence this so we avoid overflow.
20548 if (std::optional<VIDSequence> SimpleVID =
20549 isSimpleVIDSequence(Index, Subtarget.getXLen());
20550 SimpleVID && SimpleVID->StepDenominator == 1) {
20551 const int64_t StepNumerator = SimpleVID->StepNumerator;
20552 const int64_t Addend = SimpleVID->Addend;
20553
20554 // Note: We don't need to check alignment here since (by assumption
20555 // from the existence of the gather), our offsets must be sufficiently
20556 // aligned.
20557
20558 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20559 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20560 assert(IndexType == ISD::UNSIGNED_SCALED);
20561 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20562 DAG.getSignedConstant(Addend, DL, PtrVT));
20563
20564 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20565                                            VT.getVectorElementCount());
20566          SDValue StridedLoad = DAG.getStridedLoadVP(
20567 VT, DL, MGN->getChain(), BasePtr,
20568 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20569 EVL, MGN->getMemOperand());
20570 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20571 MGN->getPassThru());
20572 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20573 DL);
20574 }
20575 }
20576
20577 SmallVector<int> ShuffleMask;
20578 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20579 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20580 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20581 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20582 MGN->getMask(), DAG.getUNDEF(VT),
20583 MGN->getMemoryVT(), MGN->getMemOperand(),
20584                                       ISD::UNINDEXED, ISD::NON_EXTLOAD);
20585        SDValue Shuffle =
20586 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20587 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20588 }
20589
20590 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20591 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20592 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20593 SmallVector<SDValue> NewIndices;
20594 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20595 NewIndices.push_back(Index.getOperand(i));
20596 EVT IndexVT = Index.getValueType()
20597                        .getHalfNumVectorElementsVT(*DAG.getContext());
20598      Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20599
20600 unsigned ElementSize = VT.getScalarStoreSize();
20601 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20602 auto EltCnt = VT.getVectorElementCount();
20603 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20604 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20605 EltCnt.divideCoefficientBy(2));
20606 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20607 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20608 EltCnt.divideCoefficientBy(2));
20609 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20610
20611 SDValue Gather =
20612 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20613 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20614 Index, ScaleOp},
20615 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20616 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20617 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20618 }
20619 break;
20620 }
20621  case ISD::MSCATTER: {
20622 const auto *MSN = cast<MaskedScatterSDNode>(N);
20623 SDValue Index = MSN->getIndex();
20624 SDValue ScaleOp = MSN->getScale();
20625 ISD::MemIndexType IndexType = MSN->getIndexType();
20626 assert(!MSN->isIndexScaled() &&
20627 "Scaled gather/scatter should not be formed");
20628
20629 SDLoc DL(N);
20630 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20631 return DAG.getMaskedScatter(
20632 N->getVTList(), MSN->getMemoryVT(), DL,
20633 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20634 Index, ScaleOp},
20635 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20636
20637 if (narrowIndex(Index, IndexType, DAG))
20638 return DAG.getMaskedScatter(
20639 N->getVTList(), MSN->getMemoryVT(), DL,
20640 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20641 Index, ScaleOp},
20642 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20643
20644 EVT VT = MSN->getValue()->getValueType(0);
20645 SmallVector<int> ShuffleMask;
20646 if (!MSN->isTruncatingStore() &&
20647 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20648 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20649 DAG.getUNDEF(VT), ShuffleMask);
20650 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20651 DAG.getUNDEF(XLenVT), MSN->getMask(),
20652 MSN->getMemoryVT(), MSN->getMemOperand(),
20653 ISD::UNINDEXED, false);
20654 }
20655 break;
20656 }
20657 case ISD::VP_GATHER: {
20658 const auto *VPGN = cast<VPGatherSDNode>(N);
20659 SDValue Index = VPGN->getIndex();
20660 SDValue ScaleOp = VPGN->getScale();
20661 ISD::MemIndexType IndexType = VPGN->getIndexType();
20662 assert(!VPGN->isIndexScaled() &&
20663 "Scaled gather/scatter should not be formed");
20664
20665 SDLoc DL(N);
20666 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20667 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20668 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20669 ScaleOp, VPGN->getMask(),
20670 VPGN->getVectorLength()},
20671 VPGN->getMemOperand(), IndexType);
20672
20673 if (narrowIndex(Index, IndexType, DAG))
20674 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20675 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20676 ScaleOp, VPGN->getMask(),
20677 VPGN->getVectorLength()},
20678 VPGN->getMemOperand(), IndexType);
20679
20680 break;
20681 }
20682 case ISD::VP_SCATTER: {
20683 const auto *VPSN = cast<VPScatterSDNode>(N);
20684 SDValue Index = VPSN->getIndex();
20685 SDValue ScaleOp = VPSN->getScale();
20686 ISD::MemIndexType IndexType = VPSN->getIndexType();
20687 assert(!VPSN->isIndexScaled() &&
20688 "Scaled gather/scatter should not be formed");
20689
20690 SDLoc DL(N);
20691 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20692 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20693 {VPSN->getChain(), VPSN->getValue(),
20694 VPSN->getBasePtr(), Index, ScaleOp,
20695 VPSN->getMask(), VPSN->getVectorLength()},
20696 VPSN->getMemOperand(), IndexType);
20697
20698 if (narrowIndex(Index, IndexType, DAG))
20699 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20700 {VPSN->getChain(), VPSN->getValue(),
20701 VPSN->getBasePtr(), Index, ScaleOp,
20702 VPSN->getMask(), VPSN->getVectorLength()},
20703 VPSN->getMemOperand(), IndexType);
20704 break;
20705 }
20706 case RISCVISD::SHL_VL:
20707 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20708 return V;
20709 [[fallthrough]];
20710 case RISCVISD::SRA_VL:
20711 case RISCVISD::SRL_VL: {
20712 SDValue ShAmt = N->getOperand(1);
20713 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20714 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20715 SDLoc DL(N);
20716 SDValue VL = N->getOperand(4);
20717 EVT VT = N->getValueType(0);
20718 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20719 ShAmt.getOperand(1), VL);
20720 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20721 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20722 }
20723 break;
20724 }
20725 case ISD::SRA:
20726 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20727 return V;
20728 [[fallthrough]];
20729 case ISD::SRL:
20730 case ISD::SHL: {
20731 if (N->getOpcode() == ISD::SHL) {
20732 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20733 return V;
20734 }
20735 SDValue ShAmt = N->getOperand(1);
20736 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20737 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20738 SDLoc DL(N);
20739 EVT VT = N->getValueType(0);
20740 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20741 ShAmt.getOperand(1),
20742 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20743 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20744 }
20745 break;
20746 }
20747 case RISCVISD::ADD_VL:
20748 if (SDValue V = simplifyOp_VL(N))
20749 return V;
20750 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20751 return V;
20752 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20753 return V;
20754 return combineToVWMACC(N, DAG, Subtarget);
20755 case RISCVISD::VWADD_W_VL:
20756 case RISCVISD::VWADDU_W_VL:
20757 case RISCVISD::VWSUB_W_VL:
20758 case RISCVISD::VWSUBU_W_VL:
20759 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20760 case RISCVISD::OR_VL:
20761 case RISCVISD::SUB_VL:
20762 case RISCVISD::MUL_VL:
20763 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20764 case RISCVISD::VFMADD_VL:
20765 case RISCVISD::VFNMADD_VL:
20766 case RISCVISD::VFMSUB_VL:
20767 case RISCVISD::VFNMSUB_VL:
20768 case RISCVISD::STRICT_VFMADD_VL:
20769 case RISCVISD::STRICT_VFNMADD_VL:
20770 case RISCVISD::STRICT_VFMSUB_VL:
20771 case RISCVISD::STRICT_VFNMSUB_VL:
20772 return performVFMADD_VLCombine(N, DCI, Subtarget);
20773 case RISCVISD::FADD_VL:
20774 case RISCVISD::FSUB_VL:
20775 case RISCVISD::FMUL_VL:
20776 case RISCVISD::VFWADD_W_VL:
20777 case RISCVISD::VFWSUB_W_VL:
20778 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20779 case ISD::LOAD:
20780 case ISD::STORE: {
20781 if (DCI.isAfterLegalizeDAG())
20782 if (SDValue V = performMemPairCombine(N, DCI))
20783 return V;
20784
20785 if (N->getOpcode() != ISD::STORE)
20786 break;
20787
20788 auto *Store = cast<StoreSDNode>(N);
20789 SDValue Chain = Store->getChain();
20790 EVT MemVT = Store->getMemoryVT();
20791 SDValue Val = Store->getValue();
20792 SDLoc DL(N);
20793
20794 bool IsScalarizable =
20795 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20796 Store->isSimple() &&
20797 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20798 isPowerOf2_64(MemVT.getSizeInBits()) &&
20799 MemVT.getSizeInBits() <= Subtarget.getXLen();
20800
20801 // If sufficiently aligned we can scalarize stores of constant vectors of
20802 // any power-of-two size up to XLen bits, provided that they aren't too
20803 // expensive to materialize.
20804 // vsetivli zero, 2, e8, m1, ta, ma
20805 // vmv.v.i v8, 4
20806 // vse64.v v8, (a0)
20807 // ->
20808 // li a1, 1028
20809 // sh a1, 0(a0)
20810 if (DCI.isBeforeLegalize() && IsScalarizable &&
20811        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20812      // Get the constant vector bits
20813 APInt NewC(Val.getValueSizeInBits(), 0);
20814 uint64_t EltSize = Val.getScalarValueSizeInBits();
20815 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20816 if (Val.getOperand(i).isUndef())
20817 continue;
20818 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20819 i * EltSize);
20820 }
20821 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20822
20823 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20824 true) <= 2 &&
20825          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20826              NewVT, *Store->getMemOperand())) {
20827 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20828 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20829 Store->getPointerInfo(), Store->getBaseAlign(),
20830 Store->getMemOperand()->getFlags());
20831 }
20832 }
20833
20834 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20835 // vsetivli zero, 2, e16, m1, ta, ma
20836 // vle16.v v8, (a0)
20837 // vse16.v v8, (a1)
20838 if (auto *L = dyn_cast<LoadSDNode>(Val);
20839 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20840 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20841 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20842 L->getMemoryVT() == MemVT) {
20843 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20844      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20845                                           NewVT, *Store->getMemOperand()) &&
20846          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20847                                           NewVT, *L->getMemOperand())) {
20848 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20849 L->getPointerInfo(), L->getBaseAlign(),
20850 L->getMemOperand()->getFlags());
20851 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20852 Store->getPointerInfo(), Store->getBaseAlign(),
20853 Store->getMemOperand()->getFlags());
20854 }
20855 }
20856
20857 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20858 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20859 // any illegal types.
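    // Illustrative sketch (not part of the original source): instead of
    //   vmv.x.s a1, v8
    //   sd      a1, 0(a0)
    // this emits a VP store of element 0 with VL = 1, e.g.
    //   vsetivli zero, 1, e64, m1, ta, ma
    //   vse64.v  v8, (a0)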
20860 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20861 (DCI.isAfterLegalizeDAG() &&
20862          Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20863          isNullConstant(Val.getOperand(1)))) &&
20864 Val.hasOneUse()) {
20865 SDValue Src = Val.getOperand(0);
20866 MVT VecVT = Src.getSimpleValueType();
20867 // VecVT should be scalable and memory VT should match the element type.
20868 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20869 MemVT == VecVT.getVectorElementType()) {
20870 SDLoc DL(N);
20871 MVT MaskVT = getMaskTypeFor(VecVT);
20872 return DAG.getStoreVP(
20873 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
20874 DAG.getConstant(1, DL, MaskVT),
20875 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
20876 Store->getMemOperand(), Store->getAddressingMode(),
20877 Store->isTruncatingStore(), /*IsCompress*/ false);
20878 }
20879 }
20880
20881 break;
20882 }
20883 case ISD::SPLAT_VECTOR: {
20884 EVT VT = N->getValueType(0);
20885 // Only perform this combine on legal MVT types.
20886 if (!isTypeLegal(VT))
20887 break;
20888 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
20889 DAG, Subtarget))
20890 return Gather;
20891 break;
20892 }
20893 case ISD::BUILD_VECTOR:
20894 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
20895 return V;
20896 break;
20897  case ISD::CONCAT_VECTORS:
20898    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
20899 return V;
20900 break;
20901  case ISD::VECTOR_SHUFFLE:
20902    if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
20903 return V;
20904 break;
20905  case ISD::INSERT_VECTOR_ELT:
20906    if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
20907 return V;
20908 break;
20909 case RISCVISD::VFMV_V_F_VL: {
20910 const MVT VT = N->getSimpleValueType(0);
20911 SDValue Passthru = N->getOperand(0);
20912 SDValue Scalar = N->getOperand(1);
20913 SDValue VL = N->getOperand(2);
20914
20915 // If VL is 1, we can use vfmv.s.f.
20916 if (isOneConstant(VL))
20917 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
20918 break;
20919 }
20920 case RISCVISD::VMV_V_X_VL: {
20921 const MVT VT = N->getSimpleValueType(0);
20922 SDValue Passthru = N->getOperand(0);
20923 SDValue Scalar = N->getOperand(1);
20924 SDValue VL = N->getOperand(2);
20925
20926 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
20927 // scalar input.
20928 unsigned ScalarSize = Scalar.getValueSizeInBits();
20929 unsigned EltWidth = VT.getScalarSizeInBits();
20930 if (ScalarSize > EltWidth && Passthru.isUndef())
20931 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
20932 return SDValue(N, 0);
20933
20934 // If VL is 1 and the scalar value won't benefit from immediate, we can
20935 // use vmv.s.x.
20936    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
20937    if (isOneConstant(VL) &&
20938 (!Const || Const->isZero() ||
20939 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
20940 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
20941
20942 break;
20943 }
20944 case RISCVISD::VFMV_S_F_VL: {
20945 SDValue Src = N->getOperand(1);
20946 // Try to remove vector->scalar->vector if the scalar->vector is inserting
20947 // into an undef vector.
20948 // TODO: Could use a vslide or vmv.v.v for non-undef.
20949 if (N->getOperand(0).isUndef() &&
20950 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20951 isNullConstant(Src.getOperand(1)) &&
20952 Src.getOperand(0).getValueType().isScalableVector()) {
20953 EVT VT = N->getValueType(0);
20954 SDValue EVSrc = Src.getOperand(0);
20955 EVT EVSrcVT = EVSrc.getValueType();
20957 // Widths match, just return the original vector.
20958 if (EVSrcVT == VT)
20959 return EVSrc;
20960 SDLoc DL(N);
20961      // Width is narrower; use insert_subvector.
20962 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
20963 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
20964 EVSrc,
20965 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20966 }
20967      // Width is wider; use extract_subvector.
20968 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
20969 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
20970 }
20971 [[fallthrough]];
20972 }
20973 case RISCVISD::VMV_S_X_VL: {
20974 const MVT VT = N->getSimpleValueType(0);
20975 SDValue Passthru = N->getOperand(0);
20976 SDValue Scalar = N->getOperand(1);
20977 SDValue VL = N->getOperand(2);
20978
20979 // The vmv.s.x instruction copies the scalar integer register to element 0
20980 // of the destination vector register. If SEW < XLEN, the least-significant
20981 // bits are copied and the upper XLEN-SEW bits are ignored.
20982 unsigned ScalarSize = Scalar.getValueSizeInBits();
20983 unsigned EltWidth = VT.getScalarSizeInBits();
20984 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
20985 return SDValue(N, 0);
20986
20987 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
20988 Scalar.getOperand(0).getValueType() == N->getValueType(0))
20989 return Scalar.getOperand(0);
20990
20991 // Use M1 or smaller to avoid over constraining register allocation
20992 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
20993 if (M1VT.bitsLT(VT)) {
20994 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
20995 SDValue Result =
20996 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
20997 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
20998 return Result;
20999 }
21000
21001 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21002 // higher would involve overly constraining the register allocator for
21003 // no purpose.
21004 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21005 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21006 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21007 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21008
21009 break;
21010 }
21011 case RISCVISD::VMV_X_S: {
21012 SDValue Vec = N->getOperand(0);
21013 MVT VecVT = N->getOperand(0).getSimpleValueType();
21014 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21015 if (M1VT.bitsLT(VecVT)) {
21016 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21017 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21018 }
21019 break;
21020 }
21021  case ISD::INTRINSIC_VOID:
21022  case ISD::INTRINSIC_W_CHAIN:
21023  case ISD::INTRINSIC_WO_CHAIN: {
21024    unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21025 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21026 switch (IntNo) {
21027 // By default we do not combine any intrinsic.
21028 default:
21029 return SDValue();
21030 case Intrinsic::riscv_vcpop:
21031 case Intrinsic::riscv_vcpop_mask:
21032 case Intrinsic::riscv_vfirst:
21033 case Intrinsic::riscv_vfirst_mask: {
21034 SDValue VL = N->getOperand(2);
21035 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21036 IntNo == Intrinsic::riscv_vfirst_mask)
21037 VL = N->getOperand(3);
21038 if (!isNullConstant(VL))
21039 return SDValue();
21040 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21041 SDLoc DL(N);
21042 EVT VT = N->getValueType(0);
21043 if (IntNo == Intrinsic::riscv_vfirst ||
21044 IntNo == Intrinsic::riscv_vfirst_mask)
21045 return DAG.getAllOnesConstant(DL, VT);
21046 return DAG.getConstant(0, DL, VT);
21047 }
21048 case Intrinsic::riscv_vsseg2_mask:
21049 case Intrinsic::riscv_vsseg3_mask:
21050 case Intrinsic::riscv_vsseg4_mask:
21051 case Intrinsic::riscv_vsseg5_mask:
21052 case Intrinsic::riscv_vsseg6_mask:
21053 case Intrinsic::riscv_vsseg7_mask:
21054 case Intrinsic::riscv_vsseg8_mask: {
21055 SDValue Tuple = N->getOperand(2);
21056 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21057
21058 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21059 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21060 !Tuple.getOperand(0).isUndef())
21061 return SDValue();
21062
21063 SDValue Val = Tuple.getOperand(1);
21064 unsigned Idx = Tuple.getConstantOperandVal(2);
21065
21066 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21067 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21068 "Type mismatch without bitcast?");
21069 unsigned Stride = SEW / 8 * NF;
21070 unsigned Offset = SEW / 8 * Idx;
21071
21072 SDValue Ops[] = {
21073 /*Chain=*/N->getOperand(0),
21074 /*IntID=*/
21075 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21076 /*StoredVal=*/Val,
21077 /*Ptr=*/
21078 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21079 DAG.getConstant(Offset, DL, XLenVT)),
21080 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21081 /*Mask=*/N->getOperand(4),
21082 /*VL=*/N->getOperand(5)};
21083
21084 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21085 // Match getTgtMemIntrinsic for non-unit stride case
21086 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21087      MachineFunction &MF = DAG.getMachineFunction();
21088      MachineMemOperand *MMO = MF.getMachineMemOperand(
21089          OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21090
21091 SDVTList VTs = DAG.getVTList(MVT::Other);
21092 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21093 MMO);
21094 }
21095 }
21096 }
21097 case ISD::EXPERIMENTAL_VP_REVERSE:
21098 return performVP_REVERSECombine(N, DAG, Subtarget);
21099 case ISD::VP_STORE:
21100 return performVP_STORECombine(N, DAG, Subtarget);
21101 case ISD::BITCAST: {
21102 assert(Subtarget.useRVVForFixedLengthVectors());
21103 SDValue N0 = N->getOperand(0);
21104 EVT VT = N->getValueType(0);
21105 EVT SrcVT = N0.getValueType();
21106 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21107 unsigned NF = VT.getRISCVVectorTupleNumFields();
21108 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21109 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21110 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21111
21112 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21113
21114 SDValue Result = DAG.getUNDEF(VT);
21115 for (unsigned i = 0; i < NF; ++i)
21116 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21117 DAG.getTargetConstant(i, DL, MVT::i32));
21118 return Result;
21119 }
21120    // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
21121 // type, widen both sides to avoid a trip through memory.
21122 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21123 VT.isScalarInteger()) {
21124 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21125 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21126 Ops[0] = N0;
21127 SDLoc DL(N);
21128 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21129 N0 = DAG.getBitcast(MVT::i8, N0);
21130 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21131 }
21132
21133 return SDValue();
21134 }
21135 case ISD::VECREDUCE_ADD:
21136 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21137 return V;
21138 [[fallthrough]];
21139 case ISD::CTPOP:
21140 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21141 return V;
21142 break;
21143 case RISCVISD::VRGATHER_VX_VL: {
21144    // Note this assumes that out-of-bounds indices produce poison
21145    // and can thus be replaced without having to prove them in bounds.
21146 EVT VT = N->getValueType(0);
21147 SDValue Src = N->getOperand(0);
21148 SDValue Idx = N->getOperand(1);
21149 SDValue Passthru = N->getOperand(2);
21150 SDValue VL = N->getOperand(4);
21151
21152    // Warning: Unlike most cases where we strip an insert_subvector, this one
21153    // does not require the first operand to be undef.
21154 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21155 isNullConstant(Src.getOperand(2)))
21156 Src = Src.getOperand(1);
21157
21158 switch (Src.getOpcode()) {
21159 default:
21160 break;
21161 case RISCVISD::VMV_V_X_VL:
21162 case RISCVISD::VFMV_V_F_VL:
21163 // Drop a redundant vrgather_vx.
21164 // TODO: Remove the type restriction if we find a motivating
21165 // test case?
21166 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21167 Src.getValueType() == VT)
21168 return Src;
21169 break;
21170 case RISCVISD::VMV_S_X_VL:
21171 case RISCVISD::VFMV_S_F_VL:
21172 // If this use only demands lane zero from the source vmv.s.x, and
21173 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21174 // a vmv.v.x. Note that there can be other uses of the original
21175 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21176 if (isNullConstant(Idx) && Passthru.isUndef() &&
21177 VL == Src.getOperand(2)) {
21178 unsigned Opc =
21179 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21180 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21181 VL);
21182 }
21183 break;
21184 }
21185 break;
21186 }
21187 case RISCVISD::TUPLE_EXTRACT: {
21188 EVT VT = N->getValueType(0);
21189 SDValue Tuple = N->getOperand(0);
21190 unsigned Idx = N->getConstantOperandVal(1);
21191 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21192 break;
21193
21194 unsigned NF = 0;
21195 switch (Tuple.getConstantOperandVal(1)) {
21196 default:
21197 break;
21198 case Intrinsic::riscv_vlseg2_mask:
21199 case Intrinsic::riscv_vlseg3_mask:
21200 case Intrinsic::riscv_vlseg4_mask:
21201 case Intrinsic::riscv_vlseg5_mask:
21202 case Intrinsic::riscv_vlseg6_mask:
21203 case Intrinsic::riscv_vlseg7_mask:
21204 case Intrinsic::riscv_vlseg8_mask:
21205 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21206 break;
21207 }
21208
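// Only rewrite if the subtarget has no optimized segment load for this NF;
// otherwise the vlseg itself is the better choice.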
21209 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21210 break;
21211
21212 unsigned SEW = VT.getScalarSizeInBits();
21213 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21214 "Type mismatch without bitcast?");
21215 unsigned Stride = SEW / 8 * NF;
21216 unsigned Offset = SEW / 8 * Idx;
21217
21218 SDValue Ops[] = {
21219 /*Chain=*/Tuple.getOperand(0),
21220 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21221 /*Passthru=*/Tuple.getOperand(2),
21222 /*Ptr=*/
21223 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21224 DAG.getConstant(Offset, DL, XLenVT)),
21225 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21226 /*Mask=*/Tuple.getOperand(4),
21227 /*VL=*/Tuple.getOperand(5),
21228 /*Policy=*/Tuple.getOperand(6)};
21229
21230 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21231 // Match getTgtMemIntrinsic for non-unit stride case
21232 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21233 MachineFunction &MF = DAG.getMachineFunction();
21234 MachineMemOperand *MMO = MF.getMachineMemOperand(
21235 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21236
21237 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21238 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21239 Ops, MemVT, MMO);
21240 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21241 return Result.getValue(0);
21242 }
21243 case RISCVISD::TUPLE_INSERT: {
21244 // tuple_insert tuple, undef, idx -> tuple
21245 if (N->getOperand(1).isUndef())
21246 return N->getOperand(0);
21247 break;
21248 }
21249 case RISCVISD::VSLIDE1UP_VL:
21250 case RISCVISD::VFSLIDE1UP_VL: {
21251 using namespace SDPatternMatch;
21252 SDValue SrcVec;
21253 SDLoc DL(N);
21254 MVT VT = N->getSimpleValueType(0);
21255 // If the scalar we're sliding in was extracted from the first element of a
21256 // vector, we can use that vector as the passthru in a normal slideup of 1.
21257 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
21258 if (!N->getOperand(0).isUndef() ||
21259 !sd_match(N->getOperand(2),
21260 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21261 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21262 break;
21263
21264 MVT SrcVecVT = SrcVec.getSimpleValueType();
21265 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21266 break;
21267 // Adapt the value type of source vector.
21268 if (SrcVecVT.isFixedLengthVector()) {
21269 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21270 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21271 }
21272 if (VT.getVectorMinNumElements() >= SrcVecVT.getVectorMinNumElements())
21273 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21274 else
21275 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21276
21277 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21278 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21279 N->getOperand(4));
21280 }
21281 }
21282
21283 return SDValue();
21284}
21285
21286 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21287 EVT XVT, unsigned KeptBits) const {
21288 // For vectors, we don't have a preference.
21289 if (XVT.isVector())
21290 return false;
21291
21292 if (XVT != MVT::i32 && XVT != MVT::i64)
21293 return false;
21294
21295 // We can use sext.w for RV64 or an srai 31 on RV32.
21296 if (KeptBits == 32 || KeptBits == 64)
21297 return true;
21298
21299 // With Zbb we can use sext.h/sext.b.
21300 return Subtarget.hasStdExtZbb() &&
21301 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21302 KeptBits == 16);
21303}
21304
21305 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21306 const SDNode *N, CombineLevel Level) const {
21307 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21308 N->getOpcode() == ISD::SRL) &&
21309 "Expected shift op");
21310
21311 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21312 // materialised in fewer instructions than `(OP _, c1)`:
21313 //
21314 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21315 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
21316 SDValue N0 = N->getOperand(0);
21317 EVT Ty = N0.getValueType();
21318
21319 // Loads and stores can fold a constant offset into their addressing, so if
21320 // AddNode is also used by a load or store, the offset folding performed
21321 // above is still achievable there.
21322 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21323 for (SDNode *Use : X->users()) {
21324 // This use is the one we're on right now. Skip it.
21325 if (Use == User || Use->getOpcode() == ISD::SELECT)
21326 continue;
21328 return false;
21329 }
21330 return true;
21331 };
21332
21333 if (Ty.isScalarInteger() &&
21334 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21335 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21336 return isUsedByLdSt(N0.getNode(), N);
21337
21338 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21339 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21340
21341 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21342 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
21343 N->user_begin()->getOpcode() == ISD::ADD &&
21344 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21345 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21346 return false;
21347
21348 if (C1 && C2) {
21349 const APInt &C1Int = C1->getAPIntValue();
21350 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21351
21352 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21353 // and the combine should happen, to potentially allow further combines
21354 // later.
21355 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21356 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21357 return true;
21358
21359 // We can materialise `c1` in an add immediate, so it's "free", and the
21360 // combine should be prevented.
21361 if (C1Int.getSignificantBits() <= 64 &&
21363 return false;
21364
21365 // Neither constant will fit into an immediate, so find materialisation
21366 // costs.
21367 int C1Cost =
21368 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21369 /*CompressionCost*/ true);
21370 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21371 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21372 /*CompressionCost*/ true);
21373
21374 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21375 // combine should be prevented.
21376 if (C1Cost < ShiftedC1Cost)
21377 return false;
21378 }
21379 }
21380
21381 if (!N0->hasOneUse())
21382 return false;
21383
21384 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21385 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21386 !N0->getOperand(0)->hasOneUse())
21387 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21388
21389 return true;
21390}
21391
21392 bool RISCVTargetLowering::targetShrinkDemandedConstant(
21393 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21394 TargetLoweringOpt &TLO) const {
21395 // Delay this optimization as late as possible.
21396 if (!TLO.LegalOps)
21397 return false;
21398
21399 EVT VT = Op.getValueType();
21400 if (VT.isVector())
21401 return false;
21402
21403 unsigned Opcode = Op.getOpcode();
21404 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21405 return false;
21406
21407 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21408 if (!C)
21409 return false;
21410
21411 const APInt &Mask = C->getAPIntValue();
21412
21413 // Clear all non-demanded bits initially.
21414 APInt ShrunkMask = Mask & DemandedBits;
21415
21416 // Try to make a smaller immediate by setting undemanded bits.
21417
21418 APInt ExpandedMask = Mask | ~DemandedBits;
21419
21420 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21421 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21422 };
21423 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21424 if (NewMask == Mask)
21425 return true;
21426 SDLoc DL(Op);
21427 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21428 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21429 Op.getOperand(0), NewC);
21430 return TLO.CombineTo(Op, NewOp);
21431 };
21432
21433 // If the shrunk mask fits in sign extended 12 bits, let the target
21434 // independent code apply it.
21435 if (ShrunkMask.isSignedIntN(12))
21436 return false;
21437
21438 // AND has a few special cases for zext.
21439 if (Opcode == ISD::AND) {
21440 // Preserve (and X, 0xffff); if zext.h exists use zext.h,
21441 // otherwise use SLLI + SRLI.
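// e.g. if only the low 16 bits of (and X, 0x1ffff) are demanded, the constant
// can be changed to 0xffff, which zext.h (or SLLI+SRLI) handles without
// materializing an immediate.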
21442 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21443 if (IsLegalMask(NewMask))
21444 return UseMask(NewMask);
21445
21446 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21447 if (VT == MVT::i64) {
21448 APInt NewMask = APInt(64, 0xffffffff);
21449 if (IsLegalMask(NewMask))
21450 return UseMask(NewMask);
21451 }
21452 }
21453
21454 // For the remaining optimizations, we need to be able to make a negative
21455 // number through a combination of mask and undemanded bits.
21456 if (!ExpandedMask.isNegative())
21457 return false;
21458
21459 // What is the fewest number of bits we need to represent the negative number?
21460 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21461
21462 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21463 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21464 // If we can't create a simm12, we shouldn't change opaque constants.
21465 APInt NewMask = ShrunkMask;
21466 if (MinSignedBits <= 12)
21467 NewMask.setBitsFrom(11);
21468 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21469 NewMask.setBitsFrom(31);
21470 else
21471 return false;
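// e.g. for (or X, 0x800) with only the low 12 bits demanded, setting the
// undemanded upper bits yields 0xfffff800 (-2048), which fits in a simm12
// even though 0x800 does not.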
21472
21473 // Check that our new mask is a subset of the demanded mask.
21474 assert(IsLegalMask(NewMask));
21475 return UseMask(NewMask);
21476}
21477
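// Software model of the generalized bit-reverse / OR-combine butterfly
// network from the unratified Zbp proposal. A shift amount of 7 enables all
// three in-byte stages, matching the semantics of brev8 and orc.b.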
21478static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21479 static const uint64_t GREVMasks[] = {
21480 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21481 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21482
21483 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21484 unsigned Shift = 1 << Stage;
21485 if (ShAmt & Shift) {
21486 uint64_t Mask = GREVMasks[Stage];
21487 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21488 if (IsGORC)
21489 Res |= x;
21490 x = Res;
21491 }
21492 }
21493
21494 return x;
21495}
21496
21497 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21498 KnownBits &Known,
21499 const APInt &DemandedElts,
21500 const SelectionDAG &DAG,
21501 unsigned Depth) const {
21502 unsigned BitWidth = Known.getBitWidth();
21503 unsigned Opc = Op.getOpcode();
21508 "Should use MaskedValueIsZero if you don't know whether Op"
21509 " is a target node!");
21510
21511 Known.resetAll();
21512 switch (Opc) {
21513 default: break;
21514 case RISCVISD::SELECT_CC: {
21515 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21516 // If we don't know any bits, early out.
21517 if (Known.isUnknown())
21518 break;
21519 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21520
21521 // Only known if known in both the LHS and RHS.
21522 Known = Known.intersectWith(Known2);
21523 break;
21524 }
21525 case RISCVISD::VCPOP_VL: {
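// The population count of a mask cannot exceed the VL operand (operand 2),
// so every bit above that operand's maximum active bit is known zero.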
21526 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21527 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21528 break;
21529 }
21530 case RISCVISD::CZERO_EQZ:
21531 case RISCVISD::CZERO_NEZ:
21532 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21533 // Result is either all zero or operand 0. We can propagate zeros, but not
21534 // ones.
21535 Known.One.clearAllBits();
21536 break;
21537 case RISCVISD::REMUW: {
21538 KnownBits Known2;
21539 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21540 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21541 // We only care about the lower 32 bits.
21542 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21543 // Restore the original width by sign extending.
21544 Known = Known.sext(BitWidth);
21545 break;
21546 }
21547 case RISCVISD::DIVUW: {
21548 KnownBits Known2;
21549 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21550 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21551 // We only care about the lower 32 bits.
21552 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21553 // Restore the original width by sign extending.
21554 Known = Known.sext(BitWidth);
21555 break;
21556 }
21557 case RISCVISD::SLLW: {
21558 KnownBits Known2;
21559 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21560 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21561 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21562 // Restore the original width by sign extending.
21563 Known = Known.sext(BitWidth);
21564 break;
21565 }
21566 case RISCVISD::SRLW: {
21567 KnownBits Known2;
21568 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21569 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21570 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21571 // Restore the original width by sign extending.
21572 Known = Known.sext(BitWidth);
21573 break;
21574 }
21575 case RISCVISD::SRAW: {
21576 KnownBits Known2;
21577 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21578 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21579 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21580 // Restore the original width by sign extending.
21581 Known = Known.sext(BitWidth);
21582 break;
21583 }
21584 case RISCVISD::SHL_ADD: {
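// SHL_ADD computes (Op0 << ShAmt) + Op2 (sh1add/sh2add/sh3add), so shift
// Op0's known bits and add Op2's known bits.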
21585 KnownBits Known2;
21586 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21587 unsigned ShAmt = Op.getConstantOperandVal(1);
21588 Known <<= ShAmt;
21589 Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
21590 Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
21591 Known = KnownBits::add(Known, Known2);
21592 break;
21593 }
21594 case RISCVISD::CTZW: {
21595 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21596 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21597 unsigned LowBits = llvm::bit_width(PossibleTZ);
21598 Known.Zero.setBitsFrom(LowBits);
21599 break;
21600 }
21601 case RISCVISD::CLZW: {
21602 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21603 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21604 unsigned LowBits = llvm::bit_width(PossibleLZ);
21605 Known.Zero.setBitsFrom(LowBits);
21606 break;
21607 }
21608 case RISCVISD::BREV8:
21609 case RISCVISD::ORC_B: {
21610 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21611 // control value of 7 is equivalent to brev8 and orc.b.
21612 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21613 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21614 // To compute zeros for ORC_B, we need to invert the value and invert it
21615 // back after. This inverting is harmless for BREV8.
21616 Known.Zero =
21617 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21618 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21619 break;
21620 }
21621 case RISCVISD::READ_VLENB: {
21622 // We can use the minimum and maximum VLEN values to bound VLENB. We
21623 // know VLEN must be a power of two.
21624 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21625 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21626 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21627 Known.Zero.setLowBits(Log2_32(MinVLenB));
21628 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21629 if (MaxVLenB == MinVLenB)
21630 Known.One.setBit(Log2_32(MinVLenB));
21631 break;
21632 }
21633 case RISCVISD::FCLASS: {
21634 // fclass will only set one of the low 10 bits.
21635 Known.Zero.setBitsFrom(10);
21636 break;
21637 }
21638 case ISD::INTRINSIC_WO_CHAIN:
21639 case ISD::INTRINSIC_W_CHAIN: {
21640 unsigned IntNo =
21641 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21642 switch (IntNo) {
21643 default:
21644 // We can't do anything for most intrinsics.
21645 break;
21646 case Intrinsic::riscv_vsetvli:
21647 case Intrinsic::riscv_vsetvlimax: {
21648 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21649 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21650 RISCVVType::VLMUL VLMUL =
21651 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21652 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21653 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21654 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21655 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
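// e.g. with a maximum VLEN of 512, SEW=32 and LMUL=2 give MaxVL = 512/32*2
// = 32, so bits 6 and above of the returned VL are known to be zero.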
21656
21657 // The result of vsetvli must not be larger than AVL.
21658 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21659 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21660
21661 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21662 if (BitWidth > KnownZeroFirstBit)
21663 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21664 break;
21665 }
21666 }
21667 break;
21668 }
21669 }
21670}
21671
21672 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21673 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21674 unsigned Depth) const {
21675 switch (Op.getOpcode()) {
21676 default:
21677 break;
21678 case RISCVISD::SELECT_CC: {
21679 unsigned Tmp =
21680 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21681 if (Tmp == 1) return 1; // Early out.
21682 unsigned Tmp2 =
21683 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21684 return std::min(Tmp, Tmp2);
21685 }
21686 case RISCVISD::CZERO_EQZ:
21687 case RISCVISD::CZERO_NEZ:
21688 // Output is either all zero or operand 0. We can propagate sign bit count
21689 // from operand 0.
21690 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21691 case RISCVISD::ABSW: {
21692 // We expand this at isel to negw+max. The result will have 33 sign bits
21693 // if the input has at least 33 sign bits.
21694 unsigned Tmp =
21695 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21696 if (Tmp < 33) return 1;
21697 return 33;
21698 }
21699 case RISCVISD::SRAW: {
21700 unsigned Tmp =
21701 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21702 // sraw produces at least 33 sign bits. If the input already has more than
21703 // 33 sign bits, sraw will preserve them.
21704 // TODO: A more precise answer could be calculated depending on known bits
21705 // in the shift amount.
21706 return std::max(Tmp, 33U);
21707 }
21708 case RISCVISD::SLLW:
21709 case RISCVISD::SRLW:
21710 case RISCVISD::DIVW:
21711 case RISCVISD::DIVUW:
21712 case RISCVISD::REMUW:
21713 case RISCVISD::ROLW:
21714 case RISCVISD::RORW:
21715 case RISCVISD::FCVT_W_RV64:
21716 case RISCVISD::FCVT_WU_RV64:
21717 case RISCVISD::STRICT_FCVT_W_RV64:
21718 case RISCVISD::STRICT_FCVT_WU_RV64:
21719 // TODO: As the result is sign-extended, this is conservatively correct.
21720 return 33;
21721 case RISCVISD::VMV_X_S: {
21722 // The number of sign bits of the scalar result is computed by obtaining the
21723 // element type of the input vector operand, subtracting its width from the
21724 // XLEN, and then adding one (sign bit within the element type). If the
21725 // element type is wider than XLen, the least-significant XLEN bits are
21726 // taken.
21727 unsigned XLen = Subtarget.getXLen();
21728 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21729 if (EltBits <= XLen)
21730 return XLen - EltBits + 1;
21731 break;
21732 }
21733 case ISD::INTRINSIC_W_CHAIN: {
21734 unsigned IntNo = Op.getConstantOperandVal(1);
21735 switch (IntNo) {
21736 default:
21737 break;
21738 case Intrinsic::riscv_masked_atomicrmw_xchg:
21739 case Intrinsic::riscv_masked_atomicrmw_add:
21740 case Intrinsic::riscv_masked_atomicrmw_sub:
21741 case Intrinsic::riscv_masked_atomicrmw_nand:
21742 case Intrinsic::riscv_masked_atomicrmw_max:
21743 case Intrinsic::riscv_masked_atomicrmw_min:
21744 case Intrinsic::riscv_masked_atomicrmw_umax:
21745 case Intrinsic::riscv_masked_atomicrmw_umin:
21746 case Intrinsic::riscv_masked_cmpxchg:
21747 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21748 // narrow atomic operation. These are implemented using atomic
21749 // operations at the minimum supported atomicrmw/cmpxchg width whose
21750 // result is then sign extended to XLEN. With +A, the minimum width is
21751 // 32 for both RV64 and RV32.
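// For example, on RV64 the emulated operation produces a 32-bit value
// sign-extended to 64 bits, so 64 - 31 = 33 sign bits are guaranteed.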
21753 assert(Subtarget.hasStdExtA());
21754 return Op.getValueSizeInBits() - 31;
21755 }
21756 break;
21757 }
21758 }
21759
21760 return 1;
21761}
21762
21763 bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21764 SDValue Op, const APInt &OriginalDemandedBits,
21765 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21766 unsigned Depth) const {
21767 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21768
21769 switch (Op.getOpcode()) {
21770 case RISCVISD::BREV8:
21771 case RISCVISD::ORC_B: {
21772 KnownBits Known2;
21773 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21774 // For BREV8, we need to do BREV8 on the demanded bits.
21775 // For ORC_B, any bit in the output demands all bits from the same byte.
21776 // So we need to do ORC_B on the demanded bits.
21777 APInt DemandedBits =
21778 APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21779 7, IsGORC));
21780 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21781 OriginalDemandedElts, Known2, TLO, Depth + 1))
21782 return true;
21783
21784 // To compute zeros for ORC_B, we need to invert the value and invert it
21785 // back after. This inverting is harmless for BREV8.
21786 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21787 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21788 return false;
21789 }
21790 }
21791
21792 return TargetLowering::SimplifyDemandedBitsForTargetNode(
21793 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21794}
21795
21797 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21798 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21799
21800 // TODO: Add more target nodes.
21801 switch (Op.getOpcode()) {
21802 case RISCVISD::SLLW:
21803 case RISCVISD::SRAW:
21804 case RISCVISD::SRLW:
21805 case RISCVISD::RORW:
21806 case RISCVISD::ROLW:
21807 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21808 // amount is in bounds.
21809 return false;
21810 case RISCVISD::SELECT_CC:
21811 // Integer comparisons cannot create poison.
21812 assert(Op.getOperand(0).getValueType().isInteger() &&
21813 "RISCVISD::SELECT_CC only compares integers");
21814 return false;
21815 }
21816 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
21817 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21818}
21819
21820const Constant *
21821 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
21822 assert(Ld && "Unexpected null LoadSDNode");
21823 if (!ISD::isNormalLoad(Ld))
21824 return nullptr;
21825
21826 SDValue Ptr = Ld->getBasePtr();
21827
21828 // Only constant pools with no offset are supported.
21829 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21830 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21831 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21832 CNode->getOffset() != 0)
21833 return nullptr;
21834
21835 return CNode;
21836 };
21837
21838 // Simple case, LLA.
21839 if (Ptr.getOpcode() == RISCVISD::LLA) {
21840 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21841 if (!CNode || CNode->getTargetFlags() != 0)
21842 return nullptr;
21843
21844 return CNode->getConstVal();
21845 }
21846
21847 // Look for a HI and ADD_LO pair.
21848 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21849 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21850 return nullptr;
21851
21852 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21853 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21854
21855 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21856 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21857 return nullptr;
21858
21859 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21860 return nullptr;
21861
21862 return CNodeLo->getConstVal();
21863}
21864
21865 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
21866 MachineBasicBlock *BB) {
21867 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21868
21869 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21870 // Should the count have wrapped while it was being read, we need to try
21871 // again.
21872 // For example:
21873 // ```
21874 // read:
21875 // csrrs x3, counterh # load high word of counter
21876 // csrrs x2, counter # load low word of counter
21877 // csrrs x4, counterh # load high word of counter
21878 // bne x3, x4, read # check if high word reads match, otherwise try again
21879 // ```
21880
21881 MachineFunction &MF = *BB->getParent();
21882 const BasicBlock *LLVMBB = BB->getBasicBlock();
21883 MachineFunction::iterator It = ++BB->getIterator();
21884 
21885 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
21886 MF.insert(It, LoopMBB);
21887
21888 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
21889 MF.insert(It, DoneMBB);
21890
21891 // Transfer the remainder of BB and its successor edges to DoneMBB.
21892 DoneMBB->splice(DoneMBB->begin(), BB,
21893 std::next(MachineBasicBlock::iterator(MI)), BB->end());
21895
21896 BB->addSuccessor(LoopMBB);
21897
21898 MachineRegisterInfo &RegInfo = MF.getRegInfo();
21899 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
21900 Register LoReg = MI.getOperand(0).getReg();
21901 Register HiReg = MI.getOperand(1).getReg();
21902 int64_t LoCounter = MI.getOperand(2).getImm();
21903 int64_t HiCounter = MI.getOperand(3).getImm();
21904 DebugLoc DL = MI.getDebugLoc();
21905
21907 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
21908 .addImm(HiCounter)
21909 .addReg(RISCV::X0);
21910 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
21911 .addImm(LoCounter)
21912 .addReg(RISCV::X0);
21913 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
21914 .addImm(HiCounter)
21915 .addReg(RISCV::X0);
21916
21917 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
21918 .addReg(HiReg)
21919 .addReg(ReadAgainReg)
21920 .addMBB(LoopMBB);
21921
21922 LoopMBB->addSuccessor(LoopMBB);
21923 LoopMBB->addSuccessor(DoneMBB);
21924
21925 MI.eraseFromParent();
21926
21927 return DoneMBB;
21928}
21929
21930 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
21931 MachineBasicBlock *BB,
21932 const RISCVSubtarget &Subtarget) {
21933 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
21934
21935 MachineFunction &MF = *BB->getParent();
21936 DebugLoc DL = MI.getDebugLoc();
21939 Register LoReg = MI.getOperand(0).getReg();
21940 Register HiReg = MI.getOperand(1).getReg();
21941 Register SrcReg = MI.getOperand(2).getReg();
21942
21943 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
21944 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21945
21946 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
21947 RI, Register());
21949 MachineMemOperand *MMOLo =
21953 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
21954 .addFrameIndex(FI)
21955 .addImm(0)
21956 .addMemOperand(MMOLo);
21957 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
21958 .addFrameIndex(FI)
21959 .addImm(4)
21960 .addMemOperand(MMOHi);
21961 MI.eraseFromParent(); // The pseudo instruction is gone now.
21962 return BB;
21963}
21964
21965 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
21966 MachineBasicBlock *BB,
21967 const RISCVSubtarget &Subtarget) {
21968 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
21969 "Unexpected instruction");
21970
21971 MachineFunction &MF = *BB->getParent();
21972 DebugLoc DL = MI.getDebugLoc();
21975 Register DstReg = MI.getOperand(0).getReg();
21976 Register LoReg = MI.getOperand(1).getReg();
21977 Register HiReg = MI.getOperand(2).getReg();
21978
21979 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
21980 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
21981
21983 MachineMemOperand *MMOLo =
21987 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21988 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
21989 .addFrameIndex(FI)
21990 .addImm(0)
21991 .addMemOperand(MMOLo);
21992 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
21993 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
21994 .addFrameIndex(FI)
21995 .addImm(4)
21996 .addMemOperand(MMOHi);
21997 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
21998 MI.eraseFromParent(); // The pseudo instruction is gone now.
21999 return BB;
22000}
22001
22002 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
22003 unsigned RelOpcode, unsigned EqOpcode,
22004 const RISCVSubtarget &Subtarget) {
22005 DebugLoc DL = MI.getDebugLoc();
22006 Register DstReg = MI.getOperand(0).getReg();
22007 Register Src1Reg = MI.getOperand(1).getReg();
22008 Register Src2Reg = MI.getOperand(2).getReg();
22010 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22012
22013 // Save the current FFLAGS.
22014 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22015
22016 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22017 .addReg(Src1Reg)
22018 .addReg(Src2Reg);
22021
22022 // Restore the FFLAGS.
22023 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22024 .addReg(SavedFFlags, RegState::Kill);
22025
22026 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
22027 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22028 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22029 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22032
22033 // Erase the pseudoinstruction.
22034 MI.eraseFromParent();
22035 return BB;
22036}
22037
22038static MachineBasicBlock *
22039 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22040 MachineBasicBlock *ThisMBB,
22041 const RISCVSubtarget &Subtarget) {
22042 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22043 // Without this, custom-inserter would have generated:
22044 //
22045 // A
22046 // | \
22047 // | B
22048 // | /
22049 // C
22050 // | \
22051 // | D
22052 // | /
22053 // E
22054 //
22055 // A: X = ...; Y = ...
22056 // B: empty
22057 // C: Z = PHI [X, A], [Y, B]
22058 // D: empty
22059 // E: PHI [X, C], [Z, D]
22060 //
22061 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22062 //
22063 // A
22064 // | \
22065 // | C
22066 // | /|
22067 // |/ |
22068 // | |
22069 // | D
22070 // | /
22071 // E
22072 //
22073 // A: X = ...; Y = ...
22074 // D: empty
22075 // E: PHI [X, A], [X, C], [Y, D]
22076
22077 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22078 const DebugLoc &DL = First.getDebugLoc();
22079 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22080 MachineFunction *F = ThisMBB->getParent();
22081 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22082 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22083 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22084 MachineFunction::iterator It = ++ThisMBB->getIterator();
22085 F->insert(It, FirstMBB);
22086 F->insert(It, SecondMBB);
22087 F->insert(It, SinkMBB);
22088
22089 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22090 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22092 ThisMBB->end());
22093 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22094
22095 // Fallthrough block for ThisMBB.
22096 ThisMBB->addSuccessor(FirstMBB);
22097 // Fallthrough block for FirstMBB.
22098 FirstMBB->addSuccessor(SecondMBB);
22099 ThisMBB->addSuccessor(SinkMBB);
22100 FirstMBB->addSuccessor(SinkMBB);
22101 // This is fallthrough.
22102 SecondMBB->addSuccessor(SinkMBB);
22103
22104 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22105 Register FLHS = First.getOperand(1).getReg();
22106 Register FRHS = First.getOperand(2).getReg();
22107 // Insert appropriate branch.
22108 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22109 .addReg(FLHS)
22110 .addReg(FRHS)
22111 .addMBB(SinkMBB);
22112
22113 Register SLHS = Second.getOperand(1).getReg();
22114 Register SRHS = Second.getOperand(2).getReg();
22115 Register Op1Reg4 = First.getOperand(4).getReg();
22116 Register Op1Reg5 = First.getOperand(5).getReg();
22117
22118 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22119 // Insert appropriate branch.
22120 BuildMI(ThisMBB, DL,
22121 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22122 .addReg(SLHS)
22123 .addReg(SRHS)
22124 .addMBB(SinkMBB);
22125
22126 Register DestReg = Second.getOperand(0).getReg();
22127 Register Op2Reg4 = Second.getOperand(4).getReg();
22128 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22129 .addReg(Op2Reg4)
22130 .addMBB(ThisMBB)
22131 .addReg(Op1Reg4)
22132 .addMBB(FirstMBB)
22133 .addReg(Op1Reg5)
22134 .addMBB(SecondMBB);
22135
22136 // Now remove the Select_FPRX_s.
22137 First.eraseFromParent();
22138 Second.eraseFromParent();
22139 return SinkMBB;
22140}
22141
22142 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22143 MachineBasicBlock *BB,
22144 const RISCVSubtarget &Subtarget) {
22145 // To "insert" Select_* instructions, we actually have to insert the triangle
22146 // control-flow pattern. The incoming instructions know the destination vreg
22147 // to set, the condition code register to branch on, the true/false values to
22148 // select between, and the condcode to use to select the appropriate branch.
22149 //
22150 // We produce the following control flow:
22151 // HeadMBB
22152 // | \
22153 // | IfFalseMBB
22154 // | /
22155 // TailMBB
22156 //
22157 // When we find a sequence of selects we attempt to optimize their emission
22158 // by sharing the control flow. Currently we only handle cases where we have
22159 // multiple selects with the exact same condition (same LHS, RHS and CC).
22160 // The selects may be interleaved with other instructions if the other
22161 // instructions meet some requirements we deem safe:
22162 // - They are not pseudo instructions.
22163 // - They are debug instructions, or otherwise
22164 // - they do not have side-effects, do not access memory, and their inputs do
22165 // not depend on the results of the select pseudo-instructions.
22166 // The TrueV/FalseV operands of the selects cannot depend on the result of
22167 // previous selects in the sequence.
22168 // These conditions could be further relaxed. See the X86 target for a
22169 // related approach and more information.
22170 //
22171 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22172 // is checked here and handled by a separate function -
22173 // EmitLoweredCascadedSelect.
22174
22175 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22176 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22177 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22178 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22179 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22180 Next->getOperand(5).isKill())
22181 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22182
22183 Register LHS = MI.getOperand(1).getReg();
22184 Register RHS;
22185 if (MI.getOperand(2).isReg())
22186 RHS = MI.getOperand(2).getReg();
22187 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22188
22189 SmallVector<MachineInstr *, 4> SelectDebugValues;
22190 SmallSet<Register, 4> SelectDests;
22191 SelectDests.insert(MI.getOperand(0).getReg());
22192
22193 MachineInstr *LastSelectPseudo = &MI;
22194 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22195 SequenceMBBI != E; ++SequenceMBBI) {
22196 if (SequenceMBBI->isDebugInstr())
22197 continue;
22198 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22199 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22200 !SequenceMBBI->getOperand(2).isReg() ||
22201 SequenceMBBI->getOperand(2).getReg() != RHS ||
22202 SequenceMBBI->getOperand(3).getImm() != CC ||
22203 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22204 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22205 break;
22206 LastSelectPseudo = &*SequenceMBBI;
22207 SequenceMBBI->collectDebugValues(SelectDebugValues);
22208 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22209 continue;
22210 }
22211 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22212 SequenceMBBI->mayLoadOrStore() ||
22213 SequenceMBBI->usesCustomInsertionHook())
22214 break;
22215 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22216 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22217 }))
22218 break;
22219 }
22220
22221 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22222 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22223 DebugLoc DL = MI.getDebugLoc();
22224 MachineFunction::iterator I = ++BB->getIterator();
22225 
22226 MachineBasicBlock *HeadMBB = BB;
22227 MachineFunction *F = BB->getParent();
22228 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22229 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22230
22231 F->insert(I, IfFalseMBB);
22232 F->insert(I, TailMBB);
22233
22234 // Set the call frame size on entry to the new basic blocks.
22235 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22236 IfFalseMBB->setCallFrameSize(CallFrameSize);
22237 TailMBB->setCallFrameSize(CallFrameSize);
22238
22239 // Transfer debug instructions associated with the selects to TailMBB.
22240 for (MachineInstr *DebugInstr : SelectDebugValues) {
22241 TailMBB->push_back(DebugInstr->removeFromParent());
22242 }
22243
22244 // Move all instructions after the sequence to TailMBB.
22245 TailMBB->splice(TailMBB->end(), HeadMBB,
22246 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22247 // Update machine-CFG edges by transferring all successors of the current
22248 // block to the new block which will contain the Phi nodes for the selects.
22249 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22250 // Set the successors for HeadMBB.
22251 HeadMBB->addSuccessor(IfFalseMBB);
22252 HeadMBB->addSuccessor(TailMBB);
22253
22254 // Insert appropriate branch.
22255 if (MI.getOperand(2).isImm())
22256 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22257 .addReg(LHS)
22258 .addImm(MI.getOperand(2).getImm())
22259 .addMBB(TailMBB);
22260 else
22261 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22262 .addReg(LHS)
22263 .addReg(RHS)
22264 .addMBB(TailMBB);
22265
22266 // IfFalseMBB just falls through to TailMBB.
22267 IfFalseMBB->addSuccessor(TailMBB);
22268
22269 // Create PHIs for all of the select pseudo-instructions.
22270 auto SelectMBBI = MI.getIterator();
22271 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22272 auto InsertionPoint = TailMBB->begin();
22273 while (SelectMBBI != SelectEnd) {
22274 auto Next = std::next(SelectMBBI);
22275 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22276 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22277 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22278 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22279 .addReg(SelectMBBI->getOperand(4).getReg())
22280 .addMBB(HeadMBB)
22281 .addReg(SelectMBBI->getOperand(5).getReg())
22282 .addMBB(IfFalseMBB);
22283 SelectMBBI->eraseFromParent();
22284 }
22285 SelectMBBI = Next;
22286 }
22287
22288 F->getProperties().resetNoPHIs();
22289 return TailMBB;
22290}
22291
22292// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22293static const RISCV::RISCVMaskedPseudoInfo *
22294lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22296 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22297 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22299 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22300 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22301 return Masked;
22302}
22303
22304 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22305 MachineBasicBlock *BB,
22306 unsigned CVTXOpc) {
22307 DebugLoc DL = MI.getDebugLoc();
22308
22310
22312 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22313
22314 // Save the old value of FFLAGS.
22315 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22316
22317 assert(MI.getNumOperands() == 7);
22318
22319 // Emit a VFCVT_X_F
22320 const TargetRegisterInfo *TRI =
22322 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22323 Register Tmp = MRI.createVirtualRegister(RC);
22324 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22325 .add(MI.getOperand(1))
22326 .add(MI.getOperand(2))
22327 .add(MI.getOperand(3))
22328 .add(MachineOperand::CreateImm(7)) // frm = DYN
22329 .add(MI.getOperand(4))
22330 .add(MI.getOperand(5))
22331 .add(MI.getOperand(6))
22332 .add(MachineOperand::CreateReg(RISCV::FRM,
22333 /*IsDef*/ false,
22334 /*IsImp*/ true));
22335
22336 // Emit a VFCVT_F_X
22337 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22338 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22339 // There is no E8 variant for VFCVT_F_X.
22340 assert(Log2SEW >= 4);
22341 unsigned CVTFOpc =
22342 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22343 ->MaskedPseudo;
22344
22345 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22346 .add(MI.getOperand(0))
22347 .add(MI.getOperand(1))
22348 .addReg(Tmp)
22349 .add(MI.getOperand(3))
22350 .add(MachineOperand::CreateImm(7)) // frm = DYN
22351 .add(MI.getOperand(4))
22352 .add(MI.getOperand(5))
22353 .add(MI.getOperand(6))
22354 .add(MachineOperand::CreateReg(RISCV::FRM,
22355 /*IsDef*/ false,
22356 /*IsImp*/ true));
22357
22358 // Restore FFLAGS.
22359 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22360 .addReg(SavedFFLAGS, RegState::Kill);
22361
22362 // Erase the pseudoinstruction.
22363 MI.eraseFromParent();
22364 return BB;
22365}
22366
22367 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22368 const RISCVSubtarget &Subtarget) {
22369 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22370 const TargetRegisterClass *RC;
22371 switch (MI.getOpcode()) {
22372 default:
22373 llvm_unreachable("Unexpected opcode");
22374 case RISCV::PseudoFROUND_H:
22375 CmpOpc = RISCV::FLT_H;
22376 F2IOpc = RISCV::FCVT_W_H;
22377 I2FOpc = RISCV::FCVT_H_W;
22378 FSGNJOpc = RISCV::FSGNJ_H;
22379 FSGNJXOpc = RISCV::FSGNJX_H;
22380 RC = &RISCV::FPR16RegClass;
22381 break;
22382 case RISCV::PseudoFROUND_H_INX:
22383 CmpOpc = RISCV::FLT_H_INX;
22384 F2IOpc = RISCV::FCVT_W_H_INX;
22385 I2FOpc = RISCV::FCVT_H_W_INX;
22386 FSGNJOpc = RISCV::FSGNJ_H_INX;
22387 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22388 RC = &RISCV::GPRF16RegClass;
22389 break;
22390 case RISCV::PseudoFROUND_S:
22391 CmpOpc = RISCV::FLT_S;
22392 F2IOpc = RISCV::FCVT_W_S;
22393 I2FOpc = RISCV::FCVT_S_W;
22394 FSGNJOpc = RISCV::FSGNJ_S;
22395 FSGNJXOpc = RISCV::FSGNJX_S;
22396 RC = &RISCV::FPR32RegClass;
22397 break;
22398 case RISCV::PseudoFROUND_S_INX:
22399 CmpOpc = RISCV::FLT_S_INX;
22400 F2IOpc = RISCV::FCVT_W_S_INX;
22401 I2FOpc = RISCV::FCVT_S_W_INX;
22402 FSGNJOpc = RISCV::FSGNJ_S_INX;
22403 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22404 RC = &RISCV::GPRF32RegClass;
22405 break;
22406 case RISCV::PseudoFROUND_D:
22407 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22408 CmpOpc = RISCV::FLT_D;
22409 F2IOpc = RISCV::FCVT_L_D;
22410 I2FOpc = RISCV::FCVT_D_L;
22411 FSGNJOpc = RISCV::FSGNJ_D;
22412 FSGNJXOpc = RISCV::FSGNJX_D;
22413 RC = &RISCV::FPR64RegClass;
22414 break;
22415 case RISCV::PseudoFROUND_D_INX:
22416 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22417 CmpOpc = RISCV::FLT_D_INX;
22418 F2IOpc = RISCV::FCVT_L_D_INX;
22419 I2FOpc = RISCV::FCVT_D_L_INX;
22420 FSGNJOpc = RISCV::FSGNJ_D_INX;
22421 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22422 RC = &RISCV::GPRRegClass;
22423 break;
22424 }
22425
22426 const BasicBlock *BB = MBB->getBasicBlock();
22427 DebugLoc DL = MI.getDebugLoc();
22428 MachineFunction::iterator I = ++MBB->getIterator();
22429
22430 MachineFunction *F = MBB->getParent();
22431 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22432 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22433
22434 F->insert(I, CvtMBB);
22435 F->insert(I, DoneMBB);
22436 // Move all instructions after the sequence to DoneMBB.
22437 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22438 MBB->end());
22439 // Update machine-CFG edges by transferring all successors of the current
22440 // block to DoneMBB, which will hold the PHI merging the two results.
22441 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
22442 // Set the successors for MBB.
22443 MBB->addSuccessor(CvtMBB);
22444 MBB->addSuccessor(DoneMBB);
22445
22446 Register DstReg = MI.getOperand(0).getReg();
22447 Register SrcReg = MI.getOperand(1).getReg();
22448 Register MaxReg = MI.getOperand(2).getReg();
22449 int64_t FRM = MI.getOperand(3).getImm();
22450
22451 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22452 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22453
22454 Register FabsReg = MRI.createVirtualRegister(RC);
22455 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22456
22457 // Compare the FP value to the max value.
22458 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22459 auto MIB =
22460 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22463
22464 // Insert branch.
22465 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22466 .addReg(CmpReg)
22467 .addReg(RISCV::X0)
22468 .addMBB(DoneMBB);
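// CmpReg == 0 means |Src| >= MaxReg or Src is NaN; either way the original
// value is passed through unchanged via the PHI in DoneMBB.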
22469
22470 CvtMBB->addSuccessor(DoneMBB);
22471
22472 // Convert to integer.
22473 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22474 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22477
22478 // Convert back to FP.
22479 Register I2FReg = MRI.createVirtualRegister(RC);
22480 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22483
22484 // Restore the sign bit.
22485 Register CvtReg = MRI.createVirtualRegister(RC);
22486 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22487
22488 // Merge the results.
22489 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22490 .addReg(SrcReg)
22491 .addMBB(MBB)
22492 .addReg(CvtReg)
22493 .addMBB(CvtMBB);
22494
22495 MI.eraseFromParent();
22496 return DoneMBB;
22497}
22498
22499 MachineBasicBlock *
22500 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
22501 MachineBasicBlock *BB) const {
22502 switch (MI.getOpcode()) {
22503 default:
22504 llvm_unreachable("Unexpected instr type to insert");
22505 case RISCV::ReadCounterWide:
22506 assert(!Subtarget.is64Bit() &&
22507 "ReadCounterWide is only to be used on riscv32");
22508 return emitReadCounterWidePseudo(MI, BB);
22509 case RISCV::Select_GPR_Using_CC_GPR:
22510 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
22511 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22512 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22513 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22514 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22515 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22516 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22517 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22518 case RISCV::Select_FPR16_Using_CC_GPR:
22519 case RISCV::Select_FPR16INX_Using_CC_GPR:
22520 case RISCV::Select_FPR32_Using_CC_GPR:
22521 case RISCV::Select_FPR32INX_Using_CC_GPR:
22522 case RISCV::Select_FPR64_Using_CC_GPR:
22523 case RISCV::Select_FPR64INX_Using_CC_GPR:
22524 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22525 return emitSelectPseudo(MI, BB, Subtarget);
22526 case RISCV::BuildPairF64Pseudo:
22527 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22528 case RISCV::SplitF64Pseudo:
22529 return emitSplitF64Pseudo(MI, BB, Subtarget);
22530 case RISCV::PseudoQuietFLE_H:
22531 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22532 case RISCV::PseudoQuietFLE_H_INX:
22533 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22534 case RISCV::PseudoQuietFLT_H:
22535 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22536 case RISCV::PseudoQuietFLT_H_INX:
22537 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22538 case RISCV::PseudoQuietFLE_S:
22539 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22540 case RISCV::PseudoQuietFLE_S_INX:
22541 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22542 case RISCV::PseudoQuietFLT_S:
22543 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22544 case RISCV::PseudoQuietFLT_S_INX:
22545 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22546 case RISCV::PseudoQuietFLE_D:
22547 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22548 case RISCV::PseudoQuietFLE_D_INX:
22549 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22550 case RISCV::PseudoQuietFLE_D_IN32X:
22551 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22552 Subtarget);
22553 case RISCV::PseudoQuietFLT_D:
22554 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22555 case RISCV::PseudoQuietFLT_D_INX:
22556 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22557 case RISCV::PseudoQuietFLT_D_IN32X:
22558 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22559 Subtarget);
22560
22561 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22562 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22563 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22564 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22565 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22566 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22567 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22568 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22569 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22570 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22571 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22572 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22573 case RISCV::PseudoFROUND_H:
22574 case RISCV::PseudoFROUND_H_INX:
22575 case RISCV::PseudoFROUND_S:
22576 case RISCV::PseudoFROUND_S_INX:
22577 case RISCV::PseudoFROUND_D:
22578 case RISCV::PseudoFROUND_D_INX:
22579 case RISCV::PseudoFROUND_D_IN32X:
22580 return emitFROUND(MI, BB, Subtarget);
22581 case RISCV::PROBED_STACKALLOC_DYN:
22582 return emitDynamicProbedAlloc(MI, BB);
22583 case TargetOpcode::STATEPOINT:
22584 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
22585 // while the jal call instruction (to which the statepoint is lowered at the
22586 // end) has an implicit def. This def is early-clobber as it will be set at
22587 // the moment of the call, earlier than any use is read.
22588 // Add this implicit dead def here as a workaround.
22589 MI.addOperand(*MI.getMF(),
22590 MachineOperand::CreateReg(
22591 RISCV::X1, /*isDef*/ true,
22592 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22593 /*isUndef*/ false, /*isEarlyClobber*/ true));
22594 [[fallthrough]];
22595 case TargetOpcode::STACKMAP:
22596 case TargetOpcode::PATCHPOINT:
22597 if (!Subtarget.is64Bit())
22598 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22599 "supported on 64-bit targets");
22600 return emitPatchPoint(MI, BB);
22601 }
22602}
22603
22604 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22605 SDNode *Node) const {
22606 // If the instruction defines the FRM operand, conservatively mark it as
22607 // non-dead to express a data dependency with FRM users and prevent
22608 // incorrect instruction reordering.
22609 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22610 FRMDef->setIsDead(false);
22611 return;
22612 }
22613 // Add FRM dependency to any instructions with dynamic rounding mode.
22614 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22615 if (Idx < 0) {
22616 // Vector pseudos have FRM index indicated by TSFlags.
22617 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22618 if (Idx < 0)
22619 return;
22620 }
22621 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22622 return;
22623 // If the instruction already reads FRM, don't add another read.
22624 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22625 return;
22626 MI.addOperand(
22627 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22628}
22629
22630void RISCVTargetLowering::analyzeInputArgs(
22631 MachineFunction &MF, CCState &CCInfo,
22632 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22633 RISCVCCAssignFn Fn) const {
22634 for (const auto &[Idx, In] : enumerate(Ins)) {
22635 MVT ArgVT = In.VT;
22636 ISD::ArgFlagsTy ArgFlags = In.Flags;
22637
22638 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22639 In.OrigTy)) {
22640 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22641 << ArgVT << '\n');
22642 llvm_unreachable(nullptr);
22643 }
22644 }
22645}
22646
22647void RISCVTargetLowering::analyzeOutputArgs(
22648 MachineFunction &MF, CCState &CCInfo,
22649 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22650 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22651 for (const auto &[Idx, Out] : enumerate(Outs)) {
22652 MVT ArgVT = Out.VT;
22653 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22654
22655 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22656 Out.OrigTy)) {
22657 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22658 << ArgVT << "\n");
22659 llvm_unreachable(nullptr);
22660 }
22661 }
22662}
22663
22664// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22665// values.
22666 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22667 const CCValAssign &VA, const SDLoc &DL,
22668 const RISCVSubtarget &Subtarget) {
22669 if (VA.needsCustom()) {
22670 if (VA.getLocVT().isInteger() &&
22671 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22672 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22673 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22674 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22676 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22677 llvm_unreachable("Unexpected Custom handling.");
22678 }
22679
22680 switch (VA.getLocInfo()) {
22681 default:
22682 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22683 case CCValAssign::Full:
22684 break;
22685 case CCValAssign::BCvt:
22686 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22687 break;
22688 }
22689 return Val;
22690}
22691
22692// The caller is responsible for loading the full value if the argument is
22693// passed with CCValAssign::Indirect.
22695 const CCValAssign &VA, const SDLoc &DL,
22696 const ISD::InputArg &In,
22697 const RISCVTargetLowering &TLI) {
22700 EVT LocVT = VA.getLocVT();
22701 SDValue Val;
22702 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22703 Register VReg = RegInfo.createVirtualRegister(RC);
22704 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22705 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22706
22707 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22708 if (In.isOrigArg()) {
22709 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22710 if (OrigArg->getType()->isIntegerTy()) {
22711 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22712 // An input zero extended from i31 can also be considered sign extended.
22713 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22714 (BitWidth < 32 && In.Flags.isZExt())) {
22716 RVFI->addSExt32Register(VReg);
22717 }
22718 }
22719 }
22720
22721 if (VA.getLocInfo() == CCValAssign::Indirect)
22722 return Val;
22723
22724 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22725}
22726
22727 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22728                                    const CCValAssign &VA, const SDLoc &DL,
22729 const RISCVSubtarget &Subtarget) {
22730 EVT LocVT = VA.getLocVT();
22731
22732 if (VA.needsCustom()) {
22733 if (LocVT.isInteger() &&
22734 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22735 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22736 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22737 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22738 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22739 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22740 llvm_unreachable("Unexpected Custom handling.");
22741 }
22742
22743 switch (VA.getLocInfo()) {
22744 default:
22745 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22746 case CCValAssign::Full:
22747 break;
22748 case CCValAssign::BCvt:
22749 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22750 break;
22751 }
22752 return Val;
22753}
22754
22755// The caller is responsible for loading the full value if the argument is
22756// passed with CCValAssign::Indirect.
22757 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22758                                 const CCValAssign &VA, const SDLoc &DL) {
22759   MachineFunction &MF = DAG.getMachineFunction();
22760   MachineFrameInfo &MFI = MF.getFrameInfo();
22761 EVT LocVT = VA.getLocVT();
22762 EVT ValVT = VA.getValVT();
22764 if (VA.getLocInfo() == CCValAssign::Indirect) {
22765 // When the value is a scalable vector, we save the pointer which points to
22766 // the scalable vector value in the stack. The ValVT will be the pointer
22767 // type, instead of the scalable vector type.
22768 ValVT = LocVT;
22769 }
22770 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22771 /*IsImmutable=*/true);
22772 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22773 SDValue Val;
22774
22776 switch (VA.getLocInfo()) {
22777 default:
22778 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22779 case CCValAssign::Full:
22781 case CCValAssign::BCvt:
22782 break;
22783 }
22784 Val = DAG.getExtLoad(
22785 ExtType, DL, LocVT, Chain, FIN,
22787 return Val;
22788}
22789
22790 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22791                                        const CCValAssign &VA,
22792 const CCValAssign &HiVA,
22793 const SDLoc &DL) {
22794 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22795 "Unexpected VA");
22796   MachineFunction &MF = DAG.getMachineFunction();
22797   MachineFrameInfo &MFI = MF.getFrameInfo();
22798   MachineRegisterInfo &RegInfo = MF.getRegInfo();
22799
22800 assert(VA.isRegLoc() && "Expected register VA assignment");
22801
22802 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22803 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22804 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22805 SDValue Hi;
22806 if (HiVA.isMemLoc()) {
22807 // Second half of f64 is passed on the stack.
22808 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22809 /*IsImmutable=*/true);
22810 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22811 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22813 } else {
22814 // Second half of f64 is passed in another GPR.
22815 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22816 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22817 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22818 }
22819 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22820}
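// Illustrative example (f64 with a soft-float ABI on RV32): when CC_RISCV
// assigns both halves of an f64 argument to GPRs, say a1 and a2, the two
// CopyFromReg results above are recombined with RISCVISD::BuildPairF64. When
// only the low half fits in the last available GPR, the high half is instead
// loaded from the first stack argument slot (the HiVA.isMemLoc() path).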
22821
22822// Transform physical registers into virtual registers.
22823 SDValue RISCVTargetLowering::LowerFormalArguments(
22824     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22825 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22826 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22827
22828   MachineFunction &MF = DAG.getMachineFunction();
22829
22830 switch (CallConv) {
22831 default:
22832 reportFatalUsageError("Unsupported calling convention");
22833 case CallingConv::C:
22834 case CallingConv::Fast:
22837 case CallingConv::GRAAL:
22839#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22840 CC_VLS_CASE(32)
22841 CC_VLS_CASE(64)
22842 CC_VLS_CASE(128)
22843 CC_VLS_CASE(256)
22844 CC_VLS_CASE(512)
22845 CC_VLS_CASE(1024)
22846 CC_VLS_CASE(2048)
22847 CC_VLS_CASE(4096)
22848 CC_VLS_CASE(8192)
22849 CC_VLS_CASE(16384)
22850 CC_VLS_CASE(32768)
22851 CC_VLS_CASE(65536)
22852#undef CC_VLS_CASE
22853 break;
22854 case CallingConv::GHC:
22855 if (Subtarget.hasStdExtE())
22856 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22857 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22858 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22859 "(Zdinx/D) instruction set extensions");
22860 }
22861
22862 const Function &Func = MF.getFunction();
22863 if (Func.hasFnAttribute("interrupt")) {
22864 if (!Func.arg_empty())
22865       reportFatalUsageError(
22866           "Functions with the interrupt attribute cannot have arguments!");
22867
22868 StringRef Kind =
22869 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
22870
22871 constexpr StringLiteral SupportedInterruptKinds[] = {
22872 "machine",
22873 "supervisor",
22874 "rnmi",
22875 "qci-nest",
22876 "qci-nonest",
22877 "SiFive-CLIC-preemptible",
22878 "SiFive-CLIC-stack-swap",
22879 "SiFive-CLIC-preemptible-stack-swap",
22880 };
22881 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
22882       reportFatalUsageError(
22883           "Function interrupt attribute argument not supported!");
22884
22885 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
22886       reportFatalUsageError(
22887           "'qci-*' interrupt kinds require Xqciint extension");
22888
22889 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
22890       reportFatalUsageError(
22891           "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
22892
22893 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
22894 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
22895 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
22896 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
22897 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
22898 "have a frame pointer");
22899 }
22900
22901 EVT PtrVT = getPointerTy(DAG.getDataLayout());
22902 MVT XLenVT = Subtarget.getXLenVT();
22903 unsigned XLenInBytes = Subtarget.getXLen() / 8;
22904   // Used with varargs to accumulate store chains.
22905 std::vector<SDValue> OutChains;
22906
22907 // Assign locations to all of the incoming arguments.
22908   SmallVector<CCValAssign, 16> ArgLocs;
22909   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
22910
22911 if (CallConv == CallingConv::GHC)
22912     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
22913   else
22914     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
22915                      CallConv == CallingConv::Fast ? CC_RISCV_FastCC
22916                      : CC_RISCV);
22917
22918 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
22919 CCValAssign &VA = ArgLocs[i];
22920 SDValue ArgValue;
22921 // Passing f64 on RV32D with a soft float ABI must be handled as a special
22922 // case.
22923 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
22924 assert(VA.needsCustom());
22925 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
22926 } else if (VA.isRegLoc())
22927 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
22928 else
22929 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
22930
22931 if (VA.getLocInfo() == CCValAssign::Indirect) {
22932 // If the original argument was split and passed by reference (e.g. i128
22933 // on RV32), we need to load all parts of it here (using the same
22934 // address). Vectors may be partly split to registers and partly to the
22935 // stack, in which case the base address is partly offset and subsequent
22936 // stores are relative to that.
22937 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
22939 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
22940 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
22941 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
22942 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
22943 CCValAssign &PartVA = ArgLocs[i + 1];
22944 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
22945 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
22946 if (PartVA.getValVT().isScalableVector())
22947 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
22948 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
22949 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
22951 ++i;
22952 ++InsIdx;
22953 }
22954 continue;
22955 }
22956 InVals.push_back(ArgValue);
22957 }
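  // To illustrate the CCValAssign::Indirect case in the loop above: an i128
  // argument on RV32 is larger than 2*XLEN, so only its address is passed in a
  // register; the i32 parts produced by type legalization all share one
  // OrigArgIndex and are loaded back from that address at successive part
  // offsets (0, 4, 8 and 12 bytes).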
22958
22959 if (any_of(ArgLocs,
22960 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
22962
22963 if (IsVarArg) {
22964 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
22965 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
22966 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
22967 MachineFrameInfo &MFI = MF.getFrameInfo();
22968 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22970
22971 // Size of the vararg save area. For now, the varargs save area is either
22972 // zero or large enough to hold a0-a7.
22973 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
22974 int FI;
22975
22976 // If all registers are allocated, then all varargs must be passed on the
22977 // stack and we don't need to save any argregs.
22978 if (VarArgsSaveSize == 0) {
22979 int VaArgOffset = CCInfo.getStackSize();
22980 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
22981 } else {
22982 int VaArgOffset = -VarArgsSaveSize;
22983 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
22984
22985 // If saving an odd number of registers then create an extra stack slot to
22986 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
22987       // offsets to even-numbered registers remain 2*XLEN-aligned.
22988 if (Idx % 2) {
22989         MFI.CreateFixedObject(
22990             XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
22991 VarArgsSaveSize += XLenInBytes;
22992 }
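      // Worked example (assuming the lp64 ABI with a0-a7 as argument
      // registers, so XLenInBytes == 8): for `int f(int x, ...)` only a0 is
      // taken by the named argument, so Idx == 1 and a1-a7 are saved, giving
      // VarArgsSaveSize == 7 * 8 == 56 bytes. Because Idx is odd, the extra
      // slot created above grows the save area to 64 bytes, keeping it
      // 2*XLEN-aligned.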
22993
22994 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22995
22996 // Copy the integer registers that may have been used for passing varargs
22997 // to the vararg save area.
22998 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
22999 const Register Reg = RegInfo.createVirtualRegister(RC);
23000 RegInfo.addLiveIn(ArgRegs[I], Reg);
23001 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
23002 SDValue Store = DAG.getStore(
23003 Chain, DL, ArgValue, FIN,
23004 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
23005 OutChains.push_back(Store);
23006 FIN =
23007 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
23008 }
23009 }
23010
23011 // Record the frame index of the first variable argument
23012 // which is a value necessary to VASTART.
23013 RVFI->setVarArgsFrameIndex(FI);
23014 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23015 }
23016
23017 // All stores are grouped in one node to allow the matching between
23018 // the size of Ins and InVals. This only happens for vararg functions.
23019 if (!OutChains.empty()) {
23020 OutChains.push_back(Chain);
23021 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23022 }
23023
23024 return Chain;
23025}
23026
23027/// isEligibleForTailCallOptimization - Check whether the call is eligible
23028/// for tail call optimization.
23029/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23030bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23031 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23032 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23033
23034 auto CalleeCC = CLI.CallConv;
23035 auto &Outs = CLI.Outs;
23036 auto &Caller = MF.getFunction();
23037 auto CallerCC = Caller.getCallingConv();
23038
23039 // Exception-handling functions need a special set of instructions to
23040 // indicate a return to the hardware. Tail-calling another function would
23041 // probably break this.
23042 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23043 // should be expanded as new function attributes are introduced.
23044 if (Caller.hasFnAttribute("interrupt"))
23045 return false;
23046
23047 // Do not tail call opt if the stack is used to pass parameters.
23048 if (CCInfo.getStackSize() != 0)
23049 return false;
23050
23051 // Do not tail call opt if any parameters need to be passed indirectly.
23052 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23053 // passed indirectly. So the address of the value will be passed in a
23054 // register, or if not available, then the address is put on the stack. In
23055 // order to pass indirectly, space on the stack often needs to be allocated
23056   // in order to store the value. In this case the CCInfo.getStackSize() != 0
23057   // check above is not enough, and we also need to check whether any of the
23058   // CCValAssign entries in ArgLocs are passed CCValAssign::Indirect.
23059 for (auto &VA : ArgLocs)
23060 if (VA.getLocInfo() == CCValAssign::Indirect)
23061 return false;
23062
23063 // Do not tail call opt if either caller or callee uses struct return
23064 // semantics.
23065 auto IsCallerStructRet = Caller.hasStructRetAttr();
23066 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23067 if (IsCallerStructRet || IsCalleeStructRet)
23068 return false;
23069
23070 // The callee has to preserve all registers the caller needs to preserve.
23071 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23072 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23073 if (CalleeCC != CallerCC) {
23074 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23075 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23076 return false;
23077 }
23078
23079 // Byval parameters hand the function a pointer directly into the stack area
23080 // we want to reuse during a tail call. Working around this *is* possible
23081 // but less efficient and uglier in LowerCall.
23082 for (auto &Arg : Outs)
23083 if (Arg.Flags.isByVal())
23084 return false;
23085
23086 return true;
23087}
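// For example, a call passing an i128 argument on RV32 is rejected above: the
// value is wider than 2*XLEN and is therefore passed CCValAssign::Indirect.
// Likewise, any byval argument or any argument assigned to the caller's
// outgoing stack area (CCInfo.getStackSize() != 0) disables the tail call.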
23088
23090 return DAG.getDataLayout().getPrefTypeAlign(
23091 VT.getTypeForEVT(*DAG.getContext()));
23092}
23093
23094// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23095// and output parameter nodes.
23096 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
23097                                        SmallVectorImpl<SDValue> &InVals) const {
23098 SelectionDAG &DAG = CLI.DAG;
23099 SDLoc &DL = CLI.DL;
23100   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
23101   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23102   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
23103   SDValue Chain = CLI.Chain;
23104 SDValue Callee = CLI.Callee;
23105 bool &IsTailCall = CLI.IsTailCall;
23106 CallingConv::ID CallConv = CLI.CallConv;
23107 bool IsVarArg = CLI.IsVarArg;
23108 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23109 MVT XLenVT = Subtarget.getXLenVT();
23110 const CallBase *CB = CLI.CB;
23111
23112   MachineFunction &MF = DAG.getMachineFunction();
23113   MachineFunction::CallSiteInfo CSInfo;
23114
23115 // Set type id for call site info.
23116 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23117 CSInfo = MachineFunction::CallSiteInfo(*CB);
23118
23119 // Analyze the operands of the call, assigning locations to each operand.
23120   SmallVector<CCValAssign, 16> ArgLocs;
23121   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23122
23123 if (CallConv == CallingConv::GHC) {
23124 if (Subtarget.hasStdExtE())
23125 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23126 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23127 } else
23128 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23129                       CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23130                       : CC_RISCV);
23131
23132 // Check if it's really possible to do a tail call.
23133 if (IsTailCall)
23134 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23135
23136 if (IsTailCall)
23137 ++NumTailCalls;
23138 else if (CLI.CB && CLI.CB->isMustTailCall())
23139 reportFatalInternalError("failed to perform tail call elimination on a "
23140 "call site marked musttail");
23141
23142 // Get a count of how many bytes are to be pushed on the stack.
23143 unsigned NumBytes = ArgCCInfo.getStackSize();
23144
23145 // Create local copies for byval args
23146 SmallVector<SDValue, 8> ByValArgs;
23147 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23148 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23149 if (!Flags.isByVal())
23150 continue;
23151
23152 SDValue Arg = OutVals[i];
23153 unsigned Size = Flags.getByValSize();
23154 Align Alignment = Flags.getNonZeroByValAlign();
23155
23156 int FI =
23157 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23158 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23159 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23160
23161 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23162 /*IsVolatile=*/false,
23163 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23165 ByValArgs.push_back(FIPtr);
23166 }
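  // For example, a 16-byte struct passed byval with 8-byte alignment gets a
  // fresh 16-byte stack object in this function's frame and a memcpy from the
  // original object; the callee is then handed a pointer to this local copy
  // (see the ByValArgs use further below) rather than to the original.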
23167
23168 if (!IsTailCall)
23169 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23170
23171 // Copy argument values to their designated locations.
23172   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
23173   SmallVector<SDValue, 8> MemOpChains;
23174 SDValue StackPtr;
23175 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23176 ++i, ++OutIdx) {
23177 CCValAssign &VA = ArgLocs[i];
23178 SDValue ArgValue = OutVals[OutIdx];
23179 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23180
23181 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23182 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23183 assert(VA.isRegLoc() && "Expected register VA assignment");
23184 assert(VA.needsCustom());
23185 SDValue SplitF64 = DAG.getNode(
23186 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23187 SDValue Lo = SplitF64.getValue(0);
23188 SDValue Hi = SplitF64.getValue(1);
23189
23190 Register RegLo = VA.getLocReg();
23191 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23192
23193 // Get the CCValAssign for the Hi part.
23194 CCValAssign &HiVA = ArgLocs[++i];
23195
23196 if (HiVA.isMemLoc()) {
23197 // Second half of f64 is passed on the stack.
23198 if (!StackPtr.getNode())
23199 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23201 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23202 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23203 // Emit the store.
23204 MemOpChains.push_back(DAG.getStore(
23205 Chain, DL, Hi, Address,
23207 } else {
23208 // Second half of f64 is passed in another GPR.
23209 Register RegHigh = HiVA.getLocReg();
23210 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23211 }
23212 continue;
23213 }
23214
23215 // Promote the value if needed.
23216 // For now, only handle fully promoted and indirect arguments.
23217 if (VA.getLocInfo() == CCValAssign::Indirect) {
23218 // Store the argument in a stack slot and pass its address.
23219 Align StackAlign =
23220 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23221 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23222 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23223 // If the original argument was split (e.g. i128), we need
23224 // to store the required parts of it here (and pass just one address).
23225 // Vectors may be partly split to registers and partly to the stack, in
23226 // which case the base address is partly offset and subsequent stores are
23227 // relative to that.
23228 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23229 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23230 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23231 // Calculate the total size to store. We don't have access to what we're
23232 // actually storing other than performing the loop and collecting the
23233 // info.
23234       SmallVector<std::pair<SDValue, SDValue>> Parts;
23235       while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23236 SDValue PartValue = OutVals[OutIdx + 1];
23237 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23238 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23239 EVT PartVT = PartValue.getValueType();
23240 if (PartVT.isScalableVector())
23241 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23242 StoredSize += PartVT.getStoreSize();
23243 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23244 Parts.push_back(std::make_pair(PartValue, Offset));
23245 ++i;
23246 ++OutIdx;
23247 }
23248 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23249 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23250 MemOpChains.push_back(
23251 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23253 for (const auto &Part : Parts) {
23254 SDValue PartValue = Part.first;
23255 SDValue PartOffset = Part.second;
23257 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23258 MemOpChains.push_back(
23259 DAG.getStore(Chain, DL, PartValue, Address,
23261 }
23262 ArgValue = SpillSlot;
23263 } else {
23264 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23265 }
23266
23267 // Use local copy if it is a byval arg.
23268 if (Flags.isByVal())
23269 ArgValue = ByValArgs[j++];
23270
23271 if (VA.isRegLoc()) {
23272 // Queue up the argument copies and emit them at the end.
23273 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23274
23275 const TargetOptions &Options = DAG.getTarget().Options;
23276 if (Options.EmitCallSiteInfo)
23277 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23278 } else {
23279 assert(VA.isMemLoc() && "Argument not register or memory");
23280 assert(!IsTailCall && "Tail call not allowed if stack is used "
23281 "for passing parameters");
23282
23283 // Work out the address of the stack slot.
23284 if (!StackPtr.getNode())
23285 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23287 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23289
23290 // Emit the store.
23291 MemOpChains.push_back(
23292 DAG.getStore(Chain, DL, ArgValue, Address,
23294 }
23295 }
23296
23297 // Join the stores, which are independent of one another.
23298 if (!MemOpChains.empty())
23299 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23300
23301 SDValue Glue;
23302
23303 // Build a sequence of copy-to-reg nodes, chained and glued together.
23304 for (auto &Reg : RegsToPass) {
23305 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23306 Glue = Chain.getValue(1);
23307 }
23308
23309 // Validate that none of the argument registers have been marked as
23310 // reserved, if so report an error. Do the same for the return address if this
23311 // is not a tailcall.
23312 validateCCReservedRegs(RegsToPass, MF);
23313 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23315 MF.getFunction(),
23316 "Return address register required, but has been reserved."});
23317
23318 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23319 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23320 // split it and then direct call can be matched by PseudoCALL.
23321 bool CalleeIsLargeExternalSymbol = false;
23322   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
23323     if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23324 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23325 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23326 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23327 CalleeIsLargeExternalSymbol = true;
23328 }
23329 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23330 const GlobalValue *GV = S->getGlobal();
23331 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23332 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23333 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23334 }
23335
23336 // The first call operand is the chain and the second is the target address.
23337   SmallVector<SDValue, 8> Ops;
23338   Ops.push_back(Chain);
23339 Ops.push_back(Callee);
23340
23341 // Add argument registers to the end of the list so that they are
23342 // known live into the call.
23343 for (auto &Reg : RegsToPass)
23344 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23345
23346 // Add a register mask operand representing the call-preserved registers.
23347 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23348 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23349 assert(Mask && "Missing call preserved mask for calling convention");
23350 Ops.push_back(DAG.getRegisterMask(Mask));
23351
23352 // Glue the call to the argument copies, if any.
23353 if (Glue.getNode())
23354 Ops.push_back(Glue);
23355
23356 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23357 "Unexpected CFI type for a direct call");
23358
23359 // Emit the call.
23360 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23361
23362   // Use a software-guarded branch for large code model non-indirect calls.
23363   // A tail call to an external symbol will have a null CLI.CB, so we need
23364   // another way to determine the call site type.
23365 bool NeedSWGuarded = false;
23367 Subtarget.hasStdExtZicfilp() &&
23368 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23369 NeedSWGuarded = true;
23370
23371 if (IsTailCall) {
23372     MF.getFrameInfo().setHasTailCall();
23373     unsigned CallOpc =
23374 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23375 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23376 if (CLI.CFIType)
23377 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23378 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23379 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23380 return Ret;
23381 }
23382
23383 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23384 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23385 if (CLI.CFIType)
23386 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23387
23388 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23389 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23390 Glue = Chain.getValue(1);
23391
23392 // Mark the end of the call, which is glued to the call itself.
23393 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23394 Glue = Chain.getValue(1);
23395
23396 // Assign locations to each value returned by this call.
23397   SmallVector<CCValAssign, 16> RVLocs;
23398   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23399 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23400
23401 // Copy all of the result registers out of their specified physreg.
23402 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23403 auto &VA = RVLocs[i];
23404 // Copy the value out
23405 SDValue RetValue =
23406 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23407 // Glue the RetValue to the end of the call sequence
23408 Chain = RetValue.getValue(1);
23409 Glue = RetValue.getValue(2);
23410
23411 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23412 assert(VA.needsCustom());
23413 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23414 MVT::i32, Glue);
23415 Chain = RetValue2.getValue(1);
23416 Glue = RetValue2.getValue(2);
23417 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23418 RetValue2);
23419 } else
23420 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23421
23422 InVals.push_back(RetValue);
23423 }
23424
23425 return Chain;
23426}
23427
23428 bool RISCVTargetLowering::CanLowerReturn(
23429     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23430 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23431 const Type *RetTy) const {
23432   SmallVector<CCValAssign, 16> RVLocs;
23433   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23434
23435 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23436 MVT VT = Outs[i].VT;
23437 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23438 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23439 /*IsRet=*/true, Outs[i].OrigTy))
23440 return false;
23441 }
23442 return true;
23443}
23444
23445SDValue
23446 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
23447                                  bool IsVarArg,
23448                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
23449                                  const SmallVectorImpl<SDValue> &OutVals,
23450 const SDLoc &DL, SelectionDAG &DAG) const {
23451   MachineFunction &MF = DAG.getMachineFunction();
23452   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23453
23454 // Stores the assignment of the return value to a location.
23455   SmallVector<CCValAssign, 16> RVLocs;
23456
23457 // Info about the registers and stack slot.
23458 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23459 *DAG.getContext());
23460
23461 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23462 nullptr, CC_RISCV);
23463
23464 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23465 reportFatalUsageError("GHC functions return void only");
23466
23467 SDValue Glue;
23468 SmallVector<SDValue, 4> RetOps(1, Chain);
23469
23470 // Copy the result values into the output registers.
23471 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23472 SDValue Val = OutVals[OutIdx];
23473 CCValAssign &VA = RVLocs[i];
23474 assert(VA.isRegLoc() && "Can only return in registers!");
23475
23476 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23477 // Handle returning f64 on RV32D with a soft float ABI.
23478 assert(VA.isRegLoc() && "Expected return via registers");
23479 assert(VA.needsCustom());
23480 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23481 DAG.getVTList(MVT::i32, MVT::i32), Val);
23482 SDValue Lo = SplitF64.getValue(0);
23483 SDValue Hi = SplitF64.getValue(1);
23484 Register RegLo = VA.getLocReg();
23485 Register RegHi = RVLocs[++i].getLocReg();
23486
23487 if (STI.isRegisterReservedByUser(RegLo) ||
23488 STI.isRegisterReservedByUser(RegHi))
23490 MF.getFunction(),
23491 "Return value register required, but has been reserved."});
23492
23493 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23494 Glue = Chain.getValue(1);
23495 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23496 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23497 Glue = Chain.getValue(1);
23498 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23499 } else {
23500 // Handle a 'normal' return.
23501 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23502 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23503
23504 if (STI.isRegisterReservedByUser(VA.getLocReg()))
23506 MF.getFunction(),
23507 "Return value register required, but has been reserved."});
23508
23509 // Guarantee that all emitted copies are stuck together.
23510 Glue = Chain.getValue(1);
23511 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23512 }
23513 }
23514
23515 RetOps[0] = Chain; // Update chain.
23516
23517 // Add the glue node if we have it.
23518 if (Glue.getNode()) {
23519 RetOps.push_back(Glue);
23520 }
23521
23522 if (any_of(RVLocs,
23523 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23525
23526 unsigned RetOpc = RISCVISD::RET_GLUE;
23527 // Interrupt service routines use different return instructions.
23528 const Function &Func = DAG.getMachineFunction().getFunction();
23529 if (Func.hasFnAttribute("interrupt")) {
23530 if (!Func.getReturnType()->isVoidTy())
23532 "Functions with the interrupt attribute must have void return type!");
23533
23535 StringRef Kind =
23536 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23537
23538 if (Kind == "supervisor")
23539 RetOpc = RISCVISD::SRET_GLUE;
23540 else if (Kind == "rnmi") {
23541 assert(STI.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23542 "Need Smrnmi extension for rnmi");
23543 RetOpc = RISCVISD::MNRET_GLUE;
23544 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23545 assert(STI.hasFeature(RISCV::FeatureVendorXqciint) &&
23546 "Need Xqciint for qci-(no)nest");
23547 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23548 } else
23549 RetOpc = RISCVISD::MRET_GLUE;
23550 }
23551
23552 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23553}
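// For example, returning an f64 from an RV32 function with a soft-float ABI
// hits the VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 case above:
// the value is split with RISCVISD::SplitF64 and the low and high halves are
// returned in the first two return registers (a0 and a1).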
23554
23555void RISCVTargetLowering::validateCCReservedRegs(
23556 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23557 MachineFunction &MF) const {
23558 const Function &F = MF.getFunction();
23559 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
23560
23561 if (llvm::any_of(Regs, [&STI](auto Reg) {
23562 return STI.isRegisterReservedByUser(Reg.first);
23563 }))
23564 F.getContext().diagnose(DiagnosticInfoUnsupported{
23565 F, "Argument register required, but has been reserved."});
23566}
23567
23568// Check if the result of the node is only used as a return value, as
23569// otherwise we can't perform a tail-call.
23570 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
23571   if (N->getNumValues() != 1)
23572 return false;
23573 if (!N->hasNUsesOfValue(1, 0))
23574 return false;
23575
23576 SDNode *Copy = *N->user_begin();
23577
23578 if (Copy->getOpcode() == ISD::BITCAST) {
23579 return isUsedByReturnOnly(Copy, Chain);
23580 }
23581
23582 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23583 // with soft float ABIs.
23584 if (Copy->getOpcode() != ISD::CopyToReg) {
23585 return false;
23586 }
23587
23588 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23589 // isn't safe to perform a tail call.
23590 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23591 return false;
23592
23593 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23594 bool HasRet = false;
23595 for (SDNode *Node : Copy->users()) {
23596 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23597 return false;
23598 HasRet = true;
23599 }
23600 if (!HasRet)
23601 return false;
23602
23603 Chain = Copy->getOperand(0);
23604 return true;
23605}
23606
23607 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
23608   return CI->isTailCall();
23609}
23610
23611/// getConstraintType - Given a constraint letter, return the type of
23612/// constraint it is for this target.
23613 RISCVTargetLowering::ConstraintType
23614 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
23615   if (Constraint.size() == 1) {
23616 switch (Constraint[0]) {
23617 default:
23618 break;
23619 case 'f':
23620 case 'R':
23621 return C_RegisterClass;
23622 case 'I':
23623 case 'J':
23624 case 'K':
23625 return C_Immediate;
23626 case 'A':
23627 return C_Memory;
23628 case 's':
23629 case 'S': // A symbolic address
23630 return C_Other;
23631 }
23632 } else {
23633 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23634 return C_RegisterClass;
23635 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23636 return C_RegisterClass;
23637 }
23638 return TargetLowering::getConstraintType(Constraint);
23639}
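// Illustrative uses of these constraint letters in GCC-style inline assembly:
//   asm volatile("addi %0, %1, %2" : "=r"(rd) : "r"(rs1), "I"(42));
// uses 'I' for a 12-bit signed immediate, while
//   asm volatile("amoswap.w %0, %2, %1" : "=r"(old), "+A"(*ptr) : "r"(val));
// uses 'A' for an address held in a general-purpose register.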
23640
23641std::pair<unsigned, const TargetRegisterClass *>
23642 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
23643                                                   StringRef Constraint,
23644 MVT VT) const {
23645 // First, see if this is a constraint that directly corresponds to a RISC-V
23646 // register class.
23647 if (Constraint.size() == 1) {
23648 switch (Constraint[0]) {
23649 case 'r':
23650 // TODO: Support fixed vectors up to XLen for P extension?
23651 if (VT.isVector())
23652 break;
23653 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23654 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23655 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23656 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23657 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23658 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23659 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23660 case 'f':
23661 if (VT == MVT::f16) {
23662 if (Subtarget.hasStdExtZfhmin())
23663 return std::make_pair(0U, &RISCV::FPR16RegClass);
23664 if (Subtarget.hasStdExtZhinxmin())
23665 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23666 } else if (VT == MVT::f32) {
23667 if (Subtarget.hasStdExtF())
23668 return std::make_pair(0U, &RISCV::FPR32RegClass);
23669 if (Subtarget.hasStdExtZfinx())
23670 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23671 } else if (VT == MVT::f64) {
23672 if (Subtarget.hasStdExtD())
23673 return std::make_pair(0U, &RISCV::FPR64RegClass);
23674 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23675 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23676 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23677 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23678 }
23679 break;
23680 case 'R':
23681 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23682 (VT == MVT::i128 && Subtarget.is64Bit()))
23683 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23684 break;
23685 default:
23686 break;
23687 }
23688 } else if (Constraint == "vr") {
23689 for (const auto *RC :
23690 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23691 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23692 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23693 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23694 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23695 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23696 &RISCV::VRN2M4RegClass}) {
23697 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23698 return std::make_pair(0U, RC);
23699
23700 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23701 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23702 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23703 return std::make_pair(0U, RC);
23704 }
23705 }
23706 } else if (Constraint == "vd") {
23707 for (const auto *RC :
23708 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23709 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23710 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23711 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23712 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23713 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23714 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23715 &RISCV::VRN2M4NoV0RegClass}) {
23716 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23717 return std::make_pair(0U, RC);
23718
23719 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23720 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23721 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23722 return std::make_pair(0U, RC);
23723 }
23724 }
23725 } else if (Constraint == "vm") {
23726 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23727 return std::make_pair(0U, &RISCV::VMV0RegClass);
23728
23729 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23730 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23731       // VT here might be coerced to a vector with i8 elements, so we need to
23732       // check whether this is an M1 register instead of checking VMV0RegClass.
23733 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23734 return std::make_pair(0U, &RISCV::VMV0RegClass);
23735 }
23736 } else if (Constraint == "cr") {
23737 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23738 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23739 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23740 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23741 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23742 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23743 if (!VT.isVector())
23744 return std::make_pair(0U, &RISCV::GPRCRegClass);
23745 } else if (Constraint == "cR") {
23746 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23747 (VT == MVT::i128 && Subtarget.is64Bit()))
23748 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23749 } else if (Constraint == "cf") {
23750 if (VT == MVT::f16) {
23751 if (Subtarget.hasStdExtZfhmin())
23752 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23753 if (Subtarget.hasStdExtZhinxmin())
23754 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23755 } else if (VT == MVT::f32) {
23756 if (Subtarget.hasStdExtF())
23757 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23758 if (Subtarget.hasStdExtZfinx())
23759 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23760 } else if (VT == MVT::f64) {
23761 if (Subtarget.hasStdExtD())
23762 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23763 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23764 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23765 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23766 return std::make_pair(0U, &RISCV::GPRCRegClass);
23767 }
23768 }
23769
23770 // Clang will correctly decode the usage of register name aliases into their
23771 // official names. However, other frontends like `rustc` do not. This allows
23772 // users of these frontends to use the ABI names for registers in LLVM-style
23773 // register constraints.
23774 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23775 .Case("{zero}", RISCV::X0)
23776 .Case("{ra}", RISCV::X1)
23777 .Case("{sp}", RISCV::X2)
23778 .Case("{gp}", RISCV::X3)
23779 .Case("{tp}", RISCV::X4)
23780 .Case("{t0}", RISCV::X5)
23781 .Case("{t1}", RISCV::X6)
23782 .Case("{t2}", RISCV::X7)
23783 .Cases("{s0}", "{fp}", RISCV::X8)
23784 .Case("{s1}", RISCV::X9)
23785 .Case("{a0}", RISCV::X10)
23786 .Case("{a1}", RISCV::X11)
23787 .Case("{a2}", RISCV::X12)
23788 .Case("{a3}", RISCV::X13)
23789 .Case("{a4}", RISCV::X14)
23790 .Case("{a5}", RISCV::X15)
23791 .Case("{a6}", RISCV::X16)
23792 .Case("{a7}", RISCV::X17)
23793 .Case("{s2}", RISCV::X18)
23794 .Case("{s3}", RISCV::X19)
23795 .Case("{s4}", RISCV::X20)
23796 .Case("{s5}", RISCV::X21)
23797 .Case("{s6}", RISCV::X22)
23798 .Case("{s7}", RISCV::X23)
23799 .Case("{s8}", RISCV::X24)
23800 .Case("{s9}", RISCV::X25)
23801 .Case("{s10}", RISCV::X26)
23802 .Case("{s11}", RISCV::X27)
23803 .Case("{t3}", RISCV::X28)
23804 .Case("{t4}", RISCV::X29)
23805 .Case("{t5}", RISCV::X30)
23806 .Case("{t6}", RISCV::X31)
23807 .Default(RISCV::NoRegister);
23808 if (XRegFromAlias != RISCV::NoRegister)
23809 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23810
23811 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23812 // TableGen record rather than the AsmName to choose registers for InlineAsm
23813 // constraints, plus we want to match those names to the widest floating point
23814 // register type available, manually select floating point registers here.
23815 //
23816 // The second case is the ABI name of the register, so that frontends can also
23817 // use the ABI names in register constraint lists.
23818 if (Subtarget.hasStdExtF()) {
23819 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23820 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23821 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23822 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23823 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23824 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23825 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23826 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23827 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23828 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23829 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23830 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23831 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23832 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23833 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23834 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23835 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23836 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23837 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23838 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23839 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23840 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23841 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23842 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23843 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23844 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23845 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23846 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23847 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23848 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23849 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23850 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23851 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23852 .Default(RISCV::NoRegister);
23853 if (FReg != RISCV::NoRegister) {
23854 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23855 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23856 unsigned RegNo = FReg - RISCV::F0_F;
23857 unsigned DReg = RISCV::F0_D + RegNo;
23858 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23859 }
23860 if (VT == MVT::f32 || VT == MVT::Other)
23861 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23862 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23863 unsigned RegNo = FReg - RISCV::F0_F;
23864 unsigned HReg = RISCV::F0_H + RegNo;
23865 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23866 }
23867 }
23868 }
23869
23870 if (Subtarget.hasVInstructions()) {
23871 Register VReg = StringSwitch<Register>(Constraint.lower())
23872 .Case("{v0}", RISCV::V0)
23873 .Case("{v1}", RISCV::V1)
23874 .Case("{v2}", RISCV::V2)
23875 .Case("{v3}", RISCV::V3)
23876 .Case("{v4}", RISCV::V4)
23877 .Case("{v5}", RISCV::V5)
23878 .Case("{v6}", RISCV::V6)
23879 .Case("{v7}", RISCV::V7)
23880 .Case("{v8}", RISCV::V8)
23881 .Case("{v9}", RISCV::V9)
23882 .Case("{v10}", RISCV::V10)
23883 .Case("{v11}", RISCV::V11)
23884 .Case("{v12}", RISCV::V12)
23885 .Case("{v13}", RISCV::V13)
23886 .Case("{v14}", RISCV::V14)
23887 .Case("{v15}", RISCV::V15)
23888 .Case("{v16}", RISCV::V16)
23889 .Case("{v17}", RISCV::V17)
23890 .Case("{v18}", RISCV::V18)
23891 .Case("{v19}", RISCV::V19)
23892 .Case("{v20}", RISCV::V20)
23893 .Case("{v21}", RISCV::V21)
23894 .Case("{v22}", RISCV::V22)
23895 .Case("{v23}", RISCV::V23)
23896 .Case("{v24}", RISCV::V24)
23897 .Case("{v25}", RISCV::V25)
23898 .Case("{v26}", RISCV::V26)
23899 .Case("{v27}", RISCV::V27)
23900 .Case("{v28}", RISCV::V28)
23901 .Case("{v29}", RISCV::V29)
23902 .Case("{v30}", RISCV::V30)
23903 .Case("{v31}", RISCV::V31)
23904 .Default(RISCV::NoRegister);
23905 if (VReg != RISCV::NoRegister) {
23906 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
23907 return std::make_pair(VReg, &RISCV::VMRegClass);
23908 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
23909 return std::make_pair(VReg, &RISCV::VRRegClass);
23910 for (const auto *RC :
23911 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
23912 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
23913 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
23914 return std::make_pair(VReg, RC);
23915 }
23916 }
23917 }
23918 }
23919
23920 std::pair<Register, const TargetRegisterClass *> Res =
23921       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
23922
23923 // If we picked one of the Zfinx register classes, remap it to the GPR class.
23924 // FIXME: When Zfinx is supported in CodeGen this will need to take the
23925 // Subtarget into account.
23926 if (Res.second == &RISCV::GPRF16RegClass ||
23927 Res.second == &RISCV::GPRF32RegClass ||
23928 Res.second == &RISCV::GPRPairRegClass)
23929 return std::make_pair(Res.first, &RISCV::GPRRegClass);
23930
23931 return Res;
23932}
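// For example, the explicit register constraint "{fa0}" (or "{f10}") resolves
// to F10_D when the D extension is present and the operand type is f64 (or is
// unspecified), to F10_F for f32, and to F10_H for f16 with Zfhmin, matching
// the widest suitable floating-point register class chosen above.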
23933
23934 InlineAsm::ConstraintCode
23935 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
23936   // Currently only support length 1 constraints.
23937 if (ConstraintCode.size() == 1) {
23938 switch (ConstraintCode[0]) {
23939 case 'A':
23940       return InlineAsm::ConstraintCode::A;
23941     default:
23942 break;
23943 }
23944 }
23945
23946 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
23947}
23948
23949 void RISCVTargetLowering::LowerAsmOperandForConstraint(
23950     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
23951 SelectionDAG &DAG) const {
23952 // Currently only support length 1 constraints.
23953 if (Constraint.size() == 1) {
23954 switch (Constraint[0]) {
23955 case 'I':
23956 // Validate & create a 12-bit signed immediate operand.
23957 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23958 uint64_t CVal = C->getSExtValue();
23959 if (isInt<12>(CVal))
23960 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
23961 Subtarget.getXLenVT()));
23962 }
23963 return;
23964 case 'J':
23965 // Validate & create an integer zero operand.
23966 if (isNullConstant(Op))
23967 Ops.push_back(
23968 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
23969 return;
23970 case 'K':
23971 // Validate & create a 5-bit unsigned immediate operand.
23972 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
23973 uint64_t CVal = C->getZExtValue();
23974 if (isUInt<5>(CVal))
23975 Ops.push_back(
23976 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
23977 }
23978 return;
23979 case 'S':
23981 return;
23982 default:
23983 break;
23984 }
23985 }
23986   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
23987}
23988
23989 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
23990                                                    Instruction *Inst,
23991 AtomicOrdering Ord) const {
23992 if (Subtarget.hasStdExtZtso()) {
23994 return Builder.CreateFence(Ord);
23995 return nullptr;
23996 }
23997
23999 return Builder.CreateFence(Ord);
24000 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
24001 return Builder.CreateFence(AtomicOrdering::Release);
24002 return nullptr;
24003}
24004
24005 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
24006                                                     Instruction *Inst,
24007 AtomicOrdering Ord) const {
24008 if (Subtarget.hasStdExtZtso()) {
24010 return Builder.CreateFence(Ord);
24011 return nullptr;
24012 }
24013
24014 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24015 return Builder.CreateFence(AtomicOrdering::Acquire);
24016 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
24018 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24019 return nullptr;
24020}
24021
24022 TargetLowering::AtomicExpansionKind
24023 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
24024   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24025 // point operations can't be used in an lr/sc sequence without breaking the
24026 // forward-progress guarantee.
24027 if (AI->isFloatingPointOperation() ||
24033
24034 // Don't expand forced atomics, we want to have __sync libcalls instead.
24035 if (Subtarget.hasForcedAtomics())
24036     return AtomicExpansionKind::None;
24037
24038 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24039 if (AI->getOperation() == AtomicRMWInst::Nand) {
24040 if (Subtarget.hasStdExtZacas() &&
24041 (Size >= 32 || Subtarget.hasStdExtZabha()))
24042       return AtomicExpansionKind::CmpXChg;
24043     if (Size < 32)
24044       return AtomicExpansionKind::MaskedIntrinsic;
24045   }
24046
24047 if (Size < 32 && !Subtarget.hasStdExtZabha())
24048     return AtomicExpansionKind::MaskedIntrinsic;
24049
24050   return AtomicExpansionKind::None;
24051}
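// For example, an i8 atomicrmw add is expanded to the masked LR/SC form on its
// aligned containing word unless Zabha provides a native byte AMO, while an i8
// atomicrmw nand with both Zacas and Zabha available is expanded to a
// compare-exchange loop instead.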
24052
24053static Intrinsic::ID
24055 switch (BinOp) {
24056 default:
24057 llvm_unreachable("Unexpected AtomicRMW BinOp");
24058   case AtomicRMWInst::Xchg:
24059     return Intrinsic::riscv_masked_atomicrmw_xchg;
24060 case AtomicRMWInst::Add:
24061 return Intrinsic::riscv_masked_atomicrmw_add;
24062 case AtomicRMWInst::Sub:
24063 return Intrinsic::riscv_masked_atomicrmw_sub;
24064   case AtomicRMWInst::Nand:
24065     return Intrinsic::riscv_masked_atomicrmw_nand;
24066 case AtomicRMWInst::Max:
24067 return Intrinsic::riscv_masked_atomicrmw_max;
24068 case AtomicRMWInst::Min:
24069 return Intrinsic::riscv_masked_atomicrmw_min;
24070   case AtomicRMWInst::UMax:
24071     return Intrinsic::riscv_masked_atomicrmw_umax;
24072   case AtomicRMWInst::UMin:
24073     return Intrinsic::riscv_masked_atomicrmw_umin;
24074 }
24075}
24076
24077 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
24078     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24079 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24080 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24081 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24082 // mask, as this produces better code than the LR/SC loop emitted by
24083 // int_riscv_masked_atomicrmw_xchg.
24084 if (AI->getOperation() == AtomicRMWInst::Xchg &&
24087 if (CVal->isZero())
24088 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24089 Builder.CreateNot(Mask, "Inv_Mask"),
24090 AI->getAlign(), Ord);
24091 if (CVal->isMinusOne())
24092 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24093 AI->getAlign(), Ord);
24094 }
24095
24096 unsigned XLen = Subtarget.getXLen();
24097 Value *Ordering =
24098 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24099 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24101 AI->getModule(),
24103
24104 if (XLen == 64) {
24105 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24106 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24107 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24108 }
24109
24110 Value *Result;
24111
24112 // Must pass the shift amount needed to sign extend the loaded value prior
24113 // to performing a signed comparison for min/max. ShiftAmt is the number of
24114 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24115 // is the number of bits to left+right shift the value in order to
24116 // sign-extend.
24117 if (AI->getOperation() == AtomicRMWInst::Min ||
24119 const DataLayout &DL = AI->getDataLayout();
24120 unsigned ValWidth =
24121 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24122 Value *SextShamt =
24123 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24124 Result = Builder.CreateCall(LrwOpScwLoop,
24125 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24126 } else {
24127 Result =
24128 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24129 }
24130
24131 if (XLen == 64)
24132 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24133 return Result;
24134}
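// Worked example for the min/max path above: an i8 atomicrmw min on RV64 whose
// byte lives at bit offset 8 of its aligned word has ShiftAmt == 8 and
// ValWidth == 8, so SextShamt == (64 - 8) - 8 == 48; the LR/SC loop shifts the
// loaded field left by 48 and arithmetically right by 48 to sign-extend it
// before the signed comparison.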
24135
24136 TargetLowering::AtomicExpansionKind
24137 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
24138     AtomicCmpXchgInst *CI) const {
24139 // Don't expand forced atomics, we want to have __sync libcalls instead.
24140 if (Subtarget.hasForcedAtomics())
24141     return AtomicExpansionKind::None;
24142
24143   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
24144   if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24145 (Size == 8 || Size == 16))
24146     return AtomicExpansionKind::MaskedIntrinsic;
24147   return AtomicExpansionKind::None;
24148 }
24149
24150 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
24151     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24152 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24153 unsigned XLen = Subtarget.getXLen();
24154 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24155 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24156 if (XLen == 64) {
24157 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24158 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24159 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24160 }
24161 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24162 Value *Result = Builder.CreateIntrinsic(
24163 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24164 if (XLen == 64)
24165 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24166 return Result;
24167}
24168
24170 EVT DataVT) const {
24171 // We have indexed loads for all supported EEW types. Indices are always
24172 // zero extended.
24173 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24174 isTypeLegal(Extend.getValueType()) &&
24175 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24176 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24177}
24178
24180 EVT VT) const {
24181 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24182 return false;
24183
24184 switch (FPVT.getSimpleVT().SimpleTy) {
24185 case MVT::f16:
24186 return Subtarget.hasStdExtZfhmin();
24187 case MVT::f32:
24188 return Subtarget.hasStdExtF();
24189 case MVT::f64:
24190 return Subtarget.hasStdExtD();
24191 default:
24192 return false;
24193 }
24194}
24195
24196 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
24197   // If we are using the small code model, we can reduce the size of a jump
24198   // table entry to 4 bytes.
24199 if (Subtarget.is64Bit() && !isPositionIndependent() &&
24200       getTargetMachine().getCodeModel() == CodeModel::Small) {
24201     return MachineJumpTableInfo::EK_Custom32;
24202   }
24203   return TargetLowering::getJumpTableEncoding();
24204}
24205
24206 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
24207     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24208 unsigned uid, MCContext &Ctx) const {
24209 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24211 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24212}
24213
24214 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
24215   // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24216 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24217 // a power of two as well.
24218 // FIXME: This doesn't work for zve32, but that's already broken
24219 // elsewhere for the same reason.
24220 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24221 static_assert(RISCV::RVVBitsPerBlock == 64,
24222 "RVVBitsPerBlock changed, audit needed");
24223 return true;
24224}
24225
24226 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
24227                                                  SDValue &Offset,
24228                                                  ISD::MemIndexedMode &AM,
24229                                                  SelectionDAG &DAG) const {
24230 // Target does not support indexed loads.
24231 if (!Subtarget.hasVendorXTHeadMemIdx())
24232 return false;
24233
24234 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24235 return false;
24236
24237 Base = Op->getOperand(0);
24238 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24239 int64_t RHSC = RHS->getSExtValue();
24240 if (Op->getOpcode() == ISD::SUB)
24241 RHSC = -(uint64_t)RHSC;
24242
24243 // The constants that can be encoded in the THeadMemIdx instructions
24244 // are of the form (sign_extend(imm5) << imm2).
24245 bool isLegalIndexedOffset = false;
24246 for (unsigned i = 0; i < 4; i++)
24247 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24248 isLegalIndexedOffset = true;
24249 break;
24250 }
24251
24252 if (!isLegalIndexedOffset)
24253 return false;
24254
24255 Offset = Op->getOperand(1);
24256 return true;
24257 }
24258
24259 return false;
24260}
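// Worked example for the (sign_extend(imm5) << imm2) check above: an offset of
// 48 is accepted because 48 >> 2 == 12 fits in a signed 5-bit immediate and 48
// is a multiple of 4, whereas 100 is rejected: it is not a multiple of 8, and
// 100 >> 2 == 25 does not fit in imm5.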
24261
24262 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
24263                                                     SDValue &Offset,
24264                                                     ISD::MemIndexedMode &AM,
24265                                                     SelectionDAG &DAG) const {
24266 EVT VT;
24267 SDValue Ptr;
24268 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24269 VT = LD->getMemoryVT();
24270 Ptr = LD->getBasePtr();
24271 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24272 VT = ST->getMemoryVT();
24273 Ptr = ST->getBasePtr();
24274 } else
24275 return false;
24276
24277 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24278 return false;
24279
24280 AM = ISD::PRE_INC;
24281 return true;
24282}
24283
24284 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
24285                                                      SDValue &Base,
24286                                                      SDValue &Offset,
24287                                                      ISD::MemIndexedMode &AM,
24288                                                      SelectionDAG &DAG) const {
24289 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24290 if (Op->getOpcode() != ISD::ADD)
24291 return false;
24292
24294 Base = LS->getBasePtr();
24295 else
24296 return false;
24297
24298 if (Base == Op->getOperand(0))
24299 Offset = Op->getOperand(1);
24300 else if (Base == Op->getOperand(1))
24301 Offset = Op->getOperand(0);
24302 else
24303 return false;
24304
24305 AM = ISD::POST_INC;
24306 return true;
24307 }
24308
24309 EVT VT;
24310 SDValue Ptr;
24311 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24312 VT = LD->getMemoryVT();
24313 Ptr = LD->getBasePtr();
24314 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24315 VT = ST->getMemoryVT();
24316 Ptr = ST->getBasePtr();
24317 } else
24318 return false;
24319
24320 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24321 return false;
24322 // Post-indexing updates the base, so it's not a valid transform
24323 // if that's not the same as the load's pointer.
24324 if (Ptr != Base)
24325 return false;
24326
24327 AM = ISD::POST_INC;
24328 return true;
24329}
24330
24332 EVT VT) const {
24333 EVT SVT = VT.getScalarType();
24334
24335 if (!SVT.isSimple())
24336 return false;
24337
24338 switch (SVT.getSimpleVT().SimpleTy) {
24339 case MVT::f16:
24340 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24341 : Subtarget.hasStdExtZfhOrZhinx();
24342 case MVT::f32:
24343 return Subtarget.hasStdExtFOrZfinx();
24344 case MVT::f64:
24345 return Subtarget.hasStdExtDOrZdinx();
24346 default:
24347 break;
24348 }
24349
24350 return false;
24351}
24352
24354 // Zacas will use amocas.w which does not require extension.
24355 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24356}
24357
24359 const Constant *PersonalityFn) const {
24360 return RISCV::X10;
24361}
24362
24364 const Constant *PersonalityFn) const {
24365 return RISCV::X11;
24366}
24367
24369 // Return false to suppress the unnecessary extensions if the LibCall
24370 // argument or return value is a float narrower than XLEN on a soft FP ABI.
24371 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24372 Type.getSizeInBits() < Subtarget.getXLen()))
24373 return false;
24374
24375 return true;
24376}
24377
24379 bool IsSigned) const {
24380 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24381 return true;
24382
24383 return IsSigned;
24384}
24385
24387 SDValue C) const {
24388 // Check integral scalar types.
24389 if (!VT.isScalarInteger())
24390 return false;
24391
24392 // Omit the optimization if the subtarget has the Zmmul extension and the data
24393 // size exceeds XLen.
24394 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24395 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24396 return false;
24397
24398 auto *ConstNode = cast<ConstantSDNode>(C);
24399 const APInt &Imm = ConstNode->getAPIntValue();
24400
24401 // Don't do this if the Xqciac extension is enabled and the Imm is a simm12.
24402 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24403 return false;
24404
24405 // Break the MUL to a SLLI and an ADD/SUB.
24406 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24407 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24408 return true;
24409
24410 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24411 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
24412 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24413 (Imm - 8).isPowerOf2()))
24414 return true;
24415
24416 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24417 // a pair of LUI/ADDI.
24418 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24419 ConstNode->hasOneUse()) {
24420 APInt ImmS = Imm.ashr(Imm.countr_zero());
24421 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24422 (1 - ImmS).isPowerOf2())
24423 return true;
24424 }
24425
24426 return false;
24427}
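// Worked examples of the rules above (illustrative only, not upstream code):
//   * Imm = 3:    (Imm - 1) == 2 is a power of two, so the MUL can be broken
//                 into SLLI + ADD, i.e. x * 3 == (x << 1) + x.
//   * Imm = 4098: not a simm12, and (Imm - 2) == 4096 is a power of two, so
//                 with shXadd available it becomes SH1ADD(x, SLLI(x, 12)),
//                 since x * 4098 == (x << 12) + (x << 1).
//   * Imm = 24576 (0x6000): none of the rules fire; in particular the last
//                 rule is skipped because countr_zero() == 13 >= 12, and such
//                 a constant is already cheap to materialize with a single LUI.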
24428
24430 SDValue ConstNode) const {
24431 // Let the DAGCombiner decide for vectors.
24432 EVT VT = AddNode.getValueType();
24433 if (VT.isVector())
24434 return true;
24435
24436 // Let the DAGCombiner decide for larger types.
24437 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24438 return true;
24439
24440 // It is worse if c1 is simm12 while c1*c2 is not.
24441 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24442 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24443 const APInt &C1 = C1Node->getAPIntValue();
24444 const APInt &C2 = C2Node->getAPIntValue();
24445 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24446 return false;
24447
24448 // Default to true and let the DAGCombiner decide.
24449 return true;
24450}
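// Illustrative example (editor's sketch, not upstream code): for
// (x + 3) * 2000, c1 == 3 fits in a simm12 (a single ADDI) but
// c1 * c2 == 6000 does not, so folding the multiply through the add would
// replace the ADDI with an LUI/ADDI pair to materialize 6000; the hook
// returns false to keep the original form. For (x + 3) * 4, both 3 and 12
// are simm12, so the decision is left to the generic DAGCombiner.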
24451
24453 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24454 unsigned *Fast) const {
24455 if (!VT.isVector()) {
24456 if (Fast)
24457 *Fast = Subtarget.enableUnalignedScalarMem();
24458 return Subtarget.enableUnalignedScalarMem();
24459 }
24460
24461 // All vector implementations must support element alignment
24462 EVT ElemVT = VT.getVectorElementType();
24463 if (Alignment >= ElemVT.getStoreSize()) {
24464 if (Fast)
24465 *Fast = 1;
24466 return true;
24467 }
24468
24469 // Note: We lower an unmasked unaligned vector access to an equally sized
24470 // e8 element type access. Given this, we effectively support all unmasked
24471 // misaligned accesses. TODO: Work through the codegen implications of
24472 // allowing such accesses to be formed, and of considering them fast.
24473 if (Fast)
24474 *Fast = Subtarget.enableUnalignedVectorMem();
24475 return Subtarget.enableUnalignedVectorMem();
24476}
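// Illustrative examples (editor's sketch, not upstream code), assuming a
// subtarget without unaligned-scalar-mem or unaligned-vector-mem support:
//   * an i64 load with align 1:  scalar case, reported as not allowed.
//   * a v4i32 load with align 4: element-aligned vector access, allowed and
//                                reported as fast.
//   * a v4i32 load with align 1: rejected; with unaligned-vector-mem enabled
//                                it would be allowed (and lowered as an
//                                e8-element access, per the note above).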
24477
24479 LLVMContext &Context, const MemOp &Op,
24480 const AttributeList &FuncAttributes) const {
24481 if (!Subtarget.hasVInstructions())
24482 return MVT::Other;
24483
24484 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24485 return MVT::Other;
24486
24487 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24488 // has an expansion threshold, and we want the number of hardware memory
24489 // operations to correspond roughly to that threshold. LMUL>1 operations
24490 // are typically expanded linearly internally, and thus correspond to more
24491 // than one actual memory operation. Note that store merging and load
24492 // combining will typically form larger LMUL operations from the LMUL1
24493 // operations emitted here, and that's okay because combining isn't
24494 // introducing new memory operations; it's just merging existing ones.
24495 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24496 const unsigned MinVLenInBytes =
24497 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24498
24499 if (Op.size() < MinVLenInBytes)
24500 // TODO: Figure out short memops. For the moment, do the default thing
24501 // which ends up using scalar sequences.
24502 return MVT::Other;
24503
24504 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24505 // fixed vectors.
24506 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24507 return MVT::Other;
24508
24509 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24510 // a large scalar constant and instead use vmv.v.x/i to do the
24511 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24512 // maximize the chance we can encode the size in the vsetvli.
24513 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24514 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24515
24516 // Do we have sufficient alignment for our preferred VT? If not, revert
24517 // to largest size allowed by our alignment criteria.
24518 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24519 Align RequiredAlign(PreferredVT.getStoreSize());
24520 if (Op.isFixedDstAlign())
24521 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24522 if (Op.isMemcpy())
24523 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24524 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24525 }
24526 return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
24527}
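// Worked example (illustrative only, not upstream code): with a real minimum
// VLEN of 128 (MinVLenInBytes == 16) and ELEN == 64:
//   * an 8-byte memcpy returns MVT::Other (below the LMUL1 threshold, so the
//     default scalar expansion is used);
//   * a sufficiently aligned 64-byte memcpy returns v2i64 (i64 elements,
//     16 / 8 == 2 of them per LMUL1 operation);
//   * a 64-byte memset with a non-zero value returns v16i8, so the byte can
//     be broadcast with vmv.v.x.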
24528
24530 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24531 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24532 bool IsABIRegCopy = CC.has_value();
24533 EVT ValueVT = Val.getValueType();
24534
24535 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24536 if ((ValueVT == PairVT ||
24537 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24538 ValueVT == MVT::f64)) &&
24539 NumParts == 1 && PartVT == MVT::Untyped) {
24540 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24541 MVT XLenVT = Subtarget.getXLenVT();
24542 if (ValueVT == MVT::f64)
24543 Val = DAG.getBitcast(MVT::i64, Val);
24544 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24545 // Always creating an MVT::Untyped part, so always use
24546 // RISCVISD::BuildGPRPair.
24547 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24548 return true;
24549 }
24550
24551 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24552 PartVT == MVT::f32) {
24553 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24554 // nan, and cast to f32.
24555 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24556 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24557 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24558 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24559 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24560 Parts[0] = Val;
24561 return true;
24562 }
24563
24564 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24565#ifndef NDEBUG
24566 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24567 [[maybe_unused]] unsigned ValLMUL =
24569 ValNF * RISCV::RVVBitsPerBlock);
24570 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24571 [[maybe_unused]] unsigned PartLMUL =
24573 PartNF * RISCV::RVVBitsPerBlock);
24574 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24575 "RISC-V vector tuple type only accepts same register class type "
24576 "TUPLE_INSERT");
24577#endif
24578
24579 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24580 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24581 Parts[0] = Val;
24582 return true;
24583 }
24584
24585 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24586 PartVT.isScalableVector()) {
24587 if (ValueVT.isFixedLengthVector()) {
24588 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24589 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24590 }
24591 LLVMContext &Context = *DAG.getContext();
24592 EVT ValueEltVT = ValueVT.getVectorElementType();
24593 EVT PartEltVT = PartVT.getVectorElementType();
24594 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24595 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24596 if (PartVTBitSize % ValueVTBitSize == 0) {
24597 assert(PartVTBitSize >= ValueVTBitSize);
24598 // If the element types are different, bitcast to the same element type of
24599 // PartVT first.
24600 // For example, to copy a <vscale x 1 x i8> value into
24601 // <vscale x 4 x i16>, we first widen <vscale x 1 x i8> to
24602 // <vscale x 8 x i8> with an insert_subvector, and then bitcast the
24603 // result to <vscale x 4 x i16>.
24604 if (ValueEltVT != PartEltVT) {
24605 if (PartVTBitSize > ValueVTBitSize) {
24606 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24607 assert(Count != 0 && "The number of elements should not be zero.");
24608 EVT SameEltTypeVT =
24609 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24610 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24611 }
24612 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24613 } else {
24614 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24615 }
24616 Parts[0] = Val;
24617 return true;
24618 }
24619 }
24620
24621 return false;
24622}
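// Illustrative example (editor's sketch, not upstream code) of the
// [b]f16 -> f32 ABI copy handled above: the half value is bitcast to i16,
// any-extended to i32, and ORed with 0xFFFF0000, so half 1.0 (bits 0x3C00)
// is passed in an f32 part holding the NaN-boxed pattern 0xFFFF3C00. The
// inverse copy in joinRegisterPartsIntoValue below simply truncates the low
// 16 bits back out.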
24623
24625 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24626 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24627 bool IsABIRegCopy = CC.has_value();
24628
24629 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24630 if ((ValueVT == PairVT ||
24631 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24632 ValueVT == MVT::f64)) &&
24633 NumParts == 1 && PartVT == MVT::Untyped) {
24634 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24635 MVT XLenVT = Subtarget.getXLenVT();
24636
24637 SDValue Val = Parts[0];
24638 // Always starting with an MVT::Untyped part, so always use
24639 // RISCVISD::SplitGPRPair
24640 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24641 Val);
24642 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24643 Val.getValue(1));
24644 if (ValueVT == MVT::f64)
24645 Val = DAG.getBitcast(ValueVT, Val);
24646 return Val;
24647 }
24648
24649 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24650 PartVT == MVT::f32) {
24651 SDValue Val = Parts[0];
24652
24653 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24654 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24655 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24656 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24657 return Val;
24658 }
24659
24660 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24661 PartVT.isScalableVector()) {
24662 LLVMContext &Context = *DAG.getContext();
24663 SDValue Val = Parts[0];
24664 EVT ValueEltVT = ValueVT.getVectorElementType();
24665 EVT PartEltVT = PartVT.getVectorElementType();
24666 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24667 if (ValueVT.isFixedLengthVector())
24668 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24669 .getSizeInBits()
24671 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24672 if (PartVTBitSize % ValueVTBitSize == 0) {
24673 assert(PartVTBitSize >= ValueVTBitSize);
24674 EVT SameEltTypeVT = ValueVT;
24675 // If the element types are different, first bitcast the part to a vector
24676 // with ValueVT's element type (keeping PartVT's size).
24677 // For example, to copy a <vscale x 1 x i8> value out of
24678 // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
24679 // <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
24680 // subvector.
24681 if (ValueEltVT != PartEltVT) {
24682 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24683 assert(Count != 0 && "The number of elements should not be zero.");
24684 SameEltTypeVT =
24685 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24686 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24687 }
24688 if (ValueVT.isFixedLengthVector())
24689 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24690 else
24691 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24692 return Val;
24693 }
24694 }
24695 return SDValue();
24696}
24697
24698bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24699 // When aggressively optimizing for code size, we prefer to use a div
24700 // instruction, as it is usually smaller than the alternative sequence.
24701 // TODO: Add vector division?
24702 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24703 return OptSize && !VT.isVector();
24704}
24705
24707 // Scalarizing zero_ext and sign_ext might prevent them from matching a
24708 // widening instruction in some situations.
24709 unsigned Opc = N->getOpcode();
24711 return false;
24712 return true;
24713}
24714
24715static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24716 Module *M = IRB.GetInsertBlock()->getModule();
24717 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24718 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24719 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24720 IRB.CreateCall(ThreadPointerFunc), Offset);
24721}
24722
24724 // Fuchsia provides a fixed TLS slot for the stack cookie.
24725 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24726 if (Subtarget.isTargetFuchsia())
24727 return useTpOffset(IRB, -0x10);
24728
24729 // Android provides a fixed TLS slot for the stack cookie. See the definition
24730 // of TLS_SLOT_STACK_GUARD in
24731 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24732 if (Subtarget.isTargetAndroid())
24733 return useTpOffset(IRB, -0x18);
24734
24735 Module *M = IRB.GetInsertBlock()->getModule();
24736
24737 if (M->getStackProtectorGuard() == "tls") {
24738 // Users must specify the offset explicitly
24739 int Offset = M->getStackProtectorGuardOffset();
24740 return useTpOffset(IRB, Offset);
24741 }
24742
24744}
24745
24747 Align Alignment) const {
24748 if (!Subtarget.hasVInstructions())
24749 return false;
24750
24751 // Only support fixed vectors if we know the minimum vector size.
24752 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24753 return false;
24754
24755 EVT ScalarType = DataType.getScalarType();
24756 if (!isLegalElementTypeForRVV(ScalarType))
24757 return false;
24758
24759 if (!Subtarget.enableUnalignedVectorMem() &&
24760 Alignment < ScalarType.getStoreSize())
24761 return false;
24762
24763 return true;
24764}
24765
24769 const TargetInstrInfo *TII) const {
24770 assert(MBBI->isCall() && MBBI->getCFIType() &&
24771 "Invalid call instruction for a KCFI check");
24772 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24773 MBBI->getOpcode()));
24774
24775 MachineOperand &Target = MBBI->getOperand(0);
24776 Target.setIsRenamable(false);
24777
24778 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24779 .addReg(Target.getReg())
24780 .addImm(MBBI->getCFIType())
24781 .getInstr();
24782}
24783
24784#define GET_REGISTER_MATCHER
24785#include "RISCVGenAsmMatcher.inc"
24786
24789 const MachineFunction &MF) const {
24791 if (!Reg)
24793 if (!Reg)
24794 return Reg;
24795
24796 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24797 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24798 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24799 StringRef(RegName) + "\"."));
24800 return Reg;
24801}
24802
24805 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24806
24807 if (NontemporalInfo == nullptr)
24809
24810 // 1, the default value, works as __RISCV_NTLH_ALL
24811 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24812 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24813 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24814 // 5 -> __RISCV_NTLH_ALL
24815 int NontemporalLevel = 5;
24816 const MDNode *RISCVNontemporalInfo =
24817 I.getMetadata("riscv-nontemporal-domain");
24818 if (RISCVNontemporalInfo != nullptr)
24819 NontemporalLevel =
24821 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24822 ->getValue())
24823 ->getZExtValue();
24824
24825 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24826 "RISC-V target doesn't support this non-temporal domain.");
24827
24828 NontemporalLevel -= 2;
24830 if (NontemporalLevel & 0b1)
24831 Flags |= MONontemporalBit0;
24832 if (NontemporalLevel & 0b10)
24833 Flags |= MONontemporalBit1;
24834
24835 return Flags;
24836}
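// Illustrative summary (editor's sketch, not upstream code) of the mapping
// computed above from the riscv-nontemporal-domain level to the two MMO bits:
//   level 2 (__RISCV_NTLH_INNERMOST_PRIVATE) -> neither bit set
//   level 3 (__RISCV_NTLH_ALL_PRIVATE)       -> MONontemporalBit0
//   level 4 (__RISCV_NTLH_INNERMOST_SHARED)  -> MONontemporalBit1
//   level 5, level 1, or no domain metadata  -> both bits (__RISCV_NTLH_ALL)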
24837
24840
24841 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24843 TargetFlags |= (NodeFlags & MONontemporalBit0);
24844 TargetFlags |= (NodeFlags & MONontemporalBit1);
24845 return TargetFlags;
24846}
24847
24849 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24850 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24851}
24852
24854 if (VT.isVector()) {
24855 EVT SVT = VT.getVectorElementType();
24856 // If the element type is legal we can use cpop.v if it is enabled.
24857 if (isLegalElementTypeForRVV(SVT))
24858 return Subtarget.hasStdExtZvbb();
24859 // Don't consider it fast if the type needs to be legalized or scalarized.
24860 return false;
24861 }
24862
24863 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
24864}
24865
24867 ISD::CondCode Cond) const {
24868 return isCtpopFast(VT) ? 0 : 1;
24869}
24870
24872 const Instruction *I) const {
24873 if (Subtarget.hasStdExtZalasr()) {
24874 if (Subtarget.hasStdExtZtso()) {
24875 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
24876 // should be lowered to plain load/store. The easiest way to do this is
24877 // to say we should insert fences for them, and the fence insertion code
24878 // will just not insert any fences
24879 auto *LI = dyn_cast<LoadInst>(I);
24880 auto *SI = dyn_cast<StoreInst>(I);
24881 if ((LI &&
24882 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
24883 (SI &&
24884 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
24885 // Here, this is a load or store which is seq_cst and needs a .aq or
24886 // .rl, therefore we shouldn't try to insert fences.
24887 return false;
24888 }
24889 // Here, we are a TSO inst that isn't a seq_cst load/store
24890 return isa<LoadInst>(I) || isa<StoreInst>(I);
24891 }
24892 return false;
24893 }
24894 // Note that one specific case requires fence insertion for an
24895 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
24896 // than this hook due to limitations in the interface here.
24897 return isa<LoadInst>(I) || isa<StoreInst>(I);
24898}
24899
24901
24902 // GISel support is in progress or complete for these opcodes.
24903 unsigned Op = Inst.getOpcode();
24904 if (Op == Instruction::Add || Op == Instruction::Sub ||
24905 Op == Instruction::And || Op == Instruction::Or ||
24906 Op == Instruction::Xor || Op == Instruction::InsertElement ||
24907 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
24908 Op == Instruction::Freeze || Op == Instruction::Store)
24909 return false;
24910
24911 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
24912 // Mark RVV intrinsic as supported.
24913 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID()))
24914 return false;
24915 }
24916
24917 if (Inst.getType()->isScalableTy())
24918 return true;
24919
24920 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
24921 if (Inst.getOperand(i)->getType()->isScalableTy() &&
24922 !isa<ReturnInst>(&Inst))
24923 return true;
24924
24925 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
24926 if (AI->getAllocatedType()->isScalableTy())
24927 return true;
24928 }
24929
24930 return false;
24931}
24932
24933SDValue
24934RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
24935 SelectionDAG &DAG,
24936 SmallVectorImpl<SDNode *> &Created) const {
24937 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
24938 if (isIntDivCheap(N->getValueType(0), Attr))
24939 return SDValue(N, 0); // Lower SDIV as SDIV
24940
24941 // Only perform this transform if short forward branch opt is supported.
24942 if (!Subtarget.hasShortForwardBranchOpt())
24943 return SDValue();
24944 EVT VT = N->getValueType(0);
24945 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
24946 return SDValue();
24947
24948 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
24949 if (Divisor.sgt(2048) || Divisor.slt(-2048))
24950 return SDValue();
24951 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
24952}
24953
24954bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
24955 EVT VT, const APInt &AndMask) const {
24956 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
24957 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
24959}
24960
24962 return Subtarget.getMinimumJumpTableEntries();
24963}
24964
24966 SDValue Value, SDValue Addr,
24967 int JTI,
24968 SelectionDAG &DAG) const {
24969 if (Subtarget.hasStdExtZicfilp()) {
24970 // When Zicfilp is enabled, we need to use a software-guarded branch for the
24971 // jump table branch.
24972 SDValue Chain = Value;
24973 // Jump table debug info is only needed if CodeView is enabled.
24975 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
24976 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
24977 }
24978 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
24979}
24980
24981// If an output pattern produces multiple instructions, tablegen may pick an
24982// arbitrary type from an instruction's destination register class to use for the
24983// VT of that MachineSDNode. This VT may be used to look up the representative
24984// register class. If the type isn't legal, the default implementation will
24985// not find a register class.
24986//
24987// Some integer types smaller than XLen are listed in the GPR register class to
24988// support isel patterns for GISel, but are not legal in SelectionDAG. The
24989// arbitrary type tablegen picks may be one of these smaller types.
24990//
24991// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
24992// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
24993std::pair<const TargetRegisterClass *, uint8_t>
24994RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
24995 MVT VT) const {
24996 switch (VT.SimpleTy) {
24997 default:
24998 break;
24999 case MVT::i8:
25000 case MVT::i16:
25001 case MVT::i32:
25003 case MVT::bf16:
25004 case MVT::f16:
25006 }
25007
25009}
25010
25012
25013#define GET_RISCVVIntrinsicsTable_IMPL
25014#include "RISCVGenSearchableTables.inc"
25015
25016} // namespace llvm::RISCVVIntrinsicsTable
25017
25019
25020 // If the function specifically requests inline stack probes, emit them.
25021 if (MF.getFunction().hasFnAttribute("probe-stack"))
25022 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
25023 "inline-asm";
25024
25025 return false;
25026}
25027
25029 Align StackAlign) const {
25030 // The default stack probe size is 4096 if the function has no
25031 // stack-probe-size attribute.
25032 const Function &Fn = MF.getFunction();
25033 unsigned StackProbeSize =
25034 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25035 // Round down to the stack alignment.
25036 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25037 return StackProbeSize ? StackProbeSize : StackAlign.value();
25038}
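// Worked example (illustrative only, not upstream code): with a
// "stack-probe-size"="5000" attribute and a 16-byte stack alignment,
// alignDown(5000, 16) == 4992, so the probing loop emitted below steps the
// stack pointer in 4992-byte chunks; with the 4096 default the value is
// already aligned and is returned unchanged.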
25039
25040SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25041 SelectionDAG &DAG) const {
25043 if (!hasInlineStackProbe(MF))
25044 return SDValue();
25045
25046 MVT XLenVT = Subtarget.getXLenVT();
25047 // Get the inputs.
25048 SDValue Chain = Op.getOperand(0);
25049 SDValue Size = Op.getOperand(1);
25050
25052 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25053 SDLoc dl(Op);
25054 EVT VT = Op.getValueType();
25055
25056 // Construct the new SP value in a GPR.
25057 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25058 Chain = SP.getValue(1);
25059 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25060 if (Align)
25061 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25062 DAG.getSignedConstant(-Align->value(), dl, VT));
25063
25064 // Set the real SP to the new value with a probing loop.
25065 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25066 return DAG.getMergeValues({SP, Chain}, dl);
25067}
25068
25071 MachineBasicBlock *MBB) const {
25072 MachineFunction &MF = *MBB->getParent();
25073 MachineBasicBlock::iterator MBBI = MI.getIterator();
25074 DebugLoc DL = MBB->findDebugLoc(MBBI);
25075 Register TargetReg = MI.getOperand(0).getReg();
25076
25077 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25078 bool IsRV64 = Subtarget.is64Bit();
25079 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25080 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25081 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25082
25083 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25084 MachineBasicBlock *LoopTestMBB =
25085 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25086 MF.insert(MBBInsertPoint, LoopTestMBB);
25087 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25088 MF.insert(MBBInsertPoint, ExitMBB);
25089 Register SPReg = RISCV::X2;
25090 Register ScratchReg =
25091 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25092
25093 // ScratchReg = ProbeSize
25094 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25095
25096 // LoopTest:
25097 // SUB SP, SP, ProbeSize
25098 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25099 .addReg(SPReg)
25100 .addReg(ScratchReg);
25101
25102 // s[d|w] zero, 0(sp)
25103 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25104 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25105 .addReg(RISCV::X0)
25106 .addReg(SPReg)
25107 .addImm(0);
25108
25109 // BLT TargetReg, SP, LoopTest
25110 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25111 .addReg(TargetReg)
25112 .addReg(SPReg)
25113 .addMBB(LoopTestMBB);
25114
25115 // Adjust with: MV SP, TargetReg.
25116 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25117 .addReg(TargetReg)
25118 .addImm(0);
25119
25120 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25122
25123 LoopTestMBB->addSuccessor(ExitMBB);
25124 LoopTestMBB->addSuccessor(LoopTestMBB);
25125 MBB->addSuccessor(LoopTestMBB);
25126
25127 MI.eraseFromParent();
25128 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25129 return ExitMBB->begin()->getParent();
25130}
25131
25133 if (Subtarget.hasStdExtFOrZfinx()) {
25134 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25135 return RCRegs;
25136 }
25137 return {};
25138}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
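A minimal usage example (illustrative; assumes an LLVM build to compile against):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // Broadcast the 8-bit pattern 0xAB across 32 bits: 0xABABABAB.
  llvm::APInt Byte(8, 0xAB);
  llvm::APInt Splat = llvm::APInt::getSplat(32, Byte);
  assert(Splat.getZExtValue() == 0xABABABABu);
  return 0;
}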
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:488
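For example (illustrative; assumes an LLVM build to compile against), 0x00FF is a mask of the 8 low bits while 0x0FF0 is a shifted mask:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt LowByte(16, 0x00FF);
  llvm::APInt MidByte(16, 0x0FF0);
  assert(LowByte.isMask(8));       // exactly the 8 low bits set
  assert(MidByte.isShiftedMask()); // one contiguous run of ones, shifted up
  assert(!MidByte.isMask(8));      // but not a low-bits mask
  return 0;
}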
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & set()
Definition BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
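The caveat is the usual floating-point one; as a plain C++ illustration, operator== treats +0.0 and -0.0 as equal even though they are distinct values, and NaN never compares equal to anything:

#include <cassert>
#include <cmath>

int main() {
  assert(0.0 == -0.0);        // equal under ==, yet distinct bit patterns
  assert(std::signbit(-0.0)); // the sign bit tells them apart
  double NaN = std::nan("");
  assert(!(NaN == NaN));      // NaN is not even equal to itself
  return 0;
}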
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:388
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:229
unsigned size() const
Definition DenseMap.h:108
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:269
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1936
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2508
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1077
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1445
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type SrcTy to type DstTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
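For orientation, the value follows the standard RVV relationship VLMAX = LMUL x VLEN / SEW (the exact mapping of the helper's parameters is not spelled out here); a worked example in plain C++:

#include <cassert>

// Illustrative only: with 128-bit vector registers (VLEN), 32-bit elements
// (SEW) and a register group of LMUL = 2, VLMAX = 2 * 128 / 32 = 8 elements.
static unsigned vlmax(unsigned VLenBits, unsigned SEW, unsigned LMUL) {
  return LMUL * VLenBits / SEW;
}

int main() {
  assert(vlmax(128, 32, 2) == 8);
  return 0;
}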
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether a given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
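The two forms test the same bit pairs, as a brute-force check over small widths shows (standalone C++ illustration of the identity for the right-shift case, unrelated to the hook's implementation):

#include <cassert>

int main() {
  // For 8-bit X, C and shift amounts Y < 8,
  //   (X & (C >> Y)) != 0   iff   ((X << Y) & C) != 0,
  // since both test X bit i against C bit i+Y for every in-range i.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned Y = 0; Y < 8; ++Y) {
        bool Orig = (X & (C >> Y)) != 0;
        bool Hoisted = (((X << Y) & 0xFFu) & C) != 0;
        assert(Orig == Hoisted);
      }
  return 0;
}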
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
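A hedged usage sketch, assuming a SelectionDAG DAG and an SDValue Addr are in scope; it only illustrates the query pattern:
    // Ask the DAG what it can prove about Addr's low bits before folding it
    // into an aligned addressing mode.
    KnownBits Known = DAG.computeKnownBits(Addr);
    if (Known.countMinTrailingZeros() >= 2) {
      // Addr is provably 4-byte aligned here.
    }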
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
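For illustration, these static ShuffleVectorSDNode predicates take a plain mask array; the mask below is a made-up example:
    // A 4-element mask that reverses a single source vector.
    int Mask[] = {3, 2, 1, 0};
    bool IsReverse = ShuffleVectorSDNode::isReverseMask(Mask, /*NumSrcElts=*/4);
    bool OneSource = ShuffleVectorSDNode::isSingleSourceMask(Mask, /*NumSrcElts=*/4);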
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the LLVM IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
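A small, self-contained sketch of the SmallSet interface above (header llvm/ADT/SmallSet.h); the register numbers are arbitrary:
    // Deduplicate a handful of register numbers without heap allocation.
    llvm::SmallSet<unsigned, 4> SeenRegs;
    for (unsigned Reg : {5u, 6u, 5u})
      SeenRegs.insert(Reg);              // the second 5 is a no-op
    bool HasSix = SeenRegs.count(6) != 0; // count() returns 0 or 1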
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:862
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
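A sketch of the StringSwitch pattern (llvm/ADT/StringSwitch.h); the constraint strings and values are illustrative, not the mapping this backend uses:
    // Map a constraint letter to a small internal code; -1 means "unknown".
    int Kind = llvm::StringSwitch<int>(Constraint) // Constraint: a StringRef
                   .Case("r", 0)
                   .Cases("f", "d", 1)
                   .Default(-1);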
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
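As a hedged sketch of the usual pattern (the opcodes and actions below are illustrative, not the actual configuration in this file), a target constructor tells the legalizer what to do per operation and type:
    // Inside a TargetLowering subclass constructor:
    setOperationAction(ISD::CTPOP, MVT::i64, Expand); // legalizer expands it
    setOperationAction(ISD::BSWAP, MVT::i64, Custom); // routed to LowerOperation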
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
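A minimal sketch of how a DAG combine typically guards on this query; TLI, DAG, DL, VT, A and B are assumed to be in scope:
    // Only form an ISD::SMIN if the target can legalize or custom-lower it.
    if (TLI.isOperationLegalOrCustom(ISD::SMIN, VT))
      return DAG.getNode(ISD::SMIN, DL, VT, A, B);
    return SDValue(); // otherwise leave the original pattern alone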
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
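A hedged sketch of the call pattern, assuming an in-scope DAG, DL and an SDValue Op inside a lowering hook; the libcall chosen here is only an example:
    // Lower an operation by calling into the runtime library.
    TargetLowering::MakeLibCallOptions CallOptions;
    SDValue Ops[] = {Op.getOperand(0)};
    auto [Result, OutChain] =
        makeLibCall(DAG, RTLIB::SQRT_F64, MVT::f64, Ops, CallOptions, DL);
    return Result;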
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:130
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:627
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on the mask, e....
Definition ISDOpcodes.h:690
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:611
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
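An illustrative sketch of the RISCVMatInt helpers declared above, assuming an in-scope MCSubtargetInfo reference STI; the constant is arbitrary:
    // How many instructions would it take to materialize this constant?
    RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(0x12345678, STI);
    bool CheapEnough = Seq.size() <= 2; // e.g. gate an optimization on the cost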
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:428
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
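A tiny sketch of the typical use: checking whether a value fits a signed immediate field (12 bits is the width of RISC-V I-type immediates):
    int64_t Offset = -1500;
    bool FitsImm12 = isInt<12>(Offset); // true: -2048 <= -1500 <= 2047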
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2452
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
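A short sketch combining isPowerOf2_64 and Log2_64, the common strength-reduction idiom:
    uint64_t C = 64;
    if (isPowerOf2_64(C)) {
      unsigned ShAmt = Log2_64(C); // 6
      // ... replace 'x * C' with 'x << ShAmt' ...
    }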
uint64_t PowerOf2Ceil(uint64_t A)
Returns the smallest power of two that is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count the number of 0s from the least significant bit towards the most significant bit, stopping at the first 1.
Definition bit.h:186
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1948
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1941
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
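A small sketch tying maskTrailingOnes and SignExtend64 together, decoding a 12-bit two's-complement field the way an I-type immediate is interpreted:
    uint64_t Raw = 0xFFF; // all twelve bits set
    int64_t Imm = SignExtend64<12>(Raw & maskTrailingOnes<uint64_t>(12)); // -1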
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:267
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:154
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:165
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:289
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:304
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:173
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:340
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:273
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:117
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...