1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
60static cl::opt<unsigned> ExtensionMaxWebSize(
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
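// A rough illustration (not from a specific test): the "web" is the set of
// connected sext/zext, add/sub/mul and splat nodes reachable from a candidate
// widening operation, e.g. for
//   %a = sext <vscale x 2 x i32> %x to <vscale x 2 x i64>
//   %b = sext <vscale x 2 x i32> %y to <vscale x 2 x i64>
//   %c = add <vscale x 2 x i64> %a, %b
// the web {%a, %b, %c} has three nodes; webs larger than the limit above are
// left alone rather than turned into vwadd-style operations.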
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
72static cl::opt<unsigned> NumRepeatedDivisors(
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
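// A minimal sketch of the transformation this threshold gates, assuming the
// fast-math flags permit reciprocal formation: with the default of 2,
//   double r0 = a / d;
//   double r1 = b / d;
// may be rewritten as
//   double inv = 1.0 / d;
//   double r0 = a * inv;
//   double r1 = b * inv;
// trading two divisions for one division and two multiplications.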
77
78static cl::opt<int>
79 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
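// Rough sketch of the reassociation this enables, assuming Zba so that the
// shift+add pair can become shNadd: something like
//   (add (addi y, imm), (shl x, 1))
// is rewritten as
//   (addi (add (shl x, 1), y), imm)   // i.e. sh1add x, y; then addi ..., imm
// so the shift and the non-immediate add end up adjacent.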
89
90RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
91 const RISCVSubtarget &STI)
92 : TargetLowering(TM), Subtarget(STI) {
93
94 RISCVABI::ABI ABI = Subtarget.getTargetABI();
95 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
96
97 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
98 !Subtarget.hasStdExtF()) {
99 errs() << "Hard-float 'f' ABI can't be used for a target that "
100 "doesn't support the F instruction set extension (ignoring "
101 "target-abi)\n";
102 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
104 !Subtarget.hasStdExtD()) {
105 errs() << "Hard-float 'd' ABI can't be used for a target that "
106 "doesn't support the D instruction set extension (ignoring "
107 "target-abi)\n";
108 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
109 }
110
111 switch (ABI) {
112 default:
113 reportFatalUsageError("Don't know how to lower this ABI");
122 break;
123 }
124
125 MVT XLenVT = Subtarget.getXLenVT();
126
127 // Set up the register classes.
128 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
129
130 if (Subtarget.hasStdExtZfhmin())
131 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
132 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
133 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
134 if (Subtarget.hasStdExtF())
135 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
136 if (Subtarget.hasStdExtD())
137 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
138 if (Subtarget.hasStdExtZhinxmin())
139 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
140 if (Subtarget.hasStdExtZfinx())
141 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
142 if (Subtarget.hasStdExtZdinx()) {
143 if (Subtarget.is64Bit())
144 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
145 else
146 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
147 }
148
149 static const MVT::SimpleValueType BoolVecVTs[] = {
150 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
151 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
152 static const MVT::SimpleValueType IntVecVTs[] = {
153 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
154 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
155 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
156 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
157 MVT::nxv4i64, MVT::nxv8i64};
158 static const MVT::SimpleValueType F16VecVTs[] = {
159 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
160 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
161 static const MVT::SimpleValueType BF16VecVTs[] = {
162 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
163 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
164 static const MVT::SimpleValueType F32VecVTs[] = {
165 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
166 static const MVT::SimpleValueType F64VecVTs[] = {
167 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
168 static const MVT::SimpleValueType VecTupleVTs[] = {
169 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
170 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
171 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
172 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
173 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
174 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
175 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
176 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
177 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
178 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
179 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
180
181 if (Subtarget.hasVInstructions()) {
182 auto addRegClassForRVV = [this](MVT VT) {
183 // Disable the smallest fractional LMUL types if ELEN is less than
184 // RVVBitsPerBlock.
185 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
186 if (VT.getVectorMinNumElements() < MinElts)
187 return;
188
189 unsigned Size = VT.getSizeInBits().getKnownMinValue();
190 const TargetRegisterClass *RC;
191 if (Size <= RISCV::RVVBitsPerBlock)
192 RC = &RISCV::VRRegClass;
193 else if (Size == 2 * RISCV::RVVBitsPerBlock)
194 RC = &RISCV::VRM2RegClass;
195 else if (Size == 4 * RISCV::RVVBitsPerBlock)
196 RC = &RISCV::VRM4RegClass;
197 else if (Size == 8 * RISCV::RVVBitsPerBlock)
198 RC = &RISCV::VRM8RegClass;
199 else
200 llvm_unreachable("Unexpected size");
201
202 addRegisterClass(VT, RC);
203 };
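// For reference, with RVVBitsPerBlock == 64 this maps scalable types to
// register classes roughly as follows (illustrative, not exhaustive):
//   nxv8i8 / nxv4i16 / nxv2i32 / nxv1i64 -> VR   (LMUL <= 1)
//   nxv16i8 / nxv8i16 / nxv4i32 / ...    -> VRM2 (LMUL == 2)
//   nxv32i8 / ...                        -> VRM4 (LMUL == 4)
//   nxv64i8 / ...                        -> VRM8 (LMUL == 8)
// Fractional-LMUL types such as nxv1i8 also land in VR, unless ELEN is small
// enough (e.g. Zve32x) that the MinElts check above rejects them.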
204
205 for (MVT VT : BoolVecVTs)
206 addRegClassForRVV(VT);
207 for (MVT VT : IntVecVTs) {
208 if (VT.getVectorElementType() == MVT::i64 &&
209 !Subtarget.hasVInstructionsI64())
210 continue;
211 addRegClassForRVV(VT);
212 }
213
214 if (Subtarget.hasVInstructionsF16Minimal() ||
215 Subtarget.hasVendorXAndesVPackFPH())
216 for (MVT VT : F16VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.hasVInstructionsBF16Minimal() ||
220 Subtarget.hasVendorXAndesVBFHCvt())
221 for (MVT VT : BF16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsF32())
225 for (MVT VT : F32VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsF64())
229 for (MVT VT : F64VecVTs)
230 addRegClassForRVV(VT);
231
232 if (Subtarget.useRVVForFixedLengthVectors()) {
233 auto addRegClassForFixedVectors = [this](MVT VT) {
234 MVT ContainerVT = getContainerForFixedLengthVector(VT);
235 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
236 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
237 addRegisterClass(VT, TRI.getRegClass(RCID));
238 };
239 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
240 if (useRVVForFixedLengthVectorVT(VT))
241 addRegClassForFixedVectors(VT);
242
243 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
244 if (useRVVForFixedLengthVectorVT(VT))
245 addRegClassForFixedVectors(VT);
246 }
247
248 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
278 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
279 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
280 }
281
282 // Compute derived properties from the register classes.
284 computeRegisterProperties(STI.getRegisterInfo());
286
288 MVT::i1, Promote);
289 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
291 MVT::i1, Promote);
292
293 // TODO: add all necessary setOperationAction calls.
294 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
295
296 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
297 setOperationAction(ISD::BR_CC, XLenVT, Expand);
298 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
300
305 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
308 }
309
310 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
311
312 setOperationAction(ISD::VASTART, MVT::Other, Custom);
313 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
314
315 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
316 !Subtarget.hasVendorXAndesPerf())
318
320
321 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
322 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
323 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
324 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
325
326 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
327 setOperationAction(ISD::LOAD, MVT::i64, Custom);
328 setOperationAction(ISD::STORE, MVT::i64, Custom);
329 }
330
331 if (Subtarget.is64Bit()) {
333
334 setOperationAction(ISD::LOAD, MVT::i32, Custom);
336 MVT::i32, Custom);
338 if (!Subtarget.hasStdExtZbb())
341 Custom);
343 }
344 if (!Subtarget.hasStdExtZmmul()) {
346 } else if (Subtarget.is64Bit()) {
349 } else {
351 }
352
353 if (!Subtarget.hasStdExtM()) {
355 Expand);
356 } else if (Subtarget.is64Bit()) {
358 {MVT::i8, MVT::i16, MVT::i32}, Custom);
359 }
360
363 Expand);
364
366 Custom);
367
368 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
369 if (Subtarget.is64Bit())
371 } else if (Subtarget.hasVendorXTHeadBb()) {
372 if (Subtarget.is64Bit())
375 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
377 } else {
379 }
380
382 Subtarget.hasREV8Like() ? Legal : Expand);
383
384 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
385 !Subtarget.is64Bit()) {
387 } else {
388 // Zbkb can use rev8+brev8 to implement bitreverse.
390 Subtarget.hasStdExtZbkb() ? Custom : Expand);
391 if (Subtarget.hasStdExtZbkb())
393 }
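// A sketch of the Zbkb expansion referred to above: an XLEN-wide bitreverse
// can be built from two instructions, e.g. on RV64:
//   rev8  a0, a0   // reverse the byte order of the register
//   brev8 a0, a0   // reverse the bits within each byte
// which together reverse every bit of the value.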
394
395 if (Subtarget.hasStdExtZbb() ||
396 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
398 Legal);
399 }
400
401 if (Subtarget.hasCTZLike()) {
402 if (Subtarget.is64Bit())
404 } else {
406 }
407
408 if (!Subtarget.hasCPOPLike()) {
409 // TODO: These should be set to LibCall, but this currently breaks
410 // the Linux kernel build. See #101786. Lacks i128 tests, too.
411 if (Subtarget.is64Bit())
413 else
416 }
417
418 if (Subtarget.hasCLZLike()) {
419 // We need the custom lowering to make sure that the resulting sequence
420 // for the 32bit case is efficient on 64bit targets.
421 // Use default promotion for i32 without Zbb.
422 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
424 } else {
426 }
427
428 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
430 } else if (Subtarget.hasShortForwardBranchOpt()) {
431 // We can use PseudoCCSUB to implement ABS.
433 } else if (Subtarget.is64Bit()) {
435 }
436
437 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
439
440 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
447 }
448
449 static const unsigned FPLegalNodeTypes[] = {
450 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
451 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
452 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
457
458 static const ISD::CondCode FPCCToExpand[] = {
462
463 static const unsigned FPOpToExpand[] = {
464 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
465 ISD::FREM};
466
467 static const unsigned FPRndMode[] = {
468 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
469 ISD::FROUNDEVEN};
470
471 static const unsigned ZfhminZfbfminPromoteOps[] = {
472 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
473 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
478 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
479 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
480 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
481
482 if (Subtarget.hasStdExtZfbfmin()) {
483 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
487 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
488 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
490 setOperationAction(ISD::FABS, MVT::bf16, Custom);
491 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
495 }
496
497 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
498 if (Subtarget.hasStdExtZfhOrZhinx()) {
499 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
500 setOperationAction(FPRndMode, MVT::f16,
501 Subtarget.hasStdExtZfa() ? Legal : Custom);
503 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
504 Subtarget.hasStdExtZfa() ? Legal : Custom);
505 if (Subtarget.hasStdExtZfa())
507 } else {
508 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
509 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
510 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
513 setOperationAction(Op, MVT::f16, Custom);
514 setOperationAction(ISD::FABS, MVT::f16, Custom);
515 setOperationAction(ISD::FNEG, MVT::f16, Custom);
519 }
520
521 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
522
525 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
528 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
529
531 ISD::FNEARBYINT, MVT::f16,
532 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
533 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
534 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
535 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
536 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF},
537 MVT::f16, Promote);
538
539 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
540 // complete support for all operations in LegalizeDAG.
545 MVT::f16, Promote);
546
547 // We need to custom promote this.
548 if (Subtarget.is64Bit())
549 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
550 }
551
552 if (Subtarget.hasStdExtFOrZfinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
554 setOperationAction(FPRndMode, MVT::f32,
555 Subtarget.hasStdExtZfa() ? Legal : Custom);
556 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
559 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
560 setOperationAction(FPOpToExpand, MVT::f32, Expand);
561 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
562 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
563 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
564 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
566 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
567 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
568 Subtarget.isSoftFPABI() ? LibCall : Custom);
569 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
570 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
571 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
572 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
573
574 if (Subtarget.hasStdExtZfa()) {
576 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
577 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
578 } else {
579 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
580 }
581 }
582
583 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
584 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
585
586 if (Subtarget.hasStdExtDOrZdinx()) {
587 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
588
589 if (!Subtarget.is64Bit())
590 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
591
592 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
593 !Subtarget.is64Bit()) {
594 setOperationAction(ISD::LOAD, MVT::f64, Custom);
595 setOperationAction(ISD::STORE, MVT::f64, Custom);
596 }
597
598 if (Subtarget.hasStdExtZfa()) {
600 setOperationAction(FPRndMode, MVT::f64, Legal);
601 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
602 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
603 } else {
604 if (Subtarget.is64Bit())
605 setOperationAction(FPRndMode, MVT::f64, Custom);
606
607 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
608 }
609
612 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
615 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
616 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
617 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
618 setOperationAction(FPOpToExpand, MVT::f64, Expand);
619 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
620 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
621 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
622 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
624 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
625 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
626 Subtarget.isSoftFPABI() ? LibCall : Custom);
627 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
628 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
629 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
630 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
631 }
632
633 if (Subtarget.is64Bit()) {
636 MVT::i32, Custom);
637 setOperationAction(ISD::LROUND, MVT::i32, Custom);
638 }
639
640 if (Subtarget.hasStdExtFOrZfinx()) {
642 Custom);
643
644 // f16/bf16 require custom handling.
646 Custom);
648 Custom);
649
651 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
652 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
653 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
654 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
655 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
656 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
657 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
658 }
659
662 XLenVT, Custom);
663
665
666 if (Subtarget.is64Bit())
668
669 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
670 // Unfortunately this can't be determined just from the ISA naming string.
671 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
672 Subtarget.is64Bit() ? Legal : Custom);
673 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
674 Subtarget.is64Bit() ? Legal : Custom);
675
676 if (Subtarget.is64Bit()) {
677 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
678 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
679 }
680
681 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
683 if (Subtarget.is64Bit())
685
686 if (Subtarget.hasVendorXMIPSCBOP())
687 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
688 else if (Subtarget.hasStdExtZicbop())
689 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
690
691 if (Subtarget.hasStdExtA()) {
692 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
693 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
695 else
697 } else if (Subtarget.hasForcedAtomics()) {
698 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
699 } else {
701 }
702
703 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
704
706
707 if (getTargetMachine().getTargetTriple().isOSLinux()) {
708 // Custom lowering of llvm.clear_cache.
710 }
711
712 if (Subtarget.hasVInstructions()) {
714
715 setOperationAction(ISD::VSCALE, XLenVT, Custom);
716
717 // RVV intrinsics may have illegal operands.
718 // We also need to custom legalize vmv.x.s.
721 {MVT::i8, MVT::i16}, Custom);
722 if (Subtarget.is64Bit())
724 MVT::i32, Custom);
725 else
727 MVT::i64, Custom);
728
730 MVT::Other, Custom);
731
732 static const unsigned IntegerVPOps[] = {
733 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
734 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
735 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
736 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
737 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
738 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
739 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
740 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
741 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
742 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
743 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
744 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
745 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
746 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
747 ISD::EXPERIMENTAL_VP_SPLAT};
748
749 static const unsigned FloatingPointVPOps[] = {
750 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
751 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
752 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
753 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
754 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
755 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
756 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
757 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
758 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
759 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
760 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
761 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
762 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
763
764 static const unsigned IntegerVecReduceOps[] = {
765 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
766 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
767 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
768
769 static const unsigned FloatingPointVecReduceOps[] = {
770 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
771 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
772
773 static const unsigned FloatingPointLibCallOps[] = {
774 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
775 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
776
777 if (!Subtarget.is64Bit()) {
778 // We must custom-lower certain vXi64 operations on RV32 due to the vector
779 // element type being illegal.
781 MVT::i64, Custom);
782
783 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
784
785 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
786 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
787 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
788 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
789 MVT::i64, Custom);
790 }
791
792 for (MVT VT : BoolVecVTs) {
793 if (!isTypeLegal(VT))
794 continue;
795
797
798 // Mask VTs are custom-expanded into a series of standard nodes
802 VT, Custom);
803
805 Custom);
806
808 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
809 Expand);
810 setOperationAction(ISD::VP_MERGE, VT, Custom);
811
812 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
813 Custom);
814
815 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
816
818 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
819 Custom);
820
822 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
823 Custom);
824
825 // RVV has native int->float & float->int conversions where the
826 // element type sizes are within one power-of-two of each other. Any
827 // wider distances between type sizes have to be lowered as sequences
828 // which progressively narrow the gap in stages.
833 VT, Custom);
835 Custom);
836
837 // Expand all extending loads to types larger than this, and truncating
838 // stores from types larger than this.
840 setTruncStoreAction(VT, OtherVT, Expand);
842 OtherVT, Expand);
843 }
844
845 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
846 ISD::VP_TRUNCATE, ISD::VP_SETCC},
847 VT, Custom);
848
851
853
854 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
855 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
856 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
857
860 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
861 }
862
863 for (MVT VT : IntVecVTs) {
864 if (!isTypeLegal(VT))
865 continue;
866
869
870 // Vectors implement MULHS/MULHU.
872
873 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
874 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
876
878 Legal);
879
881
882 // Custom-lower extensions and truncations from/to mask types.
884 VT, Custom);
885
886 // RVV has native int->float & float->int conversions where the
887 // element type sizes are within one power-of-two of each other. Any
888 // wider distances between type sizes have to be lowered as sequences
889 // which progressively narrow the gap in stages.
894 VT, Custom);
896 Custom);
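// For example (a sketch; the exact node sequence may differ): nxv2i8 ->
// nxv2f32 is more than one power-of-two apart (8 -> 32 bits), so it is
// lowered in stages, e.g. extend nxv2i8 to nxv2i16 first and then use a
// single widening convert (vfwcvt.f.x[u].v) from i16 to f32.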
900 VT, Legal);
901
902 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
903 // nodes which truncate by one power of two at a time.
906 Custom);
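// For example, truncating nxv2i64 to nxv2i8 is lowered as a chain of halving
// steps (i64 -> i32 -> i16 -> i8), each of which can be matched to a vnsrl
// with a zero shift amount.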
907
908 // Custom-lower insert/extract operations to simplify patterns.
910 Custom);
911
912 // Custom-lower reduction operations to set up the corresponding custom
913 // nodes' operands.
914 setOperationAction(IntegerVecReduceOps, VT, Custom);
915
916 setOperationAction(IntegerVPOps, VT, Custom);
917
918 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
919
920 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
921 VT, Custom);
922
924 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
925 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
926 VT, Custom);
927 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
928
931 VT, Custom);
932
935
937
939 setTruncStoreAction(VT, OtherVT, Expand);
941 OtherVT, Expand);
942 }
943
946
947 // Splice
949
950 if (Subtarget.hasStdExtZvkb()) {
952 setOperationAction(ISD::VP_BSWAP, VT, Custom);
953 } else {
954 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
956 }
957
958 if (Subtarget.hasStdExtZvbb()) {
960 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
961 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
962 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
963 VT, Custom);
964 } else {
965 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
967 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
968 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
969 VT, Expand);
970
971 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
972 // in the range of f32.
973 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
974 if (isTypeLegal(FloatVT)) {
976 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
977 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
978 VT, Custom);
979 }
980 }
981
983 }
984
985 for (MVT VT : VecTupleVTs) {
986 if (!isTypeLegal(VT))
987 continue;
988
989 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
990 }
991
992 // Expand various CCs to best match the RVV ISA, which natively supports UNE
993 // but no other unordered comparisons, and supports all ordered comparisons
994 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
995 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
996 // and we pattern-match those back to the "original", swapping operands once
997 // more. This way we catch both operations and both "vf" and "fv" forms with
998 // fewer patterns.
999 static const ISD::CondCode VFPCCToExpand[] = {
1003 };
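// To illustrate the swapping described above: a SETOGT between two vectors is
// expanded to a SETOLT with the operands swapped, which matches vmflt.vv
// directly; the same expansion with a splat operand lets the "vf" form
// (vmfgt.vf / vmflt.vf) be selected without needing a separate pattern for
// every operand ordering.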
1004
1005 // TODO: support more ops.
1006 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1007 ISD::FMINNUM,
1008 ISD::FMAXNUM,
1009 ISD::FMINIMUMNUM,
1010 ISD::FMAXIMUMNUM,
1011 ISD::FADD,
1012 ISD::FSUB,
1013 ISD::FMUL,
1014 ISD::FMA,
1015 ISD::FDIV,
1016 ISD::FSQRT,
1017 ISD::FCEIL,
1018 ISD::FTRUNC,
1019 ISD::FFLOOR,
1020 ISD::FROUND,
1021 ISD::FROUNDEVEN,
1022 ISD::FRINT,
1023 ISD::FNEARBYINT,
1025 ISD::SETCC,
1026 ISD::FMAXIMUM,
1027 ISD::FMINIMUM,
1034 ISD::VECREDUCE_FMIN,
1035 ISD::VECREDUCE_FMAX,
1036 ISD::VECREDUCE_FMINIMUM,
1037 ISD::VECREDUCE_FMAXIMUM};
1038
1039 // TODO: support more vp ops.
1040 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1041 ISD::VP_FADD,
1042 ISD::VP_FSUB,
1043 ISD::VP_FMUL,
1044 ISD::VP_FDIV,
1045 ISD::VP_FMA,
1046 ISD::VP_REDUCE_FMIN,
1047 ISD::VP_REDUCE_FMAX,
1048 ISD::VP_SQRT,
1049 ISD::VP_FMINNUM,
1050 ISD::VP_FMAXNUM,
1051 ISD::VP_FCEIL,
1052 ISD::VP_FFLOOR,
1053 ISD::VP_FROUND,
1054 ISD::VP_FROUNDEVEN,
1055 ISD::VP_FROUNDTOZERO,
1056 ISD::VP_FRINT,
1057 ISD::VP_FNEARBYINT,
1058 ISD::VP_SETCC,
1059 ISD::VP_FMINIMUM,
1060 ISD::VP_FMAXIMUM,
1061 ISD::VP_REDUCE_FMINIMUM,
1062 ISD::VP_REDUCE_FMAXIMUM};
1063
1064 // Sets common operation actions on RVV floating-point vector types.
1065 const auto SetCommonVFPActions = [&](MVT VT) {
1067 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1068 // sizes are within one power-of-two of each other. Therefore conversions
1069 // between vXf16 and vXf64 must be lowered as sequences which convert via
1070 // vXf32.
1071 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
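// As a sketch: an FP_EXTEND from nxv2f16 to nxv2f64 becomes two widening
// converts, nxv2f16 -> nxv2f32 -> nxv2f64 (vfwcvt.f.f.v twice), and the
// corresponding FP_ROUND goes through nxv2f32 using vfncvt.f.f.w steps.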
1072 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1073 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1074 // Custom-lower insert/extract operations to simplify patterns.
1076 Custom);
1077 // Expand various condition codes (explained above).
1078 setCondCodeAction(VFPCCToExpand, VT, Expand);
1079
1081 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1082 Legal);
1083 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1084
1085 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1086 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1088 VT, Custom);
1089
1090 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1091
1092 // Expand FP operations that need libcalls.
1093 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1094
1096
1097 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1098
1099 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1100 VT, Custom);
1101
1103 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1104 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1105 VT, Custom);
1106 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1107
1110
1113 VT, Custom);
1114
1117
1119 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1120 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1121
1122 setOperationAction(FloatingPointVPOps, VT, Custom);
1123
1125 Custom);
1128 VT, Legal);
1133 VT, Custom);
1134
1136 };
1137
1138 // Sets common extload/truncstore actions on RVV floating-point vector
1139 // types.
1140 const auto SetCommonVFPExtLoadTruncStoreActions =
1141 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1142 for (auto SmallVT : SmallerVTs) {
1143 setTruncStoreAction(VT, SmallVT, Expand);
1144 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1145 }
1146 };
1147
1148 // Sets common actions for f16 and bf16 for when there's only
1149 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1150 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1151 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1153 Custom);
1154 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1155 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1156 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1157 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1158 Custom);
1160 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1166 VT, Custom);
1167 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1168 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1169 MVT EltVT = VT.getVectorElementType();
1170 if (isTypeLegal(EltVT))
1171 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1173 VT, Custom);
1174 else
1175 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1176 EltVT, Custom);
1177 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1178 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1179 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1180 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1181 ISD::VP_SCATTER},
1182 VT, Custom);
1183 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1184
1185 setOperationAction(ISD::FNEG, VT, Expand);
1186 setOperationAction(ISD::FABS, VT, Expand);
1188
1189 // Expand FP operations that need libcalls.
1190 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1191
1192 // Custom split nxv32[b]f16 since nxv32f32 is not legal.
1193 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1194 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1195 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1196 } else {
1197 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1198 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1199 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1200 }
1201 };
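// To make the promote-vs-split distinction above concrete: an fadd on nxv4f16
// under zvfhmin is promoted to nxv4f32 (widen, vfadd.vv, narrow), while an
// fadd on nxv32f16 cannot be promoted because nxv32f32 would exceed LMUL=8,
// so it is custom-split into two nxv16f16 halves first.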
1202
1203 if (Subtarget.hasVInstructionsF16()) {
1204 for (MVT VT : F16VecVTs) {
1205 if (!isTypeLegal(VT))
1206 continue;
1207 SetCommonVFPActions(VT);
1208 }
1209 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1210 for (MVT VT : F16VecVTs) {
1211 if (!isTypeLegal(VT))
1212 continue;
1213 SetCommonPromoteToF32Actions(VT);
1214 }
1215 }
1216
1217 if (Subtarget.hasVInstructionsBF16Minimal()) {
1218 for (MVT VT : BF16VecVTs) {
1219 if (!isTypeLegal(VT))
1220 continue;
1221 SetCommonPromoteToF32Actions(VT);
1222 }
1223 }
1224
1225 if (Subtarget.hasVInstructionsF32()) {
1226 for (MVT VT : F32VecVTs) {
1227 if (!isTypeLegal(VT))
1228 continue;
1229 SetCommonVFPActions(VT);
1230 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1231 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1232 }
1233 }
1234
1235 if (Subtarget.hasVInstructionsF64()) {
1236 for (MVT VT : F64VecVTs) {
1237 if (!isTypeLegal(VT))
1238 continue;
1239 SetCommonVFPActions(VT);
1240 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1241 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1242 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1243 }
1244 }
1245
1246 if (Subtarget.useRVVForFixedLengthVectors()) {
1247 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1248 if (!useRVVForFixedLengthVectorVT(VT))
1249 continue;
1250
1251 // By default everything must be expanded.
1252 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1255 setTruncStoreAction(VT, OtherVT, Expand);
1257 OtherVT, Expand);
1258 }
1259
1260 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1261 // expansion to a build_vector of 0s.
1263
1264 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1266 Custom);
1267
1270 Custom);
1271
1273 VT, Custom);
1274
1276 VT, Custom);
1277
1279
1280 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1281
1283
1285
1288 Custom);
1289
1290 setOperationAction(ISD::BITCAST, VT, Custom);
1291
1293 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1294 Custom);
1295
1297 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1298 Custom);
1299
1301 {
1310 },
1311 VT, Custom);
1313 Custom);
1314
1316
1317 // Operations below differ between masks and other vectors.
1318 if (VT.getVectorElementType() == MVT::i1) {
1319 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1320 ISD::OR, ISD::XOR},
1321 VT, Custom);
1322
1323 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1324 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1325 VT, Custom);
1326
1327 setOperationAction(ISD::VP_MERGE, VT, Custom);
1328
1329 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1330 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1331 continue;
1332 }
1333
1334 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1335 // it before type legalization for i64 vectors on RV32. It will then be
1336 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1337 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1338 // improvements first.
1339 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1342
1343 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1345 }
1346
1348 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1349
1350 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1351 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1352 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1353 ISD::VP_SCATTER},
1354 VT, Custom);
1355 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1356
1360 VT, Custom);
1361
1364
1366
1367 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1368 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1370
1374 VT, Custom);
1375
1377
1380
1381 // Custom-lower reduction operations to set up the corresponding custom
1382 // nodes' operands.
1383 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1384 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1385 ISD::VECREDUCE_UMIN},
1386 VT, Custom);
1387
1388 setOperationAction(IntegerVPOps, VT, Custom);
1389
1390 if (Subtarget.hasStdExtZvkb())
1392
1393 if (Subtarget.hasStdExtZvbb()) {
1396 VT, Custom);
1397 } else {
1398 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1399 // in the range of f32.
1400 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1401 if (isTypeLegal(FloatVT))
1404 Custom);
1405 }
1406
1408 }
1409
1410 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1411 // There are no extending loads or truncating stores.
1412 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1413 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1414 setTruncStoreAction(VT, InnerVT, Expand);
1415 }
1416
1417 if (!useRVVForFixedLengthVectorVT(VT))
1418 continue;
1419
1420 // By default everything must be expanded.
1421 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1423
1424 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1425 // expansion to a build_vector of 0s.
1427
1432 VT, Custom);
1433 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1434 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1435
1437 VT, Custom);
1438
1439 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1440 ISD::MGATHER, ISD::MSCATTER},
1441 VT, Custom);
1442 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1443 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1444 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1445 VT, Custom);
1446 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1447
1448 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1450 Custom);
1451
1452 if (VT.getVectorElementType() == MVT::f16 &&
1453 !Subtarget.hasVInstructionsF16()) {
1454 setOperationAction(ISD::BITCAST, VT, Custom);
1455 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1457 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1458 Custom);
1459 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1460 Custom);
1461 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1462 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1463 if (Subtarget.hasStdExtZfhmin()) {
1465 } else {
1466 // We need to custom legalize f16 build vectors if Zfhmin isn't
1467 // available.
1469 }
1470 setOperationAction(ISD::FNEG, VT, Expand);
1471 setOperationAction(ISD::FABS, VT, Expand);
1473 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1474 // Don't promote f16 vector operations to f32 if the f32 vector type is
1475 // not legal.
1476 // TODO: could split the f16 vector into two vectors and do promotion.
1477 if (!isTypeLegal(F32VecVT))
1478 continue;
1479 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1480 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1481 continue;
1482 }
1483
1484 if (VT.getVectorElementType() == MVT::bf16) {
1485 setOperationAction(ISD::BITCAST, VT, Custom);
1486 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1487 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1488 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1489 if (Subtarget.hasStdExtZfbfmin()) {
1491 } else {
1492 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1493 // available.
1495 }
1497 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1498 Custom);
1499 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1500 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1501 // not legal.
1502 // TODO: could split the bf16 vector into two vectors and do promotion.
1503 if (!isTypeLegal(F32VecVT))
1504 continue;
1505 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1506 // TODO: Promote VP ops to fp32.
1507 continue;
1508 }
1509
1511 Custom);
1512
1514 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1515 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1516 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1517 ISD::FMAXIMUM, ISD::FMINIMUM},
1518 VT, Custom);
1519
1520 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1521 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1522 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1523 ISD::FNEARBYINT},
1524 VT, Custom);
1525
1526 setCondCodeAction(VFPCCToExpand, VT, Expand);
1527
1530
1531 setOperationAction(ISD::BITCAST, VT, Custom);
1532
1533 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1534
1535 setOperationAction(FloatingPointVPOps, VT, Custom);
1536
1543 VT, Custom);
1544 }
1545
1546 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1547 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1548 if (Subtarget.is64Bit())
1549 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1550 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1551 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1552 if (Subtarget.hasStdExtZfbfmin())
1553 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1554 if (Subtarget.hasStdExtFOrZfinx())
1555 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1556 if (Subtarget.hasStdExtDOrZdinx())
1557 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1558 }
1559 }
1560
1561 if (Subtarget.hasStdExtA())
1562 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1563
1564 if (Subtarget.hasForcedAtomics()) {
1565 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1567 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1568 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1569 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1570 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1571 XLenVT, LibCall);
1572 }
1573
1574 if (Subtarget.hasVendorXTHeadMemIdx()) {
1575 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1576 setIndexedLoadAction(im, MVT::i8, Legal);
1577 setIndexedStoreAction(im, MVT::i8, Legal);
1578 setIndexedLoadAction(im, MVT::i16, Legal);
1579 setIndexedStoreAction(im, MVT::i16, Legal);
1580 setIndexedLoadAction(im, MVT::i32, Legal);
1581 setIndexedStoreAction(im, MVT::i32, Legal);
1582
1583 if (Subtarget.is64Bit()) {
1584 setIndexedLoadAction(im, MVT::i64, Legal);
1585 setIndexedStoreAction(im, MVT::i64, Legal);
1586 }
1587 }
1588 }
1589
1590 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1594
1598 }
1599
1600 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1601 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1602 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1603 ISD::PARTIAL_REDUCE_UMLA,
1604 ISD::PARTIAL_REDUCE_SUMLA};
1605 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1606 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1607 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1608 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1609 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1610
1611 if (Subtarget.useRVVForFixedLengthVectors()) {
1612 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1613 if (VT.getVectorElementType() != MVT::i32 ||
1614 !useRVVForFixedLengthVectorVT(VT))
1615 continue;
1616 ElementCount EC = VT.getVectorElementCount();
1617 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1618 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1619 }
1620 }
1621 }
1622
1623 // Customize load and store operation for bf16 if zfh isn't enabled.
1624 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1625 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1626 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1627 }
1628
1629 // Function alignments.
1630 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1631 setMinFunctionAlignment(FunctionAlignment);
1632 // Set preferred alignments.
1633 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1634 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1635
1641
1642 if (Subtarget.hasStdExtFOrZfinx())
1643 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1644
1645 if (Subtarget.hasStdExtZbb())
1647
1648 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1649 Subtarget.hasVInstructions())
1651
1652 if (Subtarget.hasStdExtZbkb())
1654
1655 if (Subtarget.hasStdExtFOrZfinx())
1658 if (Subtarget.hasVInstructions())
1660 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1661 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1662 ISD::SRL, ISD::SHL, ISD::STORE,
1664 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1668 ISD::VSELECT, ISD::VECREDUCE_ADD});
1669
1670 if (Subtarget.hasVendorXTHeadMemPair())
1671 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1672 if (Subtarget.useRVVForFixedLengthVectors())
1673 setTargetDAGCombine(ISD::BITCAST);
1674
1675 // Disable strict node mutation.
1676 IsStrictFPEnabled = true;
1677 EnableExtLdPromotion = true;
1678
1679 // Let the subtarget decide if a predictable select is more expensive than the
1680 // corresponding branch. This information is used in CGP/SelectOpt to decide
1681 // when to convert selects into branches.
1682 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1683
1684 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1685 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1686
1687 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1688 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1689 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1690
1692 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1693 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1694
1695 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1696 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1697}
1698
1699EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1700 LLVMContext &Context,
1701 EVT VT) const {
1702 if (!VT.isVector())
1703 return getPointerTy(DL);
1704 if (Subtarget.hasVInstructions() &&
1705 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1706 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1707 return VT.changeVectorElementTypeToInteger();
1708}
1709
1711 return Subtarget.getXLenVT();
1712}
1713
1714// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1715bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1716 unsigned VF,
1717 bool IsScalable) const {
1718 if (!Subtarget.hasVInstructions())
1719 return true;
1720
1721 if (!IsScalable)
1722 return true;
1723
1724 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1725 return true;
1726
1727 // Don't allow VF=1 if those types aren't legal.
1728 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1729 return true;
1730
1731 // VLEN=32 support is incomplete.
1732 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1733 return true;
1734
1735 // The maximum VF is for the smallest element width with LMUL=8.
1736 // VF must be a power of 2.
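// With RVVBytesPerBlock == 8 this gives MaxVF == 64, i.e. the element count
// of an LMUL=8 group of i8 elements at the minimum VLEN of 64.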
1737 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1738 return VF > MaxVF || !isPowerOf2_32(VF);
1739}
1740
1742 return !Subtarget.hasVInstructions() ||
1743 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1744}
1745
1746bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1747 const CallInst &I,
1748 MachineFunction &MF,
1749 unsigned Intrinsic) const {
1750 auto &DL = I.getDataLayout();
1751
1752 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1753 bool IsUnitStrided, bool UsePtrVal = false) {
1754 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1755 // We can't use ptrVal if the intrinsic can access memory before the
1756 // pointer. This means we can't use it for strided or indexed intrinsics.
1757 if (UsePtrVal)
1758 Info.ptrVal = I.getArgOperand(PtrOp);
1759 else
1760 Info.fallbackAddressSpace =
1761 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1762 Type *MemTy;
1763 if (IsStore) {
1764 // Store value is the first operand.
1765 MemTy = I.getArgOperand(0)->getType();
1766 } else {
1767 // Use the return type. If it's a segment load, the return type is a struct.
1768 MemTy = I.getType();
1769 if (MemTy->isStructTy())
1770 MemTy = MemTy->getStructElementType(0);
1771 }
1772 if (!IsUnitStrided)
1773 MemTy = MemTy->getScalarType();
1774
1775 Info.memVT = getValueType(DL, MemTy);
1776 if (MemTy->isTargetExtTy()) {
1777 // RISC-V vector tuple type's alignment type should be its element type.
1778 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1779 MemTy = Type::getIntNTy(
1780 MemTy->getContext(),
1781 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1782 ->getZExtValue());
1783 Info.align = DL.getABITypeAlign(MemTy);
1784 } else {
1785 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1786 }
1787 Info.size = MemoryLocation::UnknownSize;
1788 Info.flags |=
1790 return true;
1791 };
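// Example of the resulting IntrinsicInfo (illustrative): for a unit-strided
// riscv.vle of <vscale x 4 x i32>, memVT is nxv4i32, the alignment is 4 (the
// scalar store size), the size is unknown, and MOLoad (plus MONonTemporal
// when !nontemporal metadata is present, see below) is set on the flags.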
1792
1793 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1795
1797 switch (Intrinsic) {
1798 default:
1799 return false;
1800 case Intrinsic::riscv_masked_atomicrmw_xchg:
1801 case Intrinsic::riscv_masked_atomicrmw_add:
1802 case Intrinsic::riscv_masked_atomicrmw_sub:
1803 case Intrinsic::riscv_masked_atomicrmw_nand:
1804 case Intrinsic::riscv_masked_atomicrmw_max:
1805 case Intrinsic::riscv_masked_atomicrmw_min:
1806 case Intrinsic::riscv_masked_atomicrmw_umax:
1807 case Intrinsic::riscv_masked_atomicrmw_umin:
1808 case Intrinsic::riscv_masked_cmpxchg:
1809 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1810 // narrow atomic operation. These will be expanded to an LR/SC loop that
1811 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1812 // will be used to modify the appropriate part of the 4 byte data and
1813 // preserve the rest.
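// For example (sketch only), an i8 atomicrmw add becomes an LR.W/SC.W loop
// on the containing aligned word, along the lines of:
//   retry:
//     lr.w  t0, (a0)       // load the whole 32-bit word
//     add   t1, t0, a1     // add the (pre-shifted) operand
//     ...merge t1 with the untouched bits of t0 under the mask...
//     sc.w  t1, t1, (a0)
//     bnez  t1, retry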
1814 Info.opc = ISD::INTRINSIC_W_CHAIN;
1815 Info.memVT = MVT::i32;
1816 Info.ptrVal = I.getArgOperand(0);
1817 Info.offset = 0;
1818 Info.align = Align(4);
1821 return true;
1822 case Intrinsic::riscv_seg2_load_mask:
1823 case Intrinsic::riscv_seg3_load_mask:
1824 case Intrinsic::riscv_seg4_load_mask:
1825 case Intrinsic::riscv_seg5_load_mask:
1826 case Intrinsic::riscv_seg6_load_mask:
1827 case Intrinsic::riscv_seg7_load_mask:
1828 case Intrinsic::riscv_seg8_load_mask:
1829 case Intrinsic::riscv_sseg2_load_mask:
1830 case Intrinsic::riscv_sseg3_load_mask:
1831 case Intrinsic::riscv_sseg4_load_mask:
1832 case Intrinsic::riscv_sseg5_load_mask:
1833 case Intrinsic::riscv_sseg6_load_mask:
1834 case Intrinsic::riscv_sseg7_load_mask:
1835 case Intrinsic::riscv_sseg8_load_mask:
1836 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1837 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1838 case Intrinsic::riscv_seg2_store_mask:
1839 case Intrinsic::riscv_seg3_store_mask:
1840 case Intrinsic::riscv_seg4_store_mask:
1841 case Intrinsic::riscv_seg5_store_mask:
1842 case Intrinsic::riscv_seg6_store_mask:
1843 case Intrinsic::riscv_seg7_store_mask:
1844 case Intrinsic::riscv_seg8_store_mask:
1845 // Operands are (vec, ..., vec, ptr, mask, vl)
1846 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1847 /*IsStore*/ true,
1848 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1849 case Intrinsic::riscv_sseg2_store_mask:
1850 case Intrinsic::riscv_sseg3_store_mask:
1851 case Intrinsic::riscv_sseg4_store_mask:
1852 case Intrinsic::riscv_sseg5_store_mask:
1853 case Intrinsic::riscv_sseg6_store_mask:
1854 case Intrinsic::riscv_sseg7_store_mask:
1855 case Intrinsic::riscv_sseg8_store_mask:
1856 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1857 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1858 /*IsStore*/ true,
1859 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1860 case Intrinsic::riscv_vlm:
1861 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1862 /*IsStore*/ false,
1863 /*IsUnitStrided*/ true,
1864 /*UsePtrVal*/ true);
1865 case Intrinsic::riscv_vle:
1866 case Intrinsic::riscv_vle_mask:
1867 case Intrinsic::riscv_vleff:
1868 case Intrinsic::riscv_vleff_mask:
1869 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1870 /*IsStore*/ false,
1871 /*IsUnitStrided*/ true,
1872 /*UsePtrVal*/ true);
1873 case Intrinsic::riscv_vsm:
1874 case Intrinsic::riscv_vse:
1875 case Intrinsic::riscv_vse_mask:
1876 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1877 /*IsStore*/ true,
1878 /*IsUnitStrided*/ true,
1879 /*UsePtrVal*/ true);
1880 case Intrinsic::riscv_vlse:
1881 case Intrinsic::riscv_vlse_mask:
1882 case Intrinsic::riscv_vloxei:
1883 case Intrinsic::riscv_vloxei_mask:
1884 case Intrinsic::riscv_vluxei:
1885 case Intrinsic::riscv_vluxei_mask:
1886 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1887 /*IsStore*/ false,
1888 /*IsUnitStrided*/ false);
1889 case Intrinsic::riscv_vsse:
1890 case Intrinsic::riscv_vsse_mask:
1891 case Intrinsic::riscv_vsoxei:
1892 case Intrinsic::riscv_vsoxei_mask:
1893 case Intrinsic::riscv_vsuxei:
1894 case Intrinsic::riscv_vsuxei_mask:
1895 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1896 /*IsStore*/ true,
1897 /*IsUnitStrided*/ false);
1898 case Intrinsic::riscv_vlseg2:
1899 case Intrinsic::riscv_vlseg3:
1900 case Intrinsic::riscv_vlseg4:
1901 case Intrinsic::riscv_vlseg5:
1902 case Intrinsic::riscv_vlseg6:
1903 case Intrinsic::riscv_vlseg7:
1904 case Intrinsic::riscv_vlseg8:
1905 case Intrinsic::riscv_vlseg2ff:
1906 case Intrinsic::riscv_vlseg3ff:
1907 case Intrinsic::riscv_vlseg4ff:
1908 case Intrinsic::riscv_vlseg5ff:
1909 case Intrinsic::riscv_vlseg6ff:
1910 case Intrinsic::riscv_vlseg7ff:
1911 case Intrinsic::riscv_vlseg8ff:
1912 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1913 /*IsStore*/ false,
1914 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1915 case Intrinsic::riscv_vlseg2_mask:
1916 case Intrinsic::riscv_vlseg3_mask:
1917 case Intrinsic::riscv_vlseg4_mask:
1918 case Intrinsic::riscv_vlseg5_mask:
1919 case Intrinsic::riscv_vlseg6_mask:
1920 case Intrinsic::riscv_vlseg7_mask:
1921 case Intrinsic::riscv_vlseg8_mask:
1922 case Intrinsic::riscv_vlseg2ff_mask:
1923 case Intrinsic::riscv_vlseg3ff_mask:
1924 case Intrinsic::riscv_vlseg4ff_mask:
1925 case Intrinsic::riscv_vlseg5ff_mask:
1926 case Intrinsic::riscv_vlseg6ff_mask:
1927 case Intrinsic::riscv_vlseg7ff_mask:
1928 case Intrinsic::riscv_vlseg8ff_mask:
1929 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1930 /*IsStore*/ false,
1931 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1932 case Intrinsic::riscv_vlsseg2:
1933 case Intrinsic::riscv_vlsseg3:
1934 case Intrinsic::riscv_vlsseg4:
1935 case Intrinsic::riscv_vlsseg5:
1936 case Intrinsic::riscv_vlsseg6:
1937 case Intrinsic::riscv_vlsseg7:
1938 case Intrinsic::riscv_vlsseg8:
1939 case Intrinsic::riscv_vloxseg2:
1940 case Intrinsic::riscv_vloxseg3:
1941 case Intrinsic::riscv_vloxseg4:
1942 case Intrinsic::riscv_vloxseg5:
1943 case Intrinsic::riscv_vloxseg6:
1944 case Intrinsic::riscv_vloxseg7:
1945 case Intrinsic::riscv_vloxseg8:
1946 case Intrinsic::riscv_vluxseg2:
1947 case Intrinsic::riscv_vluxseg3:
1948 case Intrinsic::riscv_vluxseg4:
1949 case Intrinsic::riscv_vluxseg5:
1950 case Intrinsic::riscv_vluxseg6:
1951 case Intrinsic::riscv_vluxseg7:
1952 case Intrinsic::riscv_vluxseg8:
1953 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1954 /*IsStore*/ false,
1955 /*IsUnitStrided*/ false);
1956 case Intrinsic::riscv_vlsseg2_mask:
1957 case Intrinsic::riscv_vlsseg3_mask:
1958 case Intrinsic::riscv_vlsseg4_mask:
1959 case Intrinsic::riscv_vlsseg5_mask:
1960 case Intrinsic::riscv_vlsseg6_mask:
1961 case Intrinsic::riscv_vlsseg7_mask:
1962 case Intrinsic::riscv_vlsseg8_mask:
1963 case Intrinsic::riscv_vloxseg2_mask:
1964 case Intrinsic::riscv_vloxseg3_mask:
1965 case Intrinsic::riscv_vloxseg4_mask:
1966 case Intrinsic::riscv_vloxseg5_mask:
1967 case Intrinsic::riscv_vloxseg6_mask:
1968 case Intrinsic::riscv_vloxseg7_mask:
1969 case Intrinsic::riscv_vloxseg8_mask:
1970 case Intrinsic::riscv_vluxseg2_mask:
1971 case Intrinsic::riscv_vluxseg3_mask:
1972 case Intrinsic::riscv_vluxseg4_mask:
1973 case Intrinsic::riscv_vluxseg5_mask:
1974 case Intrinsic::riscv_vluxseg6_mask:
1975 case Intrinsic::riscv_vluxseg7_mask:
1976 case Intrinsic::riscv_vluxseg8_mask:
1977 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1978 /*IsStore*/ false,
1979 /*IsUnitStrided*/ false);
1980 case Intrinsic::riscv_vsseg2:
1981 case Intrinsic::riscv_vsseg3:
1982 case Intrinsic::riscv_vsseg4:
1983 case Intrinsic::riscv_vsseg5:
1984 case Intrinsic::riscv_vsseg6:
1985 case Intrinsic::riscv_vsseg7:
1986 case Intrinsic::riscv_vsseg8:
1987 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1988 /*IsStore*/ true,
1989 /*IsUnitStrided*/ false);
1990 case Intrinsic::riscv_vsseg2_mask:
1991 case Intrinsic::riscv_vsseg3_mask:
1992 case Intrinsic::riscv_vsseg4_mask:
1993 case Intrinsic::riscv_vsseg5_mask:
1994 case Intrinsic::riscv_vsseg6_mask:
1995 case Intrinsic::riscv_vsseg7_mask:
1996 case Intrinsic::riscv_vsseg8_mask:
1997 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1998 /*IsStore*/ true,
1999 /*IsUnitStrided*/ false);
2000 case Intrinsic::riscv_vssseg2:
2001 case Intrinsic::riscv_vssseg3:
2002 case Intrinsic::riscv_vssseg4:
2003 case Intrinsic::riscv_vssseg5:
2004 case Intrinsic::riscv_vssseg6:
2005 case Intrinsic::riscv_vssseg7:
2006 case Intrinsic::riscv_vssseg8:
2007 case Intrinsic::riscv_vsoxseg2:
2008 case Intrinsic::riscv_vsoxseg3:
2009 case Intrinsic::riscv_vsoxseg4:
2010 case Intrinsic::riscv_vsoxseg5:
2011 case Intrinsic::riscv_vsoxseg6:
2012 case Intrinsic::riscv_vsoxseg7:
2013 case Intrinsic::riscv_vsoxseg8:
2014 case Intrinsic::riscv_vsuxseg2:
2015 case Intrinsic::riscv_vsuxseg3:
2016 case Intrinsic::riscv_vsuxseg4:
2017 case Intrinsic::riscv_vsuxseg5:
2018 case Intrinsic::riscv_vsuxseg6:
2019 case Intrinsic::riscv_vsuxseg7:
2020 case Intrinsic::riscv_vsuxseg8:
2021 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2022 /*IsStore*/ true,
2023 /*IsUnitStrided*/ false);
2024 case Intrinsic::riscv_vssseg2_mask:
2025 case Intrinsic::riscv_vssseg3_mask:
2026 case Intrinsic::riscv_vssseg4_mask:
2027 case Intrinsic::riscv_vssseg5_mask:
2028 case Intrinsic::riscv_vssseg6_mask:
2029 case Intrinsic::riscv_vssseg7_mask:
2030 case Intrinsic::riscv_vssseg8_mask:
2031 case Intrinsic::riscv_vsoxseg2_mask:
2032 case Intrinsic::riscv_vsoxseg3_mask:
2033 case Intrinsic::riscv_vsoxseg4_mask:
2034 case Intrinsic::riscv_vsoxseg5_mask:
2035 case Intrinsic::riscv_vsoxseg6_mask:
2036 case Intrinsic::riscv_vsoxseg7_mask:
2037 case Intrinsic::riscv_vsoxseg8_mask:
2038 case Intrinsic::riscv_vsuxseg2_mask:
2039 case Intrinsic::riscv_vsuxseg3_mask:
2040 case Intrinsic::riscv_vsuxseg4_mask:
2041 case Intrinsic::riscv_vsuxseg5_mask:
2042 case Intrinsic::riscv_vsuxseg6_mask:
2043 case Intrinsic::riscv_vsuxseg7_mask:
2044 case Intrinsic::riscv_vsuxseg8_mask:
2045 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2046 /*IsStore*/ true,
2047 /*IsUnitStrided*/ false);
2048 }
2049}
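// Illustrative note (not in the original source): for riscv_seg2_store_mask
// the call is (vec0, vec1, ptr, mask, vl), so I.arg_size() == 5 and the
// pointer operand index is 5 - 3 == 2, matching the "(vec, ..., vec, ptr,
// mask, vl)" layout above; the strided sseg stores carry an extra offset
// operand, hence arg_size() - 4.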
2050
2051bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2052 const AddrMode &AM, Type *Ty,
2053 unsigned AS,
2054 Instruction *I) const {
2055 // No global is ever allowed as a base.
2056 if (AM.BaseGV)
2057 return false;
2058
2059 // None of our addressing modes allows a scalable offset
2060 if (AM.ScalableOffset)
2061 return false;
2062
2063 // RVV instructions only support register addressing.
2064 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2065 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2066
2067 // Require a 12-bit signed offset.
2068 if (!isInt<12>(AM.BaseOffs))
2069 return false;
2070
2071 switch (AM.Scale) {
2072 case 0: // "r+i" or just "i", depending on HasBaseReg.
2073 break;
2074 case 1:
2075 if (!AM.HasBaseReg) // allow "r+i".
2076 break;
2077 return false; // disallow "r+r" or "r+r+i".
2078 default:
2079 return false;
2080 }
2081
2082 return true;
2083}
2084
2085bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
2086 return isInt<12>(Imm);
2087}
2088
2089bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
2090 return isInt<12>(Imm);
2091}
2092
2093// On RV32, 64-bit integers are split into their high and low parts and held
2094// in two different registers, so the trunc is free since the low register can
2095// just be used.
2096// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2097// isTruncateFree?
2098bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
2099 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2100 return false;
2101 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2102 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2103 return (SrcBits == 64 && DestBits == 32);
2104}
2105
2106bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
2107 // We consider i64->i32 free on RV64 since we have good selection of W
2108 // instructions that make promoting operations back to i64 free in many cases.
2109 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2110 !DstVT.isInteger())
2111 return false;
2112 unsigned SrcBits = SrcVT.getSizeInBits();
2113 unsigned DestBits = DstVT.getSizeInBits();
2114 return (SrcBits == 64 && DestBits == 32);
2115}
2116
2117bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
2118 EVT SrcVT = Val.getValueType();
2119 // free truncate from vnsrl and vnsra
2120 if (Subtarget.hasVInstructions() &&
2121 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2122 SrcVT.isVector() && VT2.isVector()) {
2123 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2124 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2125 if (SrcBits == DestBits * 2) {
2126 return true;
2127 }
2128 }
2129 return TargetLowering::isTruncateFree(Val, VT2);
2130}
2131
2132bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
2133 // Zexts are free if they can be combined with a load.
2134 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2135 // poorly with type legalization of compares preferring sext.
2136 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2137 EVT MemVT = LD->getMemoryVT();
2138 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2139 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2140 LD->getExtensionType() == ISD::ZEXTLOAD))
2141 return true;
2142 }
2143
2144 return TargetLowering::isZExtFree(Val, VT2);
2145}
2146
2147bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
2148 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2149}
2150
2151bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
2152 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2153}
2154
2155bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
2156 return Subtarget.hasCTZLike();
2157}
2158
2159bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
2160 return Subtarget.hasCLZLike();
2161}
2162
2163bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2164 const Instruction &AndI) const {
2165 // We expect to be able to match a bit extraction instruction if the Zbs
2166 // extension is supported and the mask is a power of two. However, we
2167 // conservatively return false if the mask would fit in an ANDI instruction,
2168 // on the basis that it's possible the sinking+duplication of the AND in
2169 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2170 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2171 if (!Subtarget.hasBEXTILike())
2172 return false;
2173 auto *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2174 if (!Mask)
2175 return false;
2176 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2177}
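// Illustrative example (not in the original source): with Zbs, a mask of
// 0x400 fits ANDI's 12-bit signed immediate, so the hook returns false and
// the AND is not sunk; a mask of 0x10000 is a power of two that ANDI cannot
// encode, so the hook returns true and a BEXTI-based bit test is expected to
// match instead.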
2178
2179bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
2180 EVT VT = Y.getValueType();
2181
2182 if (VT.isVector())
2183 return false;
2184
2185 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2186 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2187}
2188
2189bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
2190 EVT VT = Y.getValueType();
2191
2192 if (!VT.isVector())
2193 return hasAndNotCompare(Y);
2194
2195 return Subtarget.hasStdExtZvkb();
2196}
2197
2198bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2199 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2200 if (Subtarget.hasStdExtZbs())
2201 return X.getValueType().isScalarInteger();
2202 auto *C = dyn_cast<ConstantSDNode>(Y);
2203 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2204 if (Subtarget.hasVendorXTHeadBs())
2205 return C != nullptr;
2206 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2207 return C && C->getAPIntValue().ule(10);
2208}
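// Illustrative example (not in the original source): a constant bit position
// of 3 passes the ule(10) check, so ANDI 8 + SEQZ/SNEZ is enough even without
// Zbs; bit position 20 needs a mask that ANDI cannot encode, so the hook only
// returns true when Zbs (BEXT) or XTheadBs (th.tst) is available.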
2209
2210bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(
2211 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2212 SDValue Y) const {
2213 if (SelectOpcode != ISD::VSELECT)
2214 return false;
2215
2216 // Only enable for rvv.
2217 if (!VT.isVector() || !Subtarget.hasVInstructions())
2218 return false;
2219
2220 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2221 return false;
2222
2223 return true;
2224}
2225
2226bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2227 Type *Ty) const {
2228 assert(Ty->isIntegerTy());
2229
2230 unsigned BitSize = Ty->getIntegerBitWidth();
2231 if (BitSize > Subtarget.getXLen())
2232 return false;
2233
2234 // Fast path, assume 32-bit immediates are cheap.
2235 int64_t Val = Imm.getSExtValue();
2236 if (isInt<32>(Val))
2237 return true;
2238
2239 // A constant pool entry may be more aligned than the load we're trying to
2240 // replace. If we don't support unaligned scalar mem, prefer the constant
2241 // pool.
2242 // TODO: Can the caller pass down the alignment?
2243 if (!Subtarget.enableUnalignedScalarMem())
2244 return true;
2245
2246 // Prefer to keep the load if it would require many instructions.
2247 // This uses the same threshold we use for constant pools but doesn't
2248 // check useConstantPoolForLargeInts.
2249 // TODO: Should we keep the load only when we're definitely going to emit a
2250 // constant pool?
2251
2252 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2253 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2254}
2255
2259 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2260 SelectionDAG &DAG) const {
2261 // One interesting pattern that we'd want to form is 'bit extract':
2262 // ((1 >> Y) & 1) ==/!= 0
2263 // But we also need to be careful not to try to reverse that fold.
2264
2265 // Is this '((1 >> Y) & 1)'?
2266 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2267 return false; // Keep the 'bit extract' pattern.
2268
2269 // Will this be '((1 >> Y) & 1)' after the transform?
2270 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2271 return true; // Do form the 'bit extract' pattern.
2272
2273 // If 'X' is a constant, and we transform, then we will immediately
2274 // try to undo the fold, thus causing endless combine loop.
2275 // So only do the transform if X is not a constant. This matches the default
2276 // implementation of this function.
2277 return !XC;
2278}
2279
2280bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2281 unsigned Opc = VecOp.getOpcode();
2282
2283 // Assume target opcodes can't be scalarized.
2284 // TODO - do we have any exceptions?
2285 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2286 return false;
2287
2288 // If the vector op is not supported, try to convert to scalar.
2289 EVT VecVT = VecOp.getValueType();
2291 return true;
2292
2293 // If the vector op is supported, but the scalar op is not, the transform may
2294 // not be worthwhile.
2295 // Permit a vector binary operation to be converted to a scalar binary
2296 // operation that is custom lowered with an illegal type.
2297 EVT ScalarVT = VecVT.getScalarType();
2298 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2299 isOperationCustom(Opc, ScalarVT);
2300}
2301
2302bool RISCVTargetLowering::isOffsetFoldingLegal(
2303 const GlobalAddressSDNode *GA) const {
2304 // In order to maximise the opportunity for common subexpression elimination,
2305 // keep a separate ADD node for the global address offset instead of folding
2306 // it in the global address node. Later peephole optimisations may choose to
2307 // fold it back in when profitable.
2308 return false;
2309}
2310
2311 // Returns 0-31 if the fli instruction is available for the type and this is
2312 // a legal FP immediate for the type. Returns -1 otherwise.
2313int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2314 if (!Subtarget.hasStdExtZfa())
2315 return -1;
2316
2317 bool IsSupportedVT = false;
2318 if (VT == MVT::f16) {
2319 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2320 } else if (VT == MVT::f32) {
2321 IsSupportedVT = true;
2322 } else if (VT == MVT::f64) {
2323 assert(Subtarget.hasStdExtD() && "Expect D extension");
2324 IsSupportedVT = true;
2325 }
2326
2327 if (!IsSupportedVT)
2328 return -1;
2329
2330 return RISCVLoadFPImm::getLoadFPImm(Imm);
2331}
2332
2333bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2334 bool ForCodeSize) const {
2335 bool IsLegalVT = false;
2336 if (VT == MVT::f16)
2337 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2338 else if (VT == MVT::f32)
2339 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2340 else if (VT == MVT::f64)
2341 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2342 else if (VT == MVT::bf16)
2343 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2344
2345 if (!IsLegalVT)
2346 return false;
2347
2348 if (getLegalZfaFPImm(Imm, VT) >= 0)
2349 return true;
2350
2351 // Some constants can be produced by fli+fneg.
2352 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2353 return true;
2354
2355 // Cannot create a 64 bit floating-point immediate value for rv32.
2356 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2357 // td can handle +0.0 or -0.0 already.
2358 // -0.0 can be created by fmv + fneg.
2359 return Imm.isZero();
2360 }
2361
2362 // Special case: fmv + fneg
2363 if (Imm.isNegZero())
2364 return true;
2365
2366 // Building an integer and then converting requires a fmv at the end of
2367 // the integer sequence. The fmv is not required for Zfinx.
2368 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2369 const int Cost =
2370 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2371 Subtarget.getXLen(), Subtarget);
2372 return Cost <= FPImmCost;
2373}
2374
2375// TODO: This is very conservative.
2376bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2377 unsigned Index) const {
2378 if (!Subtarget.hasVInstructions())
2379 return false;
2380
2381 // Extracts from index 0 are just subreg extracts.
2382 if (Index == 0)
2383 return true;
2384
2385 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2386 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2387 return false;
2388
2389 EVT EltVT = ResVT.getVectorElementType();
2390 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2391
2392 // The smallest type we can slide is i8.
2393 // TODO: We can extract index 0 from a mask vector without a slide.
2394 if (EltVT == MVT::i1)
2395 return false;
2396
2397 unsigned ResElts = ResVT.getVectorNumElements();
2398 unsigned SrcElts = SrcVT.getVectorNumElements();
2399
2400 unsigned MinVLen = Subtarget.getRealMinVLen();
2401 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2402
2403 // If we're extracting only data from the first VLEN bits of the source
2404 // then we can always do this with an m1 vslidedown.vx. Restricting the
2405 // Index ensures we can use a vslidedown.vi.
2406 // TODO: We can generalize this when the exact VLEN is known.
2407 if (Index + ResElts <= MinVLMAX && Index < 31)
2408 return true;
2409
2410 // Conservatively only handle extracting half of a vector.
2411 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2412 // the upper half of a vector until we have more test coverage.
2413 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2414 // a cheap extract. However, this case is important in practice for
2415 // shuffled extracts of longer vectors. How should we resolve this?
2416 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2417}
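// Illustrative example (not in the original source; assumes VLEN >= 128):
// extracting v2i32 at index 2 from v8i32 gives MinVLMAX = 128 / 32 = 4, and
// Index + ResElts == 4 <= MinVLMAX with Index < 31, so a single m1
// vslidedown.vi suffices; extracting the upper v4i32 half (Index == ResElts
// == 4) is accepted by the final half-vector check.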
2418
2419MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2420 CallingConv::ID CC,
2421 EVT VT) const {
2422 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2423 // We might still end up using a GPR but that will be decided based on ABI.
2424 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2425 !Subtarget.hasStdExtZfhminOrZhinxmin())
2426 return MVT::f32;
2427
2428 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2429
2430 return PartVT;
2431}
2432
2433unsigned
2434RISCVTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT,
2435 std::optional<MVT> RegisterVT) const {
2436 // Pair inline assembly operand
2437 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2438 *RegisterVT == MVT::Untyped)
2439 return 1;
2440
2441 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2442}
2443
2444unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2445 CallingConv::ID CC,
2446 EVT VT) const {
2447 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2448 // We might still end up using a GPR but that will be decided based on ABI.
2449 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2450 !Subtarget.hasStdExtZfhminOrZhinxmin())
2451 return 1;
2452
2453 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2454}
2455
2456unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2457 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2458 unsigned &NumIntermediates, MVT &RegisterVT) const {
2459 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2460 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2461
2462 return NumRegs;
2463}
2464
2465// Changes the condition code and swaps operands if necessary, so the SetCC
2466// operation matches one of the comparisons supported directly by branches
2467// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2468// with 1/-1.
2469static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2470 ISD::CondCode &CC, SelectionDAG &DAG,
2471 const RISCVSubtarget &Subtarget) {
2472 // If this is a single bit test that can't be handled by ANDI, shift the
2473 // bit to be tested to the MSB and perform a signed compare with 0.
2474 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2475 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2476 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2477 // XAndesPerf supports branch on test bit.
2478 !Subtarget.hasVendorXAndesPerf()) {
2479 uint64_t Mask = LHS.getConstantOperandVal(1);
2480 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2481 unsigned ShAmt = 0;
2482 if (isPowerOf2_64(Mask)) {
2483 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2484 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2485 } else {
2486 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2487 }
2488
2489 LHS = LHS.getOperand(0);
2490 if (ShAmt != 0)
2491 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2492 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2493 return;
2494 }
2495 }
2496
2497 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2498 int64_t C = RHSC->getSExtValue();
2499 switch (CC) {
2500 default: break;
2501 case ISD::SETGT:
2502 // Convert X > -1 to X >= 0.
2503 if (C == -1) {
2504 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2505 CC = ISD::SETGE;
2506 return;
2507 }
2508 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2509 C != INT64_MAX && isInt<5>(C + 1)) {
2510 // We have a conditional move instruction for SETGE but not SETGT.
2511 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2512 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2513 CC = ISD::SETGE;
2514 return;
2515 }
2516 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2517 // We have a branch immediate instruction for SETGE but not SETGT.
2518 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2519 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2520 CC = ISD::SETGE;
2521 return;
2522 }
2523 break;
2524 case ISD::SETLT:
2525 // Convert X < 1 to 0 >= X.
2526 if (C == 1) {
2527 RHS = LHS;
2528 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2529 CC = ISD::SETGE;
2530 return;
2531 }
2532 break;
2533 case ISD::SETUGT:
2534 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2535 C != INT64_MAX && isUInt<5>(C + 1)) {
2536 // We have a conditional move instruction for SETUGE but not SETUGT.
2537 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit unsigned immediate.
2538 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2539 CC = ISD::SETUGE;
2540 return;
2541 }
2542 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2543 // We have a branch immediate instruction for SETUGE but not SETUGT.
2544 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2545 // immediate.
2546 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2547 CC = ISD::SETUGE;
2548 return;
2549 }
2550 break;
2551 }
2552 }
2553
2554 switch (CC) {
2555 default:
2556 break;
2557 case ISD::SETGT:
2558 case ISD::SETLE:
2559 case ISD::SETUGT:
2560 case ISD::SETULE:
2561 CC = ISD::getSetCCSwappedOperands(CC);
2562 std::swap(LHS, RHS);
2563 break;
2564 }
2565}
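// Illustrative examples (not in the original source): "X > -1" is rewritten
// to "X >= 0" so it maps onto a branch against zero, and "(X & 0x800) == 0"
// (a power-of-two mask too large for ANDI) becomes a left shift that moves
// bit 11 to the MSB followed by a signed compare with zero.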
2566
2567RISCVVType::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2568 if (VT.isRISCVVectorTuple()) {
2569 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2570 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2571 return RISCVVType::LMUL_F8;
2572 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2573 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2574 return RISCVVType::LMUL_F4;
2575 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2576 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2577 return RISCVVType::LMUL_F2;
2578 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2579 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2580 return RISCVVType::LMUL_1;
2581 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2582 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2583 return RISCVVType::LMUL_2;
2584 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2585 return RISCVVType::LMUL_4;
2586 llvm_unreachable("Invalid vector tuple type LMUL.");
2587 }
2588
2589 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2590 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2591 if (VT.getVectorElementType() == MVT::i1)
2592 KnownSize *= 8;
2593
2594 switch (KnownSize) {
2595 default:
2596 llvm_unreachable("Invalid LMUL.");
2597 case 8:
2598 return RISCVVType::LMUL_F8;
2599 case 16:
2600 return RISCVVType::LMUL_F4;
2601 case 32:
2602 return RISCVVType::LMUL_F2;
2603 case 64:
2604 return RISCVVType::LMUL_1;
2605 case 128:
2606 return RISCVVType::LMUL_2;
2607 case 256:
2608 return RISCVVType::LMUL_4;
2609 case 512:
2610 return RISCVVType::LMUL_8;
2611 }
2612}
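// Illustrative examples (not in the original source; RVVBitsPerBlock == 64):
// nxv4i32 has a known minimum size of 128 bits and maps to LMUL_2, nxv2i8
// (16 bits) maps to LMUL_F4, and an nxv8i1 mask counts as 8 * 8 == 64 bits
// and maps to LMUL_1.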
2613
2614unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVType::VLMUL LMul) {
2615 switch (LMul) {
2616 default:
2617 llvm_unreachable("Invalid LMUL.");
2618 case RISCVVType::LMUL_F8:
2619 case RISCVVType::LMUL_F4:
2620 case RISCVVType::LMUL_F2:
2621 case RISCVVType::LMUL_1:
2622 return RISCV::VRRegClassID;
2623 case RISCVVType::LMUL_2:
2624 return RISCV::VRM2RegClassID;
2625 case RISCVVType::LMUL_4:
2626 return RISCV::VRM4RegClassID;
2627 case RISCVVType::LMUL_8:
2628 return RISCV::VRM8RegClassID;
2629 }
2630}
2631
2632unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2633 RISCVVType::VLMUL LMUL = getLMUL(VT);
2634 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2635 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2636 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2637 "Unexpected subreg numbering");
2638 return RISCV::sub_vrm1_0 + Index;
2639 }
2640 if (LMUL == RISCVVType::LMUL_2) {
2641 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2642 "Unexpected subreg numbering");
2643 return RISCV::sub_vrm2_0 + Index;
2644 }
2645 if (LMUL == RISCVVType::LMUL_4) {
2646 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2647 "Unexpected subreg numbering");
2648 return RISCV::sub_vrm4_0 + Index;
2649 }
2650 llvm_unreachable("Invalid vector type.");
2651}
2652
2653unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2654 if (VT.isRISCVVectorTuple()) {
2655 unsigned NF = VT.getRISCVVectorTupleNumFields();
2656 unsigned RegsPerField =
2657 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2658 (NF * RISCV::RVVBitsPerBlock));
2659 switch (RegsPerField) {
2660 case 1:
2661 if (NF == 2)
2662 return RISCV::VRN2M1RegClassID;
2663 if (NF == 3)
2664 return RISCV::VRN3M1RegClassID;
2665 if (NF == 4)
2666 return RISCV::VRN4M1RegClassID;
2667 if (NF == 5)
2668 return RISCV::VRN5M1RegClassID;
2669 if (NF == 6)
2670 return RISCV::VRN6M1RegClassID;
2671 if (NF == 7)
2672 return RISCV::VRN7M1RegClassID;
2673 if (NF == 8)
2674 return RISCV::VRN8M1RegClassID;
2675 break;
2676 case 2:
2677 if (NF == 2)
2678 return RISCV::VRN2M2RegClassID;
2679 if (NF == 3)
2680 return RISCV::VRN3M2RegClassID;
2681 if (NF == 4)
2682 return RISCV::VRN4M2RegClassID;
2683 break;
2684 case 4:
2685 assert(NF == 2);
2686 return RISCV::VRN2M4RegClassID;
2687 default:
2688 break;
2689 }
2690 llvm_unreachable("Invalid vector tuple type RegClass.");
2691 }
2692
2693 if (VT.getVectorElementType() == MVT::i1)
2694 return RISCV::VRRegClassID;
2695 return getRegClassIDForLMUL(getLMUL(VT));
2696}
2697
2698// Attempt to decompose a subvector insert/extract between VecVT and
2699// SubVecVT via subregister indices. Returns the subregister index that
2700// can perform the subvector insert/extract with the given element index, as
2701// well as the index corresponding to any leftover subvectors that must be
2702// further inserted/extracted within the register class for SubVecVT.
2703std::pair<unsigned, unsigned>
2705 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2706 const RISCVRegisterInfo *TRI) {
2707 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2708 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2709 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2710 "Register classes not ordered");
2711 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2712 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2713
2714 // If VecVT is a vector tuple type, either it's the tuple type with the same
2715 // RegClass as SubVecVT, or SubVecVT is actually a subvector of the VecVT.
2716 if (VecVT.isRISCVVectorTuple()) {
2717 if (VecRegClassID == SubRegClassID)
2718 return {RISCV::NoSubRegister, 0};
2719
2720 assert(SubVecVT.isScalableVector() &&
2721 "Only allow scalable vector subvector.");
2722 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2723 "Invalid vector tuple insert/extract for vector and subvector with "
2724 "different LMUL.");
2725 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2726 }
2727
2728 // Try to compose a subregister index that takes us from the incoming
2729 // LMUL>1 register class down to the outgoing one. At each step we half
2730 // the LMUL:
2731 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2732 // Note that this is not guaranteed to find a subregister index, such as
2733 // when we are extracting from one VR type to another.
2734 unsigned SubRegIdx = RISCV::NoSubRegister;
2735 for (const unsigned RCID :
2736 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2737 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2738 VecVT = VecVT.getHalfNumVectorElementsVT();
2739 bool IsHi =
2740 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2741 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2742 getSubregIndexByMVT(VecVT, IsHi));
2743 if (IsHi)
2744 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2745 }
2746 return {SubRegIdx, InsertExtractIdx};
2747}
2748
2749// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2750// stores for those types.
2751bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2752 return !Subtarget.useRVVForFixedLengthVectors() ||
2753 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2754}
2755
2756bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2757 if (!ScalarTy.isSimple())
2758 return false;
2759 switch (ScalarTy.getSimpleVT().SimpleTy) {
2760 case MVT::iPTR:
2761 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2762 case MVT::i8:
2763 case MVT::i16:
2764 case MVT::i32:
2765 return Subtarget.hasVInstructions();
2766 case MVT::i64:
2767 return Subtarget.hasVInstructionsI64();
2768 case MVT::f16:
2769 return Subtarget.hasVInstructionsF16Minimal();
2770 case MVT::bf16:
2771 return Subtarget.hasVInstructionsBF16Minimal();
2772 case MVT::f32:
2773 return Subtarget.hasVInstructionsF32();
2774 case MVT::f64:
2775 return Subtarget.hasVInstructionsF64();
2776 default:
2777 return false;
2778 }
2779}
2780
2781
2782unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2783 return NumRepeatedDivisors;
2784}
2785
2787 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2788 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2789 "Unexpected opcode");
2790 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2791 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2792 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2793 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2794 if (!II)
2795 return SDValue();
2796 return Op.getOperand(II->VLOperand + 1 + HasChain);
2797}
2798
2799static bool useRVVForFixedLengthVectorVT(MVT VT,
2800 const RISCVSubtarget &Subtarget) {
2801 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2802 if (!Subtarget.useRVVForFixedLengthVectors())
2803 return false;
2804
2805 // We only support a set of vector types with a consistent maximum fixed size
2806 // across all supported vector element types to avoid legalization issues.
2807 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2808 // fixed-length vector type we support is 1024 bytes.
2809 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2810 return false;
2811
2812 unsigned MinVLen = Subtarget.getRealMinVLen();
2813
2814 MVT EltVT = VT.getVectorElementType();
2815
2816 // Don't use RVV for vectors we cannot scalarize if required.
2817 switch (EltVT.SimpleTy) {
2818 // i1 is supported but has different rules.
2819 default:
2820 return false;
2821 case MVT::i1:
2822 // Masks can only use a single register.
2823 if (VT.getVectorNumElements() > MinVLen)
2824 return false;
2825 MinVLen /= 8;
2826 break;
2827 case MVT::i8:
2828 case MVT::i16:
2829 case MVT::i32:
2830 break;
2831 case MVT::i64:
2832 if (!Subtarget.hasVInstructionsI64())
2833 return false;
2834 break;
2835 case MVT::f16:
2836 if (!Subtarget.hasVInstructionsF16Minimal())
2837 return false;
2838 break;
2839 case MVT::bf16:
2840 if (!Subtarget.hasVInstructionsBF16Minimal())
2841 return false;
2842 break;
2843 case MVT::f32:
2844 if (!Subtarget.hasVInstructionsF32())
2845 return false;
2846 break;
2847 case MVT::f64:
2848 if (!Subtarget.hasVInstructionsF64())
2849 return false;
2850 break;
2851 }
2852
2853 // Reject elements larger than ELEN.
2854 if (EltVT.getSizeInBits() > Subtarget.getELen())
2855 return false;
2856
2857 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2858 // Don't use RVV for types that don't fit.
2859 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2860 return false;
2861
2862 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2863 // the base fixed length RVV support in place.
2864 if (!VT.isPow2VectorType())
2865 return false;
2866
2867 return true;
2868}
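// Illustrative example (not in the original source; assumes Zvl128b, i.e.
// MinVLen == 128): v16i32 is 512 bits, so LMul = divideCeil(512, 128) == 4
// and the type is usable when the maximum LMUL for fixed-length vectors is
// at least 4, whereas v64i32 (2048 bits) would require LMUL 16 and is
// rejected.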
2869
2870bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2871 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2872}
2873
2874// Return the largest legal scalable vector type that matches VT's element type.
2875static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2876 const RISCVSubtarget &Subtarget) {
2877 // This may be called before legal types are set up.
2878 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2879 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2880 "Expected legal fixed length vector!");
2881
2882 unsigned MinVLen = Subtarget.getRealMinVLen();
2883 unsigned MaxELen = Subtarget.getELen();
2884
2885 MVT EltVT = VT.getVectorElementType();
2886 switch (EltVT.SimpleTy) {
2887 default:
2888 llvm_unreachable("unexpected element type for RVV container");
2889 case MVT::i1:
2890 case MVT::i8:
2891 case MVT::i16:
2892 case MVT::i32:
2893 case MVT::i64:
2894 case MVT::bf16:
2895 case MVT::f16:
2896 case MVT::f32:
2897 case MVT::f64: {
2898 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2899 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2900 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2901 unsigned NumElts =
2902 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2903 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2904 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2905 return MVT::getScalableVectorVT(EltVT, NumElts);
2906 }
2907 }
2908}
2909
2910static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2911 const RISCVSubtarget &Subtarget) {
2912 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2913 Subtarget);
2914}
2915
2916MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2917 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2918}
2919
2920// Grow V to consume an entire RVV register.
2921static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2922 const RISCVSubtarget &Subtarget) {
2923 assert(VT.isScalableVector() &&
2924 "Expected to convert into a scalable vector!");
2925 assert(V.getValueType().isFixedLengthVector() &&
2926 "Expected a fixed length vector operand!");
2927 SDLoc DL(V);
2928 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2929}
2930
2931// Shrink V so it's just big enough to maintain a VT's worth of data.
2932static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2933 const RISCVSubtarget &Subtarget) {
2934 assert(VT.isFixedLengthVector() &&
2935 "Expected to convert into a fixed length vector!");
2936 assert(V.getValueType().isScalableVector() &&
2937 "Expected a scalable vector operand!");
2938 SDLoc DL(V);
2939 return DAG.getExtractSubvector(DL, VT, V, 0);
2940}
2941
2942 /// Return the mask type suitable for masking the provided
2943/// vector type. This is simply an i1 element type vector of the same
2944/// (possibly scalable) length.
2945static MVT getMaskTypeFor(MVT VecVT) {
2946 assert(VecVT.isVector());
2947 ElementCount EC = VecVT.getVectorElementCount();
2948 return MVT::getVectorVT(MVT::i1, EC);
2949}
2950
2951/// Creates an all ones mask suitable for masking a vector of type VecTy with
2952 /// vector length VL.
2953static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2954 SelectionDAG &DAG) {
2955 MVT MaskVT = getMaskTypeFor(VecVT);
2956 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2957}
2958
2959static std::pair<SDValue, SDValue>
2960getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2961 const RISCVSubtarget &Subtarget) {
2962 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2963 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2964 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2965 return {Mask, VL};
2966}
2967
2968static std::pair<SDValue, SDValue>
2969getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2970 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2971 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2972 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2973 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2974 return {Mask, VL};
2975}
2976
2977// Gets the two common "VL" operands: an all-ones mask and the vector length.
2978 // VecVT is a vector type, either fixed-length or scalable. If VecVT is
2979 // fixed-length, ContainerVT is the scalable vector type that contains it;
2980 // otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
2981static std::pair<SDValue, SDValue>
2982getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2983 const RISCVSubtarget &Subtarget) {
2984 if (VecVT.isFixedLengthVector())
2985 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2986 Subtarget);
2987 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2988 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2989}
2990
2992 SelectionDAG &DAG) const {
2993 assert(VecVT.isScalableVector() && "Expected scalable vector");
2994 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2995 VecVT.getVectorElementCount());
2996}
2997
2998std::pair<unsigned, unsigned>
2999RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
3000 const RISCVSubtarget &Subtarget) {
3001 assert(VecVT.isScalableVector() && "Expected scalable vector");
3002
3003 unsigned EltSize = VecVT.getScalarSizeInBits();
3004 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3005
3006 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3007 unsigned MaxVLMAX =
3008 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3009
3010 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3011 unsigned MinVLMAX =
3012 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3013
3014 return std::make_pair(MinVLMAX, MaxVLMAX);
3015}
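// Illustrative example (not in the original source): for nxv4i32 (LMUL 2 at
// SEW 32) on a subtarget with VLEN known to lie in [128, 512], VLMAX equals
// (VLEN / 32) * 2, so the returned bounds are MinVLMAX = 8 and MaxVLMAX = 32.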
3016
3017// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3018// of either is (currently) supported. This can get us into an infinite loop
3019// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3020// as a ..., etc.
3021// Until either (or both) of these can reliably lower any node, reporting that
3022// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3023// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3024// which is not desirable.
3025bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
3026 EVT VT, unsigned DefinedValues) const {
3027 return false;
3028}
3029
3030InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
3031 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3032 // implementation-defined.
3033 if (!VT.isVector())
3034 return InstructionCost::getInvalid();
3035 unsigned DLenFactor = Subtarget.getDLenFactor();
3036 unsigned Cost;
3037 if (VT.isScalableVector()) {
3038 unsigned LMul;
3039 bool Fractional;
3040 std::tie(LMul, Fractional) =
3041 RISCVVType::decodeVLMUL(getLMUL(VT));
3042 if (Fractional)
3043 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3044 else
3045 Cost = (LMul * DLenFactor);
3046 } else {
3047 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3048 }
3049 return Cost;
3050}
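// Illustrative example (not in the original source): with DLenFactor == 2
// (DLEN == VLEN / 2), an LMUL_4 type costs 4 * 2 == 8 and a fractional
// LMUL_F2 type costs 2 / 2 == 1, while a 256-bit fixed-length type on a
// VLEN=128 subtarget costs divideCeil(256, 64) == 4.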
3051
3052
3053/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3054 /// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3055 /// be by default. VRGatherCostModel reflects the available options. Note that
3056 /// the operands (index and possibly mask) are handled separately.
3057InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
3058 auto LMULCost = getLMULCost(VT);
3059 bool Log2CostModel =
3060 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3061 if (Log2CostModel && LMULCost.isValid()) {
3062 unsigned Log = Log2_64(LMULCost.getValue());
3063 if (Log > 0)
3064 return LMULCost * Log;
3065 }
3066 return LMULCost * LMULCost;
3067}
3068
3069/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3070/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3071/// or may track the vrgather.vv cost. It is implementation-dependent.
3072InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
3073 return getLMULCost(VT);
3074}
3075
3076/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3077/// for the type VT. (This does not cover the vslide1up or vslide1down
3078/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3079/// or may track the vrgather.vv cost. It is implementation-dependent.
3080InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
3081 return getLMULCost(VT);
3082}
3083
3084/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3085/// for the type VT. (This does not cover the vslide1up or vslide1down
3086/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3087/// or may track the vrgather.vv cost. It is implementation-dependent.
3088InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
3089 return getLMULCost(VT);
3090}
3091
3093 const RISCVSubtarget &Subtarget) {
3094 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3095 // bf16 conversions are always promoted to f32.
3096 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3097 Op.getValueType() == MVT::bf16) {
3098 bool IsStrict = Op->isStrictFPOpcode();
3099
3100 SDLoc DL(Op);
3101 if (IsStrict) {
3102 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3103 {Op.getOperand(0), Op.getOperand(1)});
3104 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3105 {Op.getValueType(), MVT::Other},
3106 {Val.getValue(1), Val.getValue(0),
3107 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3108 }
3109 return DAG.getNode(
3110 ISD::FP_ROUND, DL, Op.getValueType(),
3111 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3112 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3113 }
3114
3115 // Other operations are legal.
3116 return Op;
3117}
3118
3119static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
3120 const RISCVSubtarget &Subtarget) {
3121 // RISC-V FP-to-int conversions saturate to the destination register size, but
3122 // don't produce 0 for nan. We can use a conversion instruction and fix the
3123 // nan case with a compare and a select.
3124 SDValue Src = Op.getOperand(0);
3125
3126 MVT DstVT = Op.getSimpleValueType();
3127 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3128
3129 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3130
3131 if (!DstVT.isVector()) {
3132 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3133 // the result.
3134 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3135 Src.getValueType() == MVT::bf16) {
3136 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3137 }
3138
3139 unsigned Opc;
3140 if (SatVT == DstVT)
3141 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3142 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3143 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3144 else
3145 return SDValue();
3146 // FIXME: Support other SatVTs by clamping before or after the conversion.
3147
3148 SDLoc DL(Op);
3149 SDValue FpToInt = DAG.getNode(
3150 Opc, DL, DstVT, Src,
3151 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
3152
3153 if (Opc == RISCVISD::FCVT_WU_RV64)
3154 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3155
3156 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3157 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3158 ISD::CondCode::SETUO);
3159 }
3160
3161 // Vectors.
3162
3163 MVT DstEltVT = DstVT.getVectorElementType();
3164 MVT SrcVT = Src.getSimpleValueType();
3165 MVT SrcEltVT = SrcVT.getVectorElementType();
3166 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3167 unsigned DstEltSize = DstEltVT.getSizeInBits();
3168
3169 // Only handle saturating to the destination type.
3170 if (SatVT != DstEltVT)
3171 return SDValue();
3172
3173 MVT DstContainerVT = DstVT;
3174 MVT SrcContainerVT = SrcVT;
3175 if (DstVT.isFixedLengthVector()) {
3176 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3177 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3178 assert(DstContainerVT.getVectorElementCount() ==
3179 SrcContainerVT.getVectorElementCount() &&
3180 "Expected same element count");
3181 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3182 }
3183
3184 SDLoc DL(Op);
3185
3186 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3187
3188 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3189 {Src, Src, DAG.getCondCode(ISD::SETNE),
3190 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3191
3192 // Need to widen by more than 1 step, promote the FP type, then do a widening
3193 // convert.
3194 if (DstEltSize > (2 * SrcEltSize)) {
3195 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3196 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3197 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3198 }
3199
3200 MVT CvtContainerVT = DstContainerVT;
3201 MVT CvtEltVT = DstEltVT;
3202 if (SrcEltSize > (2 * DstEltSize)) {
3203 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3204 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3205 }
3206
3207 unsigned RVVOpc =
3208 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3209 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3210
3211 while (CvtContainerVT != DstContainerVT) {
3212 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3213 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3214 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3215 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3216 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3217 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3218 }
3219
3220 SDValue SplatZero = DAG.getNode(
3221 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3222 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3223 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3224 Res, DAG.getUNDEF(DstContainerVT), VL);
3225
3226 if (DstVT.isFixedLengthVector())
3227 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3228
3229 return Res;
3230}
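// Illustrative example (not in the original source): a scalar i32
// fp_to_sint_sat from f32 becomes a single FCVT_X with static RTZ rounding;
// the RISC-V conversion already saturates on overflow, so only the NaN input
// remains to be handled, and the final select (Src unordered with itself)
// replaces it with zero.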
3231
3233 const RISCVSubtarget &Subtarget) {
3234 bool IsStrict = Op->isStrictFPOpcode();
3235 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3236
3237 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3238 // bf16 conversions are always promoted to f32.
3239 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3240 SrcVal.getValueType() == MVT::bf16) {
3241 SDLoc DL(Op);
3242 if (IsStrict) {
3243 SDValue Ext =
3244 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3245 {Op.getOperand(0), SrcVal});
3246 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3247 {Ext.getValue(1), Ext.getValue(0)});
3248 }
3249 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3250 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3251 }
3252
3253 // Other operations are legal.
3254 return Op;
3255}
3256
3257static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
3258 switch (Opc) {
3259 case ISD::FROUNDEVEN:
3260 case ISD::STRICT_FROUNDEVEN:
3261 case ISD::VP_FROUNDEVEN:
3262 return RISCVFPRndMode::RNE;
3263 case ISD::FTRUNC:
3264 case ISD::STRICT_FTRUNC:
3265 case ISD::VP_FROUNDTOZERO:
3266 return RISCVFPRndMode::RTZ;
3267 case ISD::FFLOOR:
3268 case ISD::STRICT_FFLOOR:
3269 case ISD::VP_FFLOOR:
3270 return RISCVFPRndMode::RDN;
3271 case ISD::FCEIL:
3272 case ISD::STRICT_FCEIL:
3273 case ISD::VP_FCEIL:
3274 return RISCVFPRndMode::RUP;
3275 case ISD::FROUND:
3276 case ISD::LROUND:
3277 case ISD::LLROUND:
3278 case ISD::STRICT_FROUND:
3279 case ISD::STRICT_LROUND:
3280 case ISD::STRICT_LLROUND:
3281 case ISD::VP_FROUND:
3282 return RISCVFPRndMode::RMM;
3283 case ISD::FRINT:
3284 case ISD::LRINT:
3285 case ISD::LLRINT:
3286 case ISD::STRICT_FRINT:
3287 case ISD::STRICT_LRINT:
3288 case ISD::STRICT_LLRINT:
3289 case ISD::VP_FRINT:
3290 case ISD::VP_LRINT:
3291 case ISD::VP_LLRINT:
3292 return RISCVFPRndMode::DYN;
3293 }
3294
3295 return RISCVFPRndMode::Invalid;
3296}
3297
3298// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3299// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3300// the integer domain and back. Taking care to avoid converting values that are
3301// nan or already correct.
3302static SDValue
3303lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3304 const RISCVSubtarget &Subtarget) {
3305 MVT VT = Op.getSimpleValueType();
3306 assert(VT.isVector() && "Unexpected type");
3307
3308 SDLoc DL(Op);
3309
3310 SDValue Src = Op.getOperand(0);
3311
3312 // Freeze the source since we are increasing the number of uses.
3313 Src = DAG.getFreeze(Src);
3314
3315 MVT ContainerVT = VT;
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 SDValue Mask, VL;
3322 if (Op->isVPOpcode()) {
3323 Mask = Op.getOperand(1);
3324 if (VT.isFixedLengthVector())
3325 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3326 Subtarget);
3327 VL = Op.getOperand(2);
3328 } else {
3329 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3330 }
3331
3332 // We do the conversion on the absolute value and fix the sign at the end.
3333 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3334
3335 // Determine the largest integer that can be represented exactly. This and
3336 // values larger than it don't have any fractional bits so don't need to
3337 // be converted.
3338 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3339 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3340 APFloat MaxVal = APFloat(FltSem);
3341 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3342 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3343 SDValue MaxValNode =
3344 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3345 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3346 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3347
3348 // If abs(Src) was larger than MaxVal or nan, keep it.
3349 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3350 Mask =
3351 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3352 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3353 Mask, Mask, VL});
3354
3355 // Truncate to integer and convert back to FP.
3356 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3357 MVT XLenVT = Subtarget.getXLenVT();
3358 SDValue Truncated;
3359
3360 switch (Op.getOpcode()) {
3361 default:
3362 llvm_unreachable("Unexpected opcode");
3363 case ISD::FRINT:
3364 case ISD::VP_FRINT:
3365 case ISD::FCEIL:
3366 case ISD::VP_FCEIL:
3367 case ISD::FFLOOR:
3368 case ISD::VP_FFLOOR:
3369 case ISD::FROUND:
3370 case ISD::FROUNDEVEN:
3371 case ISD::VP_FROUND:
3372 case ISD::VP_FROUNDEVEN:
3373 case ISD::VP_FROUNDTOZERO: {
3374 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3375 assert(FRM != RISCVFPRndMode::DYN && "Unexpected rounding mode");
3376 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3377 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3378 break;
3379 }
3380 case ISD::FTRUNC:
3381 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3382 Mask, VL);
3383 break;
3384 case ISD::FNEARBYINT:
3385 case ISD::VP_FNEARBYINT:
3386 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3387 Mask, VL);
3388 break;
3389 }
3390
3391 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3392 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3393 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3394 Mask, VL);
3395
3396 // Restore the original sign so that -0.0 is preserved.
3397 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3398 Src, Src, Mask, VL);
3399
3400 if (!VT.isFixedLengthVector())
3401 return Truncated;
3402
3403 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3404}
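// Illustrative example (not in the original source): for an f32 vector the
// precision is 24, so MaxVal = 2^23 = 8388608.0; lanes with |x| >= 2^23 (or
// NaN) are already integral and are masked out of the round trip, while the
// remaining lanes are converted to integer with the requested static rounding
// mode and back, with the copysign at the end preserving -0.0.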
3405
3406// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3407// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3408// qNan and converting the new source to integer and back to FP.
3409static SDValue
3410lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3411 const RISCVSubtarget &Subtarget) {
3412 SDLoc DL(Op);
3413 MVT VT = Op.getSimpleValueType();
3414 SDValue Chain = Op.getOperand(0);
3415 SDValue Src = Op.getOperand(1);
3416
3417 MVT ContainerVT = VT;
3418 if (VT.isFixedLengthVector()) {
3419 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3420 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3421 }
3422
3423 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3424
3425 // Freeze the source since we are increasing the number of uses.
3426 Src = DAG.getFreeze(Src);
3427
3428 // Convert sNan to qNan by executing x + x for every unordered element x in Src.
3429 MVT MaskVT = Mask.getSimpleValueType();
3430 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3431 DAG.getVTList(MaskVT, MVT::Other),
3432 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3433 DAG.getUNDEF(MaskVT), Mask, VL});
3434 Chain = Unorder.getValue(1);
3435 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3436 DAG.getVTList(ContainerVT, MVT::Other),
3437 {Chain, Src, Src, Src, Unorder, VL});
3438 Chain = Src.getValue(1);
3439
3440 // We do the conversion on the absolute value and fix the sign at the end.
3441 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3442
3443 // Determine the largest integer that can be represented exactly. This and
3444 // values larger than it don't have any fractional bits so don't need to
3445 // be converted.
3446 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3447 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3448 APFloat MaxVal = APFloat(FltSem);
3449 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3450 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3451 SDValue MaxValNode =
3452 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3453 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3454 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3455
3456 // If abs(Src) was larger than MaxVal or nan, keep it.
3457 Mask = DAG.getNode(
3458 RISCVISD::SETCC_VL, DL, MaskVT,
3459 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3460
3461 // Truncate to integer and convert back to FP.
3462 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3463 MVT XLenVT = Subtarget.getXLenVT();
3464 SDValue Truncated;
3465
3466 switch (Op.getOpcode()) {
3467 default:
3468 llvm_unreachable("Unexpected opcode");
3469 case ISD::STRICT_FCEIL:
3470 case ISD::STRICT_FFLOOR:
3471 case ISD::STRICT_FROUND:
3472 case ISD::STRICT_FROUNDEVEN: {
3473 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3474 assert(FRM != RISCVFPRndMode::DYN && "Unexpected rounding mode");
3475 Truncated = DAG.getNode(
3476 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3477 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3478 break;
3479 }
3480 case ISD::STRICT_FTRUNC:
3481 Truncated =
3482 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3483 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3484 break;
3485 case ISD::STRICT_FNEARBYINT:
3486 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3487 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3488 Mask, VL);
3489 break;
3490 }
3491 Chain = Truncated.getValue(1);
3492
3493 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3494 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3495 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3496 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3497 Truncated, Mask, VL);
3498 Chain = Truncated.getValue(1);
3499 }
3500
3501 // Restore the original sign so that -0.0 is preserved.
3502 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3503 Src, Src, Mask, VL);
3504
3505 if (VT.isFixedLengthVector())
3506 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3507 return DAG.getMergeValues({Truncated, Chain}, DL);
3508}
3509
3510static SDValue
3511lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3512 const RISCVSubtarget &Subtarget) {
3513 MVT VT = Op.getSimpleValueType();
3514 if (VT.isVector())
3515 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3516
3517 if (DAG.shouldOptForSize())
3518 return SDValue();
3519
3520 SDLoc DL(Op);
3521 SDValue Src = Op.getOperand(0);
3522
3523 // Create an integer the size of the mantissa with the MSB set. This and all
3524 // values larger than it don't have any fractional bits so don't need to be
3525 // converted.
3526 const fltSemantics &FltSem = VT.getFltSemantics();
3527 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3528 APFloat MaxVal = APFloat(FltSem);
3529 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3530 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3531 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3532
3533 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3534 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3535 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3536}
3537
3538// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3540 const RISCVSubtarget &Subtarget) {
3541 SDLoc DL(Op);
3542 MVT DstVT = Op.getSimpleValueType();
3543 SDValue Src = Op.getOperand(0);
3544 MVT SrcVT = Src.getSimpleValueType();
3545 assert(SrcVT.isVector() && DstVT.isVector() &&
3546 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3547 "Unexpected type");
3548
3549 MVT DstContainerVT = DstVT;
3550 MVT SrcContainerVT = SrcVT;
3551
3552 if (DstVT.isFixedLengthVector()) {
3553 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3554 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3555 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3556 }
3557
3558 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3559
3560 // [b]f16 -> f32
3561 MVT SrcElemType = SrcVT.getVectorElementType();
3562 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3563 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3564 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3565 }
3566
3567 SDValue Res =
3568 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3569 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3570 Subtarget.getXLenVT()),
3571 VL);
3572
3573 if (!DstVT.isFixedLengthVector())
3574 return Res;
3575
3576 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3577}
3578
3579 static SDValue
3580 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3581 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3582 SDValue Offset, SDValue Mask, SDValue VL,
3583 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3584 if (Passthru.isUndef())
3585 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3586 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3587 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3588 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3589}
3590
3591static SDValue
3592getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3593 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3594 SDValue VL,
3595 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3596 if (Passthru.isUndef())
3597 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3598 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3599 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3600 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3601}
3602
3603 struct VIDSequence {
3604 int64_t StepNumerator;
3605 unsigned StepDenominator;
3606 int64_t Addend;
3607};
3608
3609 static std::optional<APInt> getExactInteger(const APFloat &APF,
3610 unsigned BitWidth) {
3611 // We will use a SINT_TO_FP to materialize this constant so we should use a
3612 // signed APSInt here.
3613 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3614 // We use an arbitrary rounding mode here. If a floating-point is an exact
3615 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3616 // the rounding mode changes the output value, then it is not an exact
3617 // integer.
3618 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3619 bool IsExact;
3620 // If it is out of signed integer range, it will return an invalid operation.
3621 // If it is not an exact integer, IsExact is false.
3622 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3623 APFloatBase::opInvalidOp) ||
3624 !IsExact)
3625 return std::nullopt;
3626 return ValInt.extractBits(BitWidth, 0);
3627}
3628
3629// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3630// to the (non-zero) step S and start value X. This can be then lowered as the
3631// RVV sequence (VID * S) + X, for example.
3632// The step S is represented as an integer numerator divided by a positive
3633// denominator. Note that the implementation currently only identifies
3634// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3635// cannot detect 2/3, for example.
3636// Note that this method will also match potentially unappealing index
3637// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3638// determine whether this is worth generating code for.
3639//
3640// EltSizeInBits is the size of the type that the sequence will be calculated
3641// in, i.e. SEW for build_vectors or XLEN for address calculations.
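// Worked examples (illustrative):
//   <i32 1, i32 3, i32 5, i32 7>  ->  step 2/1, addend 1   (VID * 2 + 1)
//   <i32 0, i32 0, i32 1, i32 1>  ->  step 1/2, addend 0   ((VID * 1) / 2)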
3642static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3643 unsigned EltSizeInBits) {
3644 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3645 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3646 return std::nullopt;
3647 bool IsInteger = Op.getValueType().isInteger();
3648
3649 std::optional<unsigned> SeqStepDenom;
3650 std::optional<APInt> SeqStepNum;
3651 std::optional<APInt> SeqAddend;
3652 std::optional<std::pair<APInt, unsigned>> PrevElt;
3653 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3654
3655 // First extract the ops into a list of constant integer values. This may not
3656 // be possible for floats if they're not all representable as integers.
3657 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3658 const unsigned OpSize = Op.getScalarValueSizeInBits();
3659 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3660 if (Elt.isUndef()) {
3661 Elts[Idx] = std::nullopt;
3662 continue;
3663 }
3664 if (IsInteger) {
3665 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3666 } else {
3667 auto ExactInteger =
3668 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3669 if (!ExactInteger)
3670 return std::nullopt;
3671 Elts[Idx] = *ExactInteger;
3672 }
3673 }
3674
3675 for (auto [Idx, Elt] : enumerate(Elts)) {
3676 // Assume undef elements match the sequence; we just have to be careful
3677 // when interpolating across them.
3678 if (!Elt)
3679 continue;
3680
3681 if (PrevElt) {
3682 // Calculate the step since the last non-undef element, and ensure
3683 // it's consistent across the entire sequence.
3684 unsigned IdxDiff = Idx - PrevElt->second;
3685 APInt ValDiff = *Elt - PrevElt->first;
3686
3687 // A zero value difference means that we're somewhere in the middle
3688 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3689 // step change before evaluating the sequence.
3690 if (ValDiff == 0)
3691 continue;
3692
3693 int64_t Remainder = ValDiff.srem(IdxDiff);
3694 // Normalize the step if it's greater than 1.
3695 if (Remainder != ValDiff.getSExtValue()) {
3696 // The difference must cleanly divide the element span.
3697 if (Remainder != 0)
3698 return std::nullopt;
3699 ValDiff = ValDiff.sdiv(IdxDiff);
3700 IdxDiff = 1;
3701 }
3702
3703 if (!SeqStepNum)
3704 SeqStepNum = ValDiff;
3705 else if (ValDiff != SeqStepNum)
3706 return std::nullopt;
3707
3708 if (!SeqStepDenom)
3709 SeqStepDenom = IdxDiff;
3710 else if (IdxDiff != *SeqStepDenom)
3711 return std::nullopt;
3712 }
3713
3714 // Record this non-undef element for later.
3715 if (!PrevElt || PrevElt->first != *Elt)
3716 PrevElt = std::make_pair(*Elt, Idx);
3717 }
3718
3719 // We need to have logged a step for this to count as a legal index sequence.
3720 if (!SeqStepNum || !SeqStepDenom)
3721 return std::nullopt;
3722
3723 // Loop back through the sequence and validate elements we might have skipped
3724 // while waiting for a valid step. While doing this, log any sequence addend.
3725 for (auto [Idx, Elt] : enumerate(Elts)) {
3726 if (!Elt)
3727 continue;
3728 APInt ExpectedVal =
3729 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3730 *SeqStepNum)
3731 .sdiv(*SeqStepDenom);
3732
3733 APInt Addend = *Elt - ExpectedVal;
3734 if (!SeqAddend)
3735 SeqAddend = Addend;
3736 else if (Addend != SeqAddend)
3737 return std::nullopt;
3738 }
3739
3740 assert(SeqAddend && "Must have an addend if we have a step");
3741
3742 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3743 SeqAddend->getSExtValue()};
3744}
3745
3746// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3747// and lower it as a VRGATHER_VX_VL from the source vector.
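// For example (illustrative), a v4i32 splat of (extractelt %src, 3) is
// lowered to a single RISCVISD::VRGATHER_VX_VL of %src with index 3, so the
// element never has to be moved out to a scalar register and splatted back.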
3748static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3749 SelectionDAG &DAG,
3750 const RISCVSubtarget &Subtarget) {
3751 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3752 return SDValue();
3753 SDValue Src = SplatVal.getOperand(0);
3754 // Don't perform this optimization for i1 vectors, or if the element types are
3755 // different
3756 // FIXME: Support i1 vectors, maybe by promoting to i8?
3757 MVT EltTy = VT.getVectorElementType();
3758 if (EltTy == MVT::i1 ||
3759 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3760 return SDValue();
3761 MVT SrcVT = Src.getSimpleValueType();
3762 if (EltTy != SrcVT.getVectorElementType())
3763 return SDValue();
3764 SDValue Idx = SplatVal.getOperand(1);
3765 // The index must be a legal type.
3766 if (Idx.getValueType() != Subtarget.getXLenVT())
3767 return SDValue();
3768
3769 // Check that we know Idx lies within VT
3770 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3771 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3772 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3773 return SDValue();
3774 }
3775
3776 // Convert fixed length vectors to scalable
3777 MVT ContainerVT = VT;
3778 if (VT.isFixedLengthVector())
3779 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3780
3781 MVT SrcContainerVT = SrcVT;
3782 if (SrcVT.isFixedLengthVector()) {
3783 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3784 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3785 }
3786
3787 // Put Vec in a VT sized vector
3788 if (SrcContainerVT.getVectorMinNumElements() <
3789 ContainerVT.getVectorMinNumElements())
3790 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3791 else
3792 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3793
3794 // We checked that Idx fits inside VT earlier
3795 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3796 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3797 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3798 if (VT.isFixedLengthVector())
3799 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3800 return Gather;
3801}
3802
3803 static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3804 const RISCVSubtarget &Subtarget) {
3805 MVT VT = Op.getSimpleValueType();
3806 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3807
3808 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3809
3810 SDLoc DL(Op);
3811 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3812
3813 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3814 int64_t StepNumerator = SimpleVID->StepNumerator;
3815 unsigned StepDenominator = SimpleVID->StepDenominator;
3816 int64_t Addend = SimpleVID->Addend;
3817
3818 assert(StepNumerator != 0 && "Invalid step");
3819 bool Negate = false;
3820 int64_t SplatStepVal = StepNumerator;
3821 unsigned StepOpcode = ISD::MUL;
3822 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3823 // anyway as the shift of 63 won't fit in uimm5.
3824 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3825 isPowerOf2_64(std::abs(StepNumerator))) {
3826 Negate = StepNumerator < 0;
3827 StepOpcode = ISD::SHL;
3828 SplatStepVal = Log2_64(std::abs(StepNumerator));
3829 }
3830
3831 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3832 // since it's the immediate value many RVV instructions accept. There is
3833 // no vmul.vi instruction, so ensure the multiply constant can fit in a
3834 // single addi instruction. For the addend, we allow up to 32 bits.
3835 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3836 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3837 isPowerOf2_32(StepDenominator) &&
3838 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3839 MVT VIDVT =
3840 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3841 MVT VIDContainerVT =
3842 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3843 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3844 // Convert right out of the scalable type so we can use standard ISD
3845 // nodes for the rest of the computation. If we used scalable types with
3846 // these, we'd lose the fixed-length vector info and generate worse
3847 // vsetvli code.
3848 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3849 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3850 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3851 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3852 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3853 }
3854 if (StepDenominator != 1) {
3855 SDValue SplatStep =
3856 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3857 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3858 }
3859 if (Addend != 0 || Negate) {
3860 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3861 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3862 VID);
3863 }
3864 if (VT.isFloatingPoint()) {
3865 // TODO: Use vfwcvt to reduce register pressure.
3866 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3867 }
3868 return VID;
3869 }
3870 }
3871
3872 return SDValue();
3873}
3874
3875/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3876/// which constitute a large proportion of the elements. In such cases we can
3877/// splat a vector with the dominant element and make up the shortfall with
3878 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3879/// Note that this includes vectors of 2 elements by association. The
3880/// upper-most element is the "dominant" one, allowing us to use a splat to
3881/// "insert" the upper element, and an insert of the lower element at position
3882/// 0, which improves codegen.
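/// For example (illustrative), <a, b, a, a> is lowered as a splat of the
/// dominant value a followed by a single insert of b at index 1; values that
/// occur more than once but are not dominant are blended in with a VSELECT
/// under a constant mask instead.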
3883 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3884 const RISCVSubtarget &Subtarget) {
3885 MVT VT = Op.getSimpleValueType();
3886 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3887
3888 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3889
3890 SDLoc DL(Op);
3891 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3892
3893 MVT XLenVT = Subtarget.getXLenVT();
3894 unsigned NumElts = Op.getNumOperands();
3895
3896 SDValue DominantValue;
3897 unsigned MostCommonCount = 0;
3898 DenseMap<SDValue, unsigned> ValueCounts;
3899 unsigned NumUndefElts =
3900 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3901
3902 // Track the number of scalar loads we know we'd be inserting, estimated as
3903 // any non-zero floating-point constant. Other kinds of element are either
3904 // already in registers or are materialized on demand. The threshold at which
3905 // a vector load is more desirable than several scalar materialization and
3906 // vector-insertion instructions is not known.
3907 unsigned NumScalarLoads = 0;
3908
3909 for (SDValue V : Op->op_values()) {
3910 if (V.isUndef())
3911 continue;
3912
3913 unsigned &Count = ValueCounts[V];
3914 if (0 == Count)
3915 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3916 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3917
3918 // Is this value dominant? In case of a tie, prefer the highest element as
3919 // it's cheaper to insert near the beginning of a vector than it is at the
3920 // end.
3921 if (++Count >= MostCommonCount) {
3922 DominantValue = V;
3923 MostCommonCount = Count;
3924 }
3925 }
3926
3927 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3928 unsigned NumDefElts = NumElts - NumUndefElts;
3929 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3930
3931 // Don't perform this optimization when optimizing for size, since
3932 // materializing elements and inserting them tends to cause code bloat.
3933 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3934 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3935 ((MostCommonCount > DominantValueCountThreshold) ||
3936 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3937 // Start by splatting the most common element.
3938 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3939
3940 DenseSet<SDValue> Processed{DominantValue};
3941
3942 // We can handle an insert into the last element (of a splat) via
3943 // v(f)slide1down. This is slightly better than the vslideup insert
3944 // lowering as it avoids the need for a vector group temporary. It
3945 // is also better than using vmerge.vx as it avoids the need to
3946 // materialize the mask in a vector register.
3947 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3948 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3949 LastOp != DominantValue) {
3950 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3951 auto OpCode =
3952 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3953 if (!VT.isFloatingPoint())
3954 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3955 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3956 LastOp, Mask, VL);
3957 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3958 Processed.insert(LastOp);
3959 }
3960
3961 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3962 for (const auto &OpIdx : enumerate(Op->ops())) {
3963 const SDValue &V = OpIdx.value();
3964 if (V.isUndef() || !Processed.insert(V).second)
3965 continue;
3966 if (ValueCounts[V] == 1) {
3967 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3968 } else {
3969 // Blend in all instances of this value using a VSELECT, using a
3970 // mask where each bit signals whether that element is the one
3971 // we're after.
3972 SmallVector<SDValue> Ops;
3973 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3974 return DAG.getConstant(V == V1, DL, XLenVT);
3975 });
3976 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3977 DAG.getBuildVector(SelMaskTy, DL, Ops),
3978 DAG.getSplatBuildVector(VT, DL, V), Vec);
3979 }
3980 }
3981
3982 return Vec;
3983 }
3984
3985 return SDValue();
3986}
3987
3988 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3989 const RISCVSubtarget &Subtarget) {
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992
3993 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3994
3995 SDLoc DL(Op);
3996 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3997
3998 MVT XLenVT = Subtarget.getXLenVT();
3999 unsigned NumElts = Op.getNumOperands();
4000
4001 if (VT.getVectorElementType() == MVT::i1) {
4002 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4003 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4004 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4005 }
4006
4007 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4008 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4009 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4010 }
4011
4012 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4013 // scalar integer chunks whose bit-width depends on the number of mask
4014 // bits and XLEN.
4015 // First, determine the most appropriate scalar integer type to use. This
4016 // is at most XLenVT, but may be shrunk to a smaller vector element type
4017 // according to the size of the final vector - use i8 chunks rather than
4018 // XLenVT if we're producing a v8i1. This results in more consistent
4019 // codegen across RV32 and RV64.
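// Worked example (illustrative): the constant mask v8i1 <1,0,1,1,0,0,0,1>
// packs into the single i8 value 0x8D (bit i holds element i), is built as a
// v1i8 build_vector and then bitcast back to v8i1.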
4020 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4021 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4022 // If we have to use more than one INSERT_VECTOR_ELT then this
4023 // optimization is likely to increase code size; avoid performing it in
4024 // such a case. We can use a load from a constant pool in this case.
4025 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4026 return SDValue();
4027 // Now we can create our integer vector type. Note that it may be larger
4028 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4029 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4030 MVT IntegerViaVecVT =
4031 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4032 IntegerViaVecElts);
4033
4034 uint64_t Bits = 0;
4035 unsigned BitPos = 0, IntegerEltIdx = 0;
4036 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4037
4038 for (unsigned I = 0; I < NumElts;) {
4039 SDValue V = Op.getOperand(I);
4040 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4041 Bits |= ((uint64_t)BitValue << BitPos);
4042 ++BitPos;
4043 ++I;
4044
4045 // Once we accumulate enough bits to fill our scalar type or process the
4046 // last element, insert into our vector and clear our accumulated data.
4047 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4048 if (NumViaIntegerBits <= 32)
4049 Bits = SignExtend64<32>(Bits);
4050 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4051 Elts[IntegerEltIdx] = Elt;
4052 Bits = 0;
4053 BitPos = 0;
4054 IntegerEltIdx++;
4055 }
4056 }
4057
4058 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4059
4060 if (NumElts < NumViaIntegerBits) {
4061 // If we're producing a smaller vector than our minimum legal integer
4062 // type, bitcast to the equivalent (known-legal) mask type, and extract
4063 // our final mask.
4064 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4065 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4066 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4067 } else {
4068 // Else we must have produced an integer type with the same size as the
4069 // mask type; bitcast for the final result.
4070 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4071 Vec = DAG.getBitcast(VT, Vec);
4072 }
4073
4074 return Vec;
4075 }
4076
4077 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4078 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4079 : RISCVISD::VMV_V_X_VL;
4080 if (!VT.isFloatingPoint())
4081 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4082 Splat =
4083 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4084 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4085 }
4086
4087 // Try and match index sequences, which we can lower to the vid instruction
4088 // with optional modifications. An all-undef vector is matched by
4089 // getSplatValue, above.
4090 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4091 return Res;
4092
4093 // For very small build_vectors, use a single scalar insert of a constant.
4094 // TODO: Base this on constant rematerialization cost, not size.
4095 const unsigned EltBitSize = VT.getScalarSizeInBits();
4096 if (VT.getSizeInBits() <= 32 &&
4097 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4098 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4099 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4100 "Unexpected sequence type");
4101 // If we can use the original VL with the modified element type, this
4102 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4103 // be moved into InsertVSETVLI?
4104 unsigned ViaVecLen =
4105 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4106 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4107
4108 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4109 uint64_t SplatValue = 0;
4110 // Construct the amalgamated value at this larger vector type.
4111 for (const auto &OpIdx : enumerate(Op->op_values())) {
4112 const auto &SeqV = OpIdx.value();
4113 if (!SeqV.isUndef())
4114 SplatValue |=
4115 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4116 }
4117
4118 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4120 // achieve better constant materialization.
4120 // On RV32, we need to sign-extend to use getSignedConstant.
4121 if (ViaIntVT == MVT::i32)
4122 SplatValue = SignExtend64<32>(SplatValue);
4123
4124 SDValue Vec = DAG.getInsertVectorElt(
4125 DL, DAG.getUNDEF(ViaVecVT),
4126 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4127 if (ViaVecLen != 1)
4128 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4129 return DAG.getBitcast(VT, Vec);
4130 }
4131
4132
4133 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4134 // when re-interpreted as a vector with a larger element type. For example,
4135 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4136 // could be instead splat as
4137 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4138 // TODO: This optimization could also work on non-constant splats, but it
4139 // would require bit-manipulation instructions to construct the splat value.
4140 SmallVector<SDValue> Sequence;
4141 const auto *BV = cast<BuildVectorSDNode>(Op);
4142 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4143 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4144 BV->getRepeatedSequence(Sequence) &&
4145 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4146 unsigned SeqLen = Sequence.size();
4147 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4148 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4149 ViaIntVT == MVT::i64) &&
4150 "Unexpected sequence type");
4151
4152 // If we can use the original VL with the modified element type, this
4153 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4154 // be moved into InsertVSETVLI?
4155 const unsigned RequiredVL = NumElts / SeqLen;
4156 const unsigned ViaVecLen =
4157 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4158 NumElts : RequiredVL;
4159 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4160
4161 unsigned EltIdx = 0;
4162 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4163 uint64_t SplatValue = 0;
4164 // Construct the amalgamated value which can be splatted as this larger
4165 // vector type.
4166 for (const auto &SeqV : Sequence) {
4167 if (!SeqV.isUndef())
4168 SplatValue |=
4169 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4170 EltIdx++;
4171 }
4172
4173 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4174 // achieve better constant materialization.
4175 // On RV32, we need to sign-extend to use getSignedConstant.
4176 if (ViaIntVT == MVT::i32)
4177 SplatValue = SignExtend64<32>(SplatValue);
4178
4179 // Since we can't introduce illegal i64 types at this stage, we can only
4180 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4181 // way we can use RVV instructions to splat.
4182 assert((ViaIntVT.bitsLE(XLenVT) ||
4183 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4184 "Unexpected bitcast sequence");
4185 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4186 SDValue ViaVL =
4187 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4188 MVT ViaContainerVT =
4189 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4190 SDValue Splat =
4191 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4192 DAG.getUNDEF(ViaContainerVT),
4193 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4194 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4195 if (ViaVecLen != RequiredVL)
4196 Splat = DAG.getExtractSubvector(
4197 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4198 return DAG.getBitcast(VT, Splat);
4199 }
4200 }
4201
4202 // If the number of signbits allows, see if we can lower as a <N x i8>.
4203 // Our main goal here is to reduce LMUL (and thus work) required to
4204 // build the constant, but we will also narrow if the resulting
4205 // narrow vector is known to materialize cheaply.
4206 // TODO: We really should be costing the smaller vector. There are
4207 // profitable cases this misses.
4208 if (EltBitSize > 8 && VT.isInteger() &&
4209 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4210 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4211 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4212 DL, Op->ops());
4213 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4214 Source, DAG, Subtarget);
4215 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4216 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4217 }
4218
4219 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4220 return Res;
4221
4222 // For constant vectors, use generic constant pool lowering. Otherwise,
4223 // we'd have to materialize constants in GPRs just to move them into the
4224 // vector.
4225 return SDValue();
4226}
4227
4228static unsigned getPACKOpcode(unsigned DestBW,
4229 const RISCVSubtarget &Subtarget) {
4230 switch (DestBW) {
4231 default:
4232 llvm_unreachable("Unsupported pack size");
4233 case 16:
4234 return RISCV::PACKH;
4235 case 32:
4236 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4237 case 64:
4238 assert(Subtarget.is64Bit());
4239 return RISCV::PACK;
4240 }
4241}
4242
4243/// Double the element size of the build vector to reduce the number
4244/// of vslide1down in the build vector chain. In the worst case, this
4245/// trades three scalar operations for 1 vector operation. Scalar
4246/// operations are generally lower latency, and for out-of-order cores
4247/// we also benefit from additional parallelism.
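/// For example (illustrative), with 8-bit elements two neighbouring operands
/// a and b are combined into the 16-bit value (b << 8) | a (a single PACKH
/// when Zbkb is available), halving the number of vslide1down.vx steps.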
4248 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4249 const RISCVSubtarget &Subtarget) {
4250 SDLoc DL(Op);
4251 MVT VT = Op.getSimpleValueType();
4252 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4253 MVT ElemVT = VT.getVectorElementType();
4254 if (!ElemVT.isInteger())
4255 return SDValue();
4256
4257 // TODO: Relax these architectural restrictions, possibly with costing
4258 // of the actual instructions required.
4259 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4260 return SDValue();
4261
4262 unsigned NumElts = VT.getVectorNumElements();
4263 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4264 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4265 NumElts % 2 != 0)
4266 return SDValue();
4267
4268 // Produce [B,A] packed into a type twice as wide. Note that all
4269 // scalars are XLenVT, possibly masked (see below).
4270 MVT XLenVT = Subtarget.getXLenVT();
4271 SDValue Mask = DAG.getConstant(
4272 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4273 auto pack = [&](SDValue A, SDValue B) {
4274 // Bias the scheduling of the inserted operations to near the
4275 // definition of the element - this tends to reduce register
4276 // pressure overall.
4277 SDLoc ElemDL(B);
4278 if (Subtarget.hasStdExtZbkb())
4279 // Note that we're relying on the high bits of the result being
4280 // don't care. For PACKW, the result is *sign* extended.
4281 return SDValue(
4282 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4283 ElemDL, XLenVT, A, B),
4284 0);
4285
4286 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4287 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4288 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4289 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4290 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4292 };
4293
4294 SmallVector<SDValue> NewOperands;
4295 NewOperands.reserve(NumElts / 2);
4296 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4297 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4298 assert(NumElts == NewOperands.size() * 2);
4299 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4300 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4301 return DAG.getNode(ISD::BITCAST, DL, VT,
4302 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4303}
4304
4305 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4306 const RISCVSubtarget &Subtarget) {
4307 MVT VT = Op.getSimpleValueType();
4308 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4309
4310 MVT EltVT = VT.getVectorElementType();
4311 MVT XLenVT = Subtarget.getXLenVT();
4312
4313 SDLoc DL(Op);
4314
4315 // Proper support for f16 requires Zvfh. bf16 always requires special
4316 // handling. We need to cast the scalar to integer and create an integer
4317 // build_vector.
4318 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4319 MVT IVT = VT.changeVectorElementType(MVT::i16);
4320 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4321 for (const auto &[I, U] : enumerate(Op->ops())) {
4322 SDValue Elem = U.get();
4323 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4324 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4325 // Called by LegalizeDAG, we need to use XLenVT operations since we
4326 // can't create illegal types.
4327 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4328 // Manually constant fold so the integer build_vector can be lowered
4329 // better. Waiting for DAGCombine will be too late.
4330 APInt V =
4331 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4332 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4333 } else {
4334 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4335 }
4336 } else {
4337 // Called by scalar type legalizer, we can use i16.
4338 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4339 }
4340 }
4341 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4342 return DAG.getBitcast(VT, Res);
4343 }
4344
4345 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4346 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4347 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4348
4349 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4350
4351 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4352
4353 if (VT.getVectorElementType() == MVT::i1) {
4354 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4355 // vector type, we have a legal equivalently-sized i8 type, so we can use
4356 // that.
4357 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4358 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4359
4360 SDValue WideVec;
4361 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4362 // For a splat, perform a scalar truncate before creating the wider
4363 // vector.
4364 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4365 DAG.getConstant(1, DL, Splat.getValueType()));
4366 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4367 } else {
4368 SmallVector<SDValue, 8> Ops(Op->op_values());
4369 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4370 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4371 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4372 }
4373
4374 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4375 }
4376
4377 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4378 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4379 return Gather;
4380
4381 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4382 // pressure at high LMUL.
4383 if (all_of(Op->ops().drop_front(),
4384 [](const SDUse &U) { return U.get().isUndef(); })) {
4385 unsigned Opc =
4386 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4387 if (!VT.isFloatingPoint())
4388 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4389 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4390 Splat, VL);
4391 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4392 }
4393
4394 unsigned Opc =
4395 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4396 if (!VT.isFloatingPoint())
4397 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4398 Splat =
4399 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4400 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4401 }
4402
4403 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4404 return Res;
4405
4406 // If we're compiling for an exact VLEN value, we can split our work per
4407 // register in the register group.
4408 if (const auto VLen = Subtarget.getRealVLen();
4409 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4410 MVT ElemVT = VT.getVectorElementType();
4411 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4412 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4413 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4414 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4415 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4416
4417 // The following semantically builds up a fixed length concat_vector
4418 // of the component build_vectors. We eagerly lower to scalable and
4419 // insert_subvector here to avoid DAG combining it back to a large
4420 // build_vector.
4421 SmallVector<SDValue> BuildVectorOps(Op->ops());
4422 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4423 SDValue Vec = DAG.getUNDEF(ContainerVT);
4424 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4425 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4426 SDValue SubBV =
4427 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4428 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4429 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4430 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4431 }
4432 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4433 }
4434
4435 // If we're about to resort to vslide1down (or stack usage), pack our
4436 // elements into the widest scalar type we can. This will force a VL/VTYPE
4437 // toggle, but reduces the critical path, the number of vslide1down ops
4438 // required, and possibly enables scalar folds of the values.
4439 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4440 return Res;
4441
4442 // For m1 vectors, if we have non-undef values in both halves of our vector,
4443 // split the vector into low and high halves, build them separately, then
4444 // use a vselect to combine them. For long vectors, this cuts the critical
4445 // path of the vslide1down sequence in half, and gives us an opportunity
4446 // to special case each half independently. Note that we don't change the
4447 // length of the sub-vectors here, so if both fallback to the generic
4448 // vslide1down path, we should be able to fold the vselect into the final
4449 // vslidedown (for the undef tail) for the first half w/ masking.
4450 unsigned NumElts = VT.getVectorNumElements();
4451 unsigned NumUndefElts =
4452 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4453 unsigned NumDefElts = NumElts - NumUndefElts;
4454 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4455 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4456 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4457 SmallVector<SDValue> MaskVals;
4458 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4459 SubVecAOps.reserve(NumElts);
4460 SubVecBOps.reserve(NumElts);
4461 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4462 SDValue Elem = U.get();
4463 if (Idx < NumElts / 2) {
4464 SubVecAOps.push_back(Elem);
4465 SubVecBOps.push_back(UndefElem);
4466 } else {
4467 SubVecAOps.push_back(UndefElem);
4468 SubVecBOps.push_back(Elem);
4469 }
4470 bool SelectMaskVal = (Idx < NumElts / 2);
4471 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4472 }
4473 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4474 MaskVals.size() == NumElts);
4475
4476 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4477 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4478 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4479 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4480 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4481 }
4482
4483 // Cap the cost at a value linear to the number of elements in the vector.
4484 // The default lowering is to use the stack. The vector store + scalar loads
4485 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4486 // being (at least) linear in LMUL. As a result, using the vslidedown
4487 // lowering for every element ends up being VL*LMUL.
4488 // TODO: Should we be directly costing the stack alternative? Doing so might
4489 // give us a more accurate upper bound.
4490 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4491
4492 // TODO: unify with TTI getSlideCost.
4493 InstructionCost PerSlideCost = 1;
4494 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4495 default: break;
4496 case RISCVVType::LMUL_2:
4497 PerSlideCost = 2;
4498 break;
4499 case RISCVVType::LMUL_4:
4500 PerSlideCost = 4;
4501 break;
4502 case RISCVVType::LMUL_8:
4503 PerSlideCost = 8;
4504 break;
4505 }
4506
4507 // TODO: Should we be using the build instseq then cost + evaluate scheme
4508 // we use for integer constants here?
4509 unsigned UndefCount = 0;
4510 for (const SDValue &V : Op->ops()) {
4511 if (V.isUndef()) {
4512 UndefCount++;
4513 continue;
4514 }
4515 if (UndefCount) {
4516 LinearBudget -= PerSlideCost;
4517 UndefCount = 0;
4518 }
4519 LinearBudget -= PerSlideCost;
4520 }
4521 if (UndefCount) {
4522 LinearBudget -= PerSlideCost;
4523 }
4524
4525 if (LinearBudget < 0)
4526 return SDValue();
4527
4528 assert((!VT.isFloatingPoint() ||
4529 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4530 "Illegal type which will result in reserved encoding");
4531
4532 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4533
4534 // General case: splat the first operand and slide other operands down one
4535 // by one to form a vector. Alternatively, if every operand is an
4536 // extraction from element 0 of a vector, we use that vector from the last
4537 // extraction as the start value and slide up instead of down, so that
4538 // (1) we can avoid the initial splat and (2) we can turn those vslide1up into
4539 // vslideup of 1 later and eliminate the vector to scalar movement, which is
4540 // something we cannot do with vslide1down/vslidedown.
4541 // Of course, using vslide1up/vslideup might increase the register pressure,
4542 // and that's why we conservatively limit to cases where every operand is an
4543 // extraction from the first element.
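// For example (illustrative), a non-constant <a, b, c, d> becomes: splat a,
// then three vslide1down.vx steps feeding in b, c and d. If instead every
// operand were (extractelt %v, 0), the chain would start from the last such
// %v and use vslide1up.vx, skipping the initial splat entirely.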
4544 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4545 SDValue EVec;
4546 bool SlideUp = false;
4547 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4548 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4549 if (SlideUp)
4550 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4551 Mask, VL, Policy);
4552 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4553 Mask, VL, Policy);
4554 };
4555
4556 // The reason we don't use all_of here is because we're also capturing EVec
4557 // from the last non-undef operand. If the std::execution_policy of the
4558 // underlying std::all_of is anything but std::sequenced_policy we might
4559 // capture the wrong EVec.
4560 for (SDValue V : Operands) {
4561 using namespace SDPatternMatch;
4562 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4563 if (!SlideUp)
4564 break;
4565 }
4566
4567 // Do not slideup if the element type of EVec is different.
4568 if (SlideUp) {
4569 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4570 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4571 if (EVecEltVT != ContainerEltVT)
4572 SlideUp = false;
4573 }
4574
4575 if (SlideUp) {
4576 MVT EVecContainerVT = EVec.getSimpleValueType();
4577 // Make sure the original vector has scalable vector type.
4578 if (EVecContainerVT.isFixedLengthVector()) {
4579 EVecContainerVT =
4580 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4581 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4582 }
4583
4584 // Adapt EVec's type into ContainerVT.
4585 if (EVecContainerVT.getVectorMinNumElements() <
4586 ContainerVT.getVectorMinNumElements())
4587 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4588 else
4589 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4590
4591 // Reverse the elements as we're going to slide up from the last element.
4592 std::reverse(Operands.begin(), Operands.end());
4593 }
4594
4595 SDValue Vec;
4596 UndefCount = 0;
4597 for (SDValue V : Operands) {
4598 if (V.isUndef()) {
4599 UndefCount++;
4600 continue;
4601 }
4602
4603 // Start our sequence with either a TA splat or extract source in the
4604 // hopes that hardware is able to recognize there's no dependency on the
4605 // prior value of our temporary register.
4606 if (!Vec) {
4607 if (SlideUp) {
4608 Vec = EVec;
4609 } else {
4610 Vec = DAG.getSplatVector(VT, DL, V);
4611 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4612 }
4613
4614 UndefCount = 0;
4615 continue;
4616 }
4617
4618 if (UndefCount) {
4619 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4620 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4621 VL);
4622 UndefCount = 0;
4623 }
4624
4625 unsigned Opcode;
4626 if (VT.isFloatingPoint())
4627 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4628 else
4629 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4630
4631 if (!VT.isFloatingPoint())
4632 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4633 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4634 V, Mask, VL);
4635 }
4636 if (UndefCount) {
4637 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4638 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4639 VL);
4640 }
4641 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4642}
4643
4644static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4645 SDValue Lo, SDValue Hi, SDValue VL,
4646 SelectionDAG &DAG) {
4647 if (!Passthru)
4648 Passthru = DAG.getUNDEF(VT);
4649 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4650 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4651 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4652 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4653 // node in order to try and match RVV vector/scalar instructions.
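// e.g. splatting the i64 constant -1 on RV32: Lo = Hi = 0xFFFFFFFF, so
// (LoC >> 31) == HiC holds and a single vmv.v.x of -1 is enough, since the
// scalar is sign-extended to SEW=64 by the vector/scalar instructions.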
4654 if ((LoC >> 31) == HiC)
4655 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4656
4657 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4658 // VL. This can temporarily increase VL if VL is less than VLMAX.
4659 if (LoC == HiC) {
4660 SDValue NewVL;
4661 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4662 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4663 else
4664 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4665 MVT InterVT =
4666 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4667 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4668 DAG.getUNDEF(InterVT), Lo, NewVL);
4669 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4670 }
4671 }
4672
4673 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4674 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4675 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4676 Hi.getConstantOperandVal(1) == 31)
4677 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4678
4679 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4680 // even if it might be sign extended.
4681 if (Hi.isUndef())
4682 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4683
4684 // Fall back to a stack store and stride x0 vector load.
4685 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4686 Hi, VL);
4687}
4688
4689// Called by type legalization to handle splat of i64 on RV32.
4690// FIXME: We can optimize this when the type has sign or zero bits in one
4691// of the halves.
4692static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4693 SDValue Scalar, SDValue VL,
4694 SelectionDAG &DAG) {
4695 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4696 SDValue Lo, Hi;
4697 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4698 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4699}
4700
4701// This function lowers a splat of a scalar operand Splat with the vector
4702// length VL. It ensures the final sequence is type legal, which is useful when
4703// lowering a splat after type legalization.
4704static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4705 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4706 const RISCVSubtarget &Subtarget) {
4707 bool HasPassthru = Passthru && !Passthru.isUndef();
4708 if (!HasPassthru && !Passthru)
4709 Passthru = DAG.getUNDEF(VT);
4710
4711 MVT EltVT = VT.getVectorElementType();
4712 MVT XLenVT = Subtarget.getXLenVT();
4713
4714 if (VT.isFloatingPoint()) {
4715 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4716 EltVT == MVT::bf16) {
4717 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4718 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4719 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4720 else
4721 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4722 MVT IVT = VT.changeVectorElementType(MVT::i16);
4723 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4724 SDValue Splat =
4725 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4726 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4727 }
4728 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4729 }
4730
4731 // Simplest case is that the operand needs to be promoted to XLenVT.
4732 if (Scalar.getValueType().bitsLE(XLenVT)) {
4733 // If the operand is a constant, sign extend to increase our chances
4734 // of being able to use a .vi instruction. ANY_EXTEND would become a
4735 // zero extend and the simm5 check in isel would fail.
4736 // FIXME: Should we ignore the upper bits in isel instead?
4737 unsigned ExtOpc =
4738 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4739 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4740 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4741 }
4742
4743 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4744 "Unexpected scalar for splat lowering!");
4745
4746 if (isOneConstant(VL) && isNullConstant(Scalar))
4747 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4748 DAG.getConstant(0, DL, XLenVT), VL);
4749
4750 // Otherwise use the more complicated splatting algorithm.
4751 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4752}
4753
4754// This function lowers an insert of a scalar operand Scalar into lane
4755// 0 of the vector regardless of the value of VL. The contents of the
4756// remaining lanes of the result vector are unspecified. VL is assumed
4757// to be non-zero.
4758 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4759 const SDLoc &DL, SelectionDAG &DAG,
4760 const RISCVSubtarget &Subtarget) {
4761 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4762
4763 const MVT XLenVT = Subtarget.getXLenVT();
4764 SDValue Passthru = DAG.getUNDEF(VT);
4765
4766 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4767 isNullConstant(Scalar.getOperand(1))) {
4768 SDValue ExtractedVal = Scalar.getOperand(0);
4769 // The element types must be the same.
4770 if (ExtractedVal.getValueType().getVectorElementType() ==
4771 VT.getVectorElementType()) {
4772 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4773 MVT ExtractedContainerVT = ExtractedVT;
4774 if (ExtractedContainerVT.isFixedLengthVector()) {
4775 ExtractedContainerVT = getContainerForFixedLengthVector(
4776 DAG, ExtractedContainerVT, Subtarget);
4777 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4778 ExtractedVal, DAG, Subtarget);
4779 }
4780 if (ExtractedContainerVT.bitsLE(VT))
4781 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4782 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4783 }
4784 }
4785
4786 if (VT.isFloatingPoint())
4787 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4788 VL);
4789
4790 // Avoid the tricky legalization cases by falling back to using the
4791 // splat code which already handles it gracefully.
4792 if (!Scalar.getValueType().bitsLE(XLenVT))
4793 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4794 DAG.getConstant(1, DL, XLenVT),
4795 VT, DL, DAG, Subtarget);
4796
4797 // If the operand is a constant, sign extend to increase our chances
4798 // of being able to use a .vi instruction. ANY_EXTEND would become a
4799 // zero extend and the simm5 check in isel would fail.
4800 // FIXME: Should we ignore the upper bits in isel instead?
4801 unsigned ExtOpc =
4802 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4803 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4804 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4805 VL);
4806}
4807
4808/// If concat_vector(V1,V2) could be folded away to some existing
4809/// vector source, return it. Note that the source may be larger
4810 /// than the requested concat_vector (i.e. an extract_subvector
4811/// might be required.)
4812 static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4813 EVT VT = V1.getValueType();
4814 assert(VT == V2.getValueType() && "argument types must match");
4815 // Both inputs must be extracts.
4816 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4817 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4818 return SDValue();
4819
4820 // Extracting from the same source.
4821 SDValue Src = V1.getOperand(0);
4822 if (Src != V2.getOperand(0) ||
4823 VT.isScalableVector() != Src.getValueType().isScalableVector())
4824 return SDValue();
4825
4826 // The extracts must extract the two halves of the source.
4827 if (V1.getConstantOperandVal(1) != 0 ||
4828 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4829 return SDValue();
4830
4831 return Src;
4832}
4833
4834// Can this shuffle be performed on exactly one (possibly larger) input?
4835 static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4836
4837 if (V2.isUndef())
4838 return V1;
4839
4840 unsigned NumElts = VT.getVectorNumElements();
4841 // Src needs to have twice the number of elements.
4842 // TODO: Update shuffle lowering to add the extract subvector
4843 if (SDValue Src = foldConcatVector(V1, V2);
4844 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4845 return Src;
4846
4847 return SDValue();
4848}
4849
4850/// Is this shuffle interleaving contiguous elements from one vector into the
4851/// even elements and contiguous elements from another vector into the odd
4852/// elements. \p EvenSrc will contain the element that should be in the first
4853/// even element. \p OddSrc will contain the element that should be in the first
4854/// odd element. These can be the first element in a source or the element half
4855/// way through the source.
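/// For example (illustrative), with two v8i32 sources the mask
/// <0,8,1,9,2,10,3,11> interleaves the low half of the first source
/// (EvenSrc == 0) with the low half of the second (OddSrc == NumElts).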
4856static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4857 int &OddSrc, const RISCVSubtarget &Subtarget) {
4858 // We need to be able to widen elements to the next larger integer type or
4859 // use the zip2a instruction at e64.
4860 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4861 !Subtarget.hasVendorXRivosVizip())
4862 return false;
4863
4864 int Size = Mask.size();
4865 int NumElts = VT.getVectorNumElements();
4866 assert(Size == (int)NumElts && "Unexpected mask size");
4867
4868 SmallVector<unsigned, 2> StartIndexes;
4869 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4870 return false;
4871
4872 EvenSrc = StartIndexes[0];
4873 OddSrc = StartIndexes[1];
4874
4875 // One source should be low half of first vector.
4876 if (EvenSrc != 0 && OddSrc != 0)
4877 return false;
4878
4879 // Subvectors will be subtracted from either at the start of the two input
4880 // vectors, or at the start and middle of the first vector if it's a unary
4881 // interleave.
4882 // In both cases, HalfNumElts will be extracted.
4883 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4884 // we'll create an illegal extract_subvector.
4885 // FIXME: We could support other values using a slidedown first.
4886 int HalfNumElts = NumElts / 2;
4887 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4888}
4889
4890/// Is this mask representing a masked combination of two slides?
4891 static bool isMaskedSlidePair(ArrayRef<int> Mask,
4892 std::array<std::pair<int, int>, 2> &SrcInfo) {
4893 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4894 return false;
4895
4896 // Avoid matching vselect idioms
4897 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4898 return false;
4899 // Prefer vslideup as the second instruction, and identity
4900 // only as the initial instruction.
4901 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4902 SrcInfo[1].second == 0)
4903 std::swap(SrcInfo[0], SrcInfo[1]);
4904 assert(SrcInfo[0].first != -1 && "Must find one slide");
4905 return true;
4906}
4907
4908// Exactly matches the semantics of a previously existing custom matcher
4909// to allow migration to new matcher without changing output.
4910static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4911 unsigned NumElts) {
4912 if (SrcInfo[1].first == -1)
4913 return true;
4914 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4915 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4916}
4917
4918static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4919 ArrayRef<int> Mask, unsigned Factor,
4920 bool RequiredPolarity) {
4921 int NumElts = Mask.size();
4922 for (const auto &[Idx, M] : enumerate(Mask)) {
4923 if (M < 0)
4924 continue;
4925 int Src = M >= NumElts;
4926 int Diff = (int)Idx - (M % NumElts);
4927 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4928 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4929 "Must match exactly one of the two slides");
4930 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4931 return false;
4932 }
4933 return true;
4934}
4935
4936/// Given a shuffle which can be represented as a pair of two slides,
4937/// see if it is a zipeven idiom. Zipeven is:
4938/// vs2: a0 a1 a2 a3
4939/// vs1: b0 b1 b2 b3
4940/// vd: a0 b0 a2 b2
4941static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4942 ArrayRef<int> Mask, unsigned &Factor) {
4943 Factor = SrcInfo[1].second;
4944 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4945 Mask.size() % Factor == 0 &&
4946 isAlternating(SrcInfo, Mask, Factor, true);
4947}
4948
4949/// Given a shuffle which can be represented as a pair of two slides,
4950/// see if it is a zipodd idiom. Zipodd is:
4951/// vs2: a0 a1 a2 a3
4952/// vs1: b0 b1 b2 b3
4953/// vd: a1 b1 a3 b3
4954/// Note that the operand order is swapped due to the way we canonicalize
4955 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4956static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4957 ArrayRef<int> Mask, unsigned &Factor) {
4958 Factor = -SrcInfo[1].second;
4959 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4960 Mask.size() % Factor == 0 &&
4961 isAlternating(SrcInfo, Mask, Factor, false);
4962}
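// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the zipeven/zipodd idioms above spelled out as plain shuffle masks
// over concat(vs2, vs1) for two hypothetical 4-element sources, indices 0..3
// for vs2 and 4..7 for vs1.
#include <array>
#include <cassert>
int main() {
  // vd = zipeven(vs2, vs1) = {a0, b0, a2, b2} -> mask <0, 4, 2, 6>
  // vd = zipodd (vs2, vs1) = {a1, b1, a3, b3} -> mask <1, 5, 3, 7>
  std::array<int, 4> ZipEvenMask = {0, 4, 2, 6};
  std::array<int, 4> ZipOddMask = {1, 5, 3, 7};
  for (int i = 0; i < 4; ++i) {
    // Even lanes keep their own element, odd lanes read the other source at a
    // fixed offset: exactly the "pair of slides, alternating by lane" shape
    // that isAlternating() checks.
    assert(ZipEvenMask[i] == (i % 2 ? i - 1 + 4 : i));
    assert(ZipOddMask[i] == (i % 2 ? i + 4 : i + 1));
  }
  return 0;
}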
4963
4964// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4965// 2, 4, 8 and the integer type Factor-times larger than VT's
4966// element type must be a legal element type.
4967// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4968// -> [p, q, r, s] (Factor=2, Index=1)
4969static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4970 SDValue Src, unsigned Factor,
4971 unsigned Index, SelectionDAG &DAG) {
4972 unsigned EltBits = VT.getScalarSizeInBits();
4973 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4974 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4975 SrcEC.divideCoefficientBy(Factor));
4976 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4977 SrcEC.divideCoefficientBy(Factor));
4978 Src = DAG.getBitcast(WideSrcVT, Src);
4979
4980 unsigned Shift = Index * EltBits;
4981 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4982 DAG.getConstant(Shift, DL, WideSrcVT));
4983 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4984 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4985 Res = DAG.getBitcast(CastVT, Res);
4986 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4987}
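// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the shift-and-truncate deinterleave modelled on scalars for
// Factor = 2. Viewing each pair of i8 lanes as one i16 lane (element 0 in the
// low bits), lane Index of every pair is recovered by an SRL of Index * 8 bits
// followed by truncation back to i8.
#include <cassert>
#include <cstdint>
int main() {
  uint8_t Interleaved[4] = {'a', 'p', 'b', 'q'}; // [a, p, b, q]
  for (unsigned Index = 0; Index < 2; ++Index) {
    uint8_t Out[2];
    for (unsigned i = 0; i < 2; ++i) {
      // Pack pair i into a 16-bit "wide element", then shift and truncate.
      uint16_t Wide = uint16_t(Interleaved[2 * i]) |
                      uint16_t(uint16_t(Interleaved[2 * i + 1]) << 8);
      Out[i] = uint8_t(Wide >> (Index * 8));
    }
    assert(Out[0] == (Index == 0 ? 'a' : 'p')); // Index=0 -> [a, b]
    assert(Out[1] == (Index == 0 ? 'b' : 'q')); // Index=1 -> [p, q]
  }
  return 0;
}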
4988
4989/// Match a single source shuffle which is an identity except that some
4990/// particular element is repeated. This can be lowered as a masked
4991/// vrgather.vi/vx. Note that the two source form of this is handled
4992/// by the recursive splitting logic and doesn't need special handling.
4993static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
4994 const RISCVSubtarget &Subtarget,
4995 SelectionDAG &DAG) {
4996
4997 SDLoc DL(SVN);
4998 MVT VT = SVN->getSimpleValueType(0);
4999 SDValue V1 = SVN->getOperand(0);
5000 assert(SVN->getOperand(1).isUndef());
5001 ArrayRef<int> Mask = SVN->getMask();
5002 const unsigned NumElts = VT.getVectorNumElements();
5003 MVT XLenVT = Subtarget.getXLenVT();
5004
5005 std::optional<int> SplatIdx;
5006 for (auto [I, M] : enumerate(Mask)) {
5007 if (M == -1 || I == (unsigned)M)
5008 continue;
5009 if (SplatIdx && *SplatIdx != M)
5010 return SDValue();
5011 SplatIdx = M;
5012 }
5013
5014 if (!SplatIdx)
5015 return SDValue();
5016
5017 SmallVector<SDValue> MaskVals;
5018 for (int MaskIndex : Mask) {
5019 bool SelectMaskVal = MaskIndex == *SplatIdx;
5020 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5021 }
5022 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5023 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5024 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5025 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5026 SmallVector<int>(NumElts, *SplatIdx));
5027 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5028}
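// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): a single-source mask that is an identity except that element 2 is
// repeated, e.g. <0, 2, 2, 3>. The lowering above is equivalent to splatting
// element 2 and selecting it only in the lanes whose mask index equals the
// splat index.
#include <array>
#include <cassert>
int main() {
  std::array<int, 4> Src = {10, 11, 12, 13};
  std::array<int, 4> Mask = {0, 2, 2, 3}; // identity except lane 1
  const int SplatIdx = 2;
  std::array<int, 4> Result;
  for (int i = 0; i < 4; ++i) {
    bool SelectSplat = Mask[i] == SplatIdx;           // the i1 select mask
    Result[i] = SelectSplat ? Src[SplatIdx] : Src[i]; // vselect(splat, V1)
  }
  for (int i = 0; i < 4; ++i)
    assert(Result[i] == Src[Mask[i]]); // matches the original shuffle
  return 0;
}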
5029
5030// Lower the following shuffle to vslidedown.
5031// a)
5032// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5033// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5034// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5035// b)
5036// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5037// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5038// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5039// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5040// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5041// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5042static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5043 SDValue V1, SDValue V2,
5044 ArrayRef<int> Mask,
5045 const RISCVSubtarget &Subtarget,
5046 SelectionDAG &DAG) {
5047 auto findNonEXTRACT_SUBVECTORParent =
5048 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5049 uint64_t Offset = 0;
5050 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5051 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5052 // a scalable vector. But we don't want to match the case.
5053 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5054 Offset += Parent.getConstantOperandVal(1);
5055 Parent = Parent.getOperand(0);
5056 }
5057 return std::make_pair(Parent, Offset);
5058 };
5059
5060 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5061 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5062
5063 // Extracting from the same source.
5064 SDValue Src = V1Src;
5065 if (Src != V2Src)
5066 return SDValue();
5067
5068 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5069 SmallVector<int, 16> NewMask(Mask);
5070 for (size_t i = 0; i != NewMask.size(); ++i) {
5071 if (NewMask[i] == -1)
5072 continue;
5073
5074 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5075 NewMask[i] = NewMask[i] + V1IndexOffset;
5076 } else {
5077 // Minus NewMask.size() is needed. Otherwise, the b case would be
5078 // <5,6,7,12> instead of <5,6,7,8>.
5079 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5080 }
5081 }
5082
5083 // First index must be known and non-zero. It will be used as the slidedown
5084 // amount.
5085 if (NewMask[0] <= 0)
5086 return SDValue();
5087
5088 // NewMask is also continuous.
5089 for (unsigned i = 1; i != NewMask.size(); ++i)
5090 if (NewMask[i - 1] + 1 != NewMask[i])
5091 return SDValue();
5092
5093 MVT XLenVT = Subtarget.getXLenVT();
5094 MVT SrcVT = Src.getSimpleValueType();
5095 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5096 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5097 SDValue Slidedown =
5098 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5099 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5100 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5101 return DAG.getExtractSubvector(
5102 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5103}
5104
5105// Because vslideup leaves the destination elements at the start intact, we can
5106// use it to perform shuffles that insert subvectors:
5107//
5108// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5109// ->
5110// vsetvli zero, 8, e8, mf2, ta, ma
5111// vslideup.vi v8, v9, 4
5112//
5113// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5114// ->
5115// vsetvli zero, 5, e8, mf2, tu, ma
5116// vslideup.vi v8, v9, 2
5117static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5118 SDValue V1, SDValue V2,
5119 ArrayRef<int> Mask,
5120 const RISCVSubtarget &Subtarget,
5121 SelectionDAG &DAG) {
5122 unsigned NumElts = VT.getVectorNumElements();
5123 int NumSubElts, Index;
5124 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5125 Index))
5126 return SDValue();
5127
5128 bool OpsSwapped = Mask[Index] < (int)NumElts;
5129 SDValue InPlace = OpsSwapped ? V2 : V1;
5130 SDValue ToInsert = OpsSwapped ? V1 : V2;
5131
5132 MVT XLenVT = Subtarget.getXLenVT();
5133 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5134 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5135 // We slide up by the index that the subvector is being inserted at, and set
5136 // VL to the index + the number of elements being inserted.
5137 unsigned Policy =
5139 // If we're adding a suffix to the in place vector, i.e. inserting right
5140 // up to the very end of it, then we don't actually care about the tail.
5141 if (NumSubElts + Index >= (int)NumElts)
5142 Policy |= RISCVVType::TAIL_AGNOSTIC;
5143
5144 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5145 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5146 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5147
5148 SDValue Res;
5149 // If we're inserting into the lowest elements, use a tail undisturbed
5150 // vmv.v.v.
5151 if (Index == 0)
5152 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5153 VL);
5154 else
5155 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5156 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5157 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5158}
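// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the first example mask in the comment above,
// <0, 1, 2, 3, 8, 9, 10, 11>, is an insert of v9's low half at index 4, so a
// vslideup by 4 with VL = Index + NumSubElts = 8 reproduces the shuffle.
#include <array>
#include <cassert>
int main() {
  std::array<int, 8> V8 = {0, 1, 2, 3, 4, 5, 6, 7};       // in-place operand
  std::array<int, 8> V9 = {8, 9, 10, 11, 12, 13, 14, 15}; // vector to insert
  std::array<int, 8> Mask = {0, 1, 2, 3, 8, 9, 10, 11};
  const int Index = 4, NumSubElts = 4, VL = Index + NumSubElts;
  std::array<int, 8> Res = V8; // vslideup keeps elements below Index intact
  for (int i = Index; i < VL; ++i)
    Res[i] = V9[i - Index];    // elements of V9 slid up by Index
  for (int i = 0; i < 8; ++i)
    assert(Res[i] == (Mask[i] < 8 ? V8[Mask[i]] : V9[Mask[i] - 8]));
  return 0;
}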
5159
5160/// Match v(f)slide1up/down idioms. These operations involve sliding
5161/// N-1 elements to make room for an inserted scalar at one end.
5162static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5163 SDValue V1, SDValue V2,
5164 ArrayRef<int> Mask,
5165 const RISCVSubtarget &Subtarget,
5166 SelectionDAG &DAG) {
5167 bool OpsSwapped = false;
5168 if (!isa<BuildVectorSDNode>(V1)) {
5169 if (!isa<BuildVectorSDNode>(V2))
5170 return SDValue();
5171 std::swap(V1, V2);
5172 OpsSwapped = true;
5173 }
5174 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5175 if (!Splat)
5176 return SDValue();
5177
5178 // Return true if the mask could describe a slide of Mask.size() - 1
5179 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5180 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5181 const unsigned S = (Offset > 0) ? 0 : -Offset;
5182 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5183 for (unsigned i = S; i != E; ++i)
5184 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5185 return false;
5186 return true;
5187 };
5188
5189 const unsigned NumElts = VT.getVectorNumElements();
5190 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5191 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5192 return SDValue();
5193
5194 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5195 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5196 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5197 return SDValue();
5198
5199 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5200 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5201
5202 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5203 // vslide1{down,up}.vx instead.
5204 if (VT.getVectorElementType() == MVT::bf16 ||
5205 (VT.getVectorElementType() == MVT::f16 &&
5206 !Subtarget.hasVInstructionsF16())) {
5207 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5208 Splat =
5209 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5210 V2 = DAG.getBitcast(
5211 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5212 SDValue Vec = DAG.getNode(
5213 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5214 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5215 Vec = DAG.getBitcast(ContainerVT, Vec);
5216 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5217 }
5218
5219 auto OpCode = IsVSlidedown ?
5220 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5221 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5222 if (!VT.isFloatingPoint())
5223 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5224 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5225 DAG.getUNDEF(ContainerVT),
5226 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5227 Splat, TrueMask, VL);
5228 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5229}
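// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the slide1 masks matched above for a 4-element vector V and a
// splatted scalar S (operand 0 is the splat, operand 1 is V, so V's lanes are
// indices 4..7 in the mask).
#include <array>
#include <cassert>
int main() {
  const int S = 99;                    // splatted scalar (BUILD_VECTOR splat)
  std::array<int, 4> V = {0, 1, 2, 3}; // the vector operand
  // vslide1up:   mask <0, 4, 5, 6> -> {S, V[0], V[1], V[2]}
  // vslide1down: mask <5, 6, 7, 0> -> {V[1], V[2], V[3], S}
  std::array<int, 4> UpMask = {0, 4, 5, 6};
  std::array<int, 4> DownMask = {5, 6, 7, 0};
  auto Elt = [&](int Idx) { return Idx < 4 ? S : V[Idx - 4]; };
  for (int i = 0; i < 4; ++i) {
    assert(Elt(UpMask[i]) == (i == 0 ? S : V[i - 1]));
    assert(Elt(DownMask[i]) == (i == 3 ? S : V[i + 1]));
  }
  return 0;
}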
5230
5231/// Match a mask which "spreads" the leading elements of a vector evenly
5232/// across the result. Factor is the spread amount, and Index is the
5233/// offset applied. (on success, Index < Factor) This is the inverse
5234/// of a deinterleave with the same Factor and Index. This is analogous
5235/// to an interleave, except that all but one lane is undef.
5236bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5237 unsigned &Index) {
5238 SmallVector<bool> LaneIsUndef(Factor, true);
5239 for (unsigned i = 0; i < Mask.size(); i++)
5240 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5241
5242 bool Found = false;
5243 for (unsigned i = 0; i < Factor; i++) {
5244 if (LaneIsUndef[i])
5245 continue;
5246 if (Found)
5247 return false;
5248 Index = i;
5249 Found = true;
5250 }
5251 if (!Found)
5252 return false;
5253
5254 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5255 unsigned j = i * Factor + Index;
5256 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5257 return false;
5258 }
5259 return true;
5260}
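// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): a spread mask with Factor = 4 and Index = 1 on 8 output elements.
// Only every 4th lane (offset by 1) is defined, and those lanes read source
// elements 0, 1, ... in order -- the inverse of a deinterleave(4, 1).
#include <array>
#include <cassert>
int main() {
  std::array<int, 8> Mask = {-1, 0, -1, -1, -1, 1, -1, -1};
  const unsigned Factor = 4, Index = 1;
  for (unsigned i = 0; i < Mask.size(); ++i) {
    if (Mask[i] == -1)
      continue;
    assert(i % Factor == Index && (unsigned)Mask[i] == i / Factor);
  }
  return 0;
}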
5261
5262static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5263 const SDLoc &DL, SelectionDAG &DAG,
5264 const RISCVSubtarget &Subtarget) {
5265 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5266 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5267 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5269
5270 MVT VT = Op0.getSimpleValueType();
5271 MVT IntVT = VT.changeVectorElementTypeToInteger();
5272 Op0 = DAG.getBitcast(IntVT, Op0);
5273 Op1 = DAG.getBitcast(IntVT, Op1);
5274
5275 MVT ContainerVT = IntVT;
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5278 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5279 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5280 }
5281
5282 MVT InnerVT = ContainerVT;
5283 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5284 if (Op1.isUndef() &&
5285 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5286 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5287 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5288 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5289 Subtarget.getXLenVT());
5290 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5291 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5292 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5293 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5294 }
5295
5296 SDValue Passthru = DAG.getUNDEF(InnerVT);
5297 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5298 if (InnerVT.bitsLT(ContainerVT))
5299 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5300 if (IntVT.isFixedLengthVector())
5301 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5302 Res = DAG.getBitcast(VT, Res);
5303 return Res;
5304}
5305
5306// Given a vector a, b, c, d return a vector Factor times longer
5307// with Factor-1 undef's between elements. Ex:
5308// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5309// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5310static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5311 const SDLoc &DL, SelectionDAG &DAG) {
5312
5313 MVT VT = V.getSimpleValueType();
5314 unsigned EltBits = VT.getScalarSizeInBits();
5315 ElementCount EC = VT.getVectorElementCount();
5316 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5317
5318 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5319
5320 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5321 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5322 // allow the SHL to fold away if Index is 0.
5323 if (Index != 0)
5324 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5325 DAG.getConstant(EltBits * Index, DL, WideVT));
5326 // Make sure to use original element type
5327 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5328 EC.multiplyCoefficientBy(Factor));
5329 return DAG.getBitcast(ResultVT, Result);
5330}
5331
5332// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5333// to create an interleaved vector of <[vscale x] n*2 x ty>.
5334// This requires that the size of ty is less than the subtarget's maximum ELEN.
5335static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5336 const SDLoc &DL, SelectionDAG &DAG,
5337 const RISCVSubtarget &Subtarget) {
5338
5339 // FIXME: Not only does this optimize the code, it fixes some correctness
5340 // issues because MIR does not have freeze.
5341 if (EvenV.isUndef())
5342 return getWideningSpread(OddV, 2, 1, DL, DAG);
5343 if (OddV.isUndef())
5344 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5345
5346 MVT VecVT = EvenV.getSimpleValueType();
5347 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5348 // Convert fixed vectors to scalable if needed
5349 if (VecContainerVT.isFixedLengthVector()) {
5350 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5351 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5352 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5353 }
5354
5355 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5356
5357 // We're working with a vector of the same size as the resulting
5358 // interleaved vector, but with half the number of elements and
5359 // twice the SEW (Hence the restriction on not using the maximum
5360 // ELEN)
5361 MVT WideVT =
5362 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5363 VecVT.getVectorElementCount());
5364 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5365 if (WideContainerVT.isFixedLengthVector())
5366 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5367
5368 // Bitcast the input vectors to integers in case they are FP
5369 VecContainerVT = VecContainerVT.changeTypeToInteger();
5370 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5371 OddV = DAG.getBitcast(VecContainerVT, OddV);
5372
5373 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5374 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5375
5376 SDValue Interleaved;
5377 if (Subtarget.hasStdExtZvbb()) {
5378 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5379 SDValue OffsetVec =
5380 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5381 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5382 OffsetVec, Passthru, Mask, VL);
5383 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5384 Interleaved, EvenV, Passthru, Mask, VL);
5385 } else {
5386 // FIXME: We should freeze the odd vector here. We already handled the case
5387 // of provably undef/poison above.
5388
5389 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5390 // vwaddu.vv
5391 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5392 OddV, Passthru, Mask, VL);
5393
5394 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones
5395 SDValue AllOnesVec = DAG.getSplatVector(
5396 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5397 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5398 OddV, AllOnesVec, Passthru, Mask, VL);
5399
5400 // Add the two together so we get
5401 // (OddV * 0xff...ff) + (OddV + EvenV)
5402 // = (OddV * 0x100...00) + EvenV
5403 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5404 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5405 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5406 Interleaved, OddsMul, Passthru, Mask, VL);
5407 }
5408
5409 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5410 MVT ResultContainerVT = MVT::getVectorVT(
5411 VecVT.getVectorElementType(), // Make sure to use original type
5412 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5413 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5414
5415 // Convert back to a fixed vector if needed
5416 MVT ResultVT =
5417 MVT::getVectorVT(VecVT.getVectorElementType(),
5418 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5419 if (ResultVT.isFixedLengthVector())
5420 Interleaved =
5421 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5422
5423 return Interleaved;
5424}
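// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the arithmetic identity behind the non-Zvbb path above, modelled on
// 8-bit lanes widened to 16 bits:
//   vwaddu:  Sum = Odd + Even
//   vwmaccu: Res = Odd * 0xFF + Sum = Odd * 0x100 + Even = (Odd << 8) | Even
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned Even = 0; Even < 256; Even += 17)
    for (unsigned Odd = 0; Odd < 256; Odd += 23) {
      uint16_t Sum = uint16_t(Odd + Even);       // widening add with zeroes
      uint16_t Res = uint16_t(Odd * 0xFF + Sum); // widening multiply-accumulate
      assert(Res == ((Odd << 8) | Even));        // interleaved wide element
    }
  return 0;
}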
5425
5426// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5427// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5429 SelectionDAG &DAG,
5430 const RISCVSubtarget &Subtarget) {
5431 SDLoc DL(SVN);
5432 MVT VT = SVN->getSimpleValueType(0);
5433 SDValue V = SVN->getOperand(0);
5434 unsigned NumElts = VT.getVectorNumElements();
5435
5436 assert(VT.getVectorElementType() == MVT::i1);
5437
5439 SVN->getMask().size()) ||
5440 !SVN->getOperand(1).isUndef())
5441 return SDValue();
5442
5443 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5444 EVT ViaVT = EVT::getVectorVT(
5445 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5446 EVT ViaBitVT =
5447 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5448
5449 // If we don't have zvbb or the larger element type > ELEN, the operation will
5450 // be illegal.
5451 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5452 ViaVT) ||
5453 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5454 return SDValue();
5455
5456 // If the bit vector doesn't fit exactly into the larger element type, we need
5457 // to insert it into the larger vector and then shift up the reversed bits
5458 // afterwards to get rid of the gap introduced.
5459 if (ViaEltSize > NumElts)
5460 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5461
5462 SDValue Res =
5463 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5464
5465 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5466 // element type.
5467 if (ViaEltSize > NumElts)
5468 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5469 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5470
5471 Res = DAG.getBitcast(ViaBitVT, Res);
5472
5473 if (ViaEltSize > NumElts)
5474 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5475 return Res;
5476}
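// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the same principle at i8 width -- reversing a v8i1 vector is the
// bit-reversal of the i8 it bitcasts to, just as v32i1 maps to a v1i32
// bitreverse above.
#include <cassert>
#include <cstdint>
int main() {
  auto BitReverse8 = [](uint8_t X) {
    uint8_t R = 0;
    for (int i = 0; i < 8; ++i)
      R = uint8_t(R | (((X >> i) & 1) << (7 - i)));
    return R;
  };
  uint8_t Mask = 0b10110010; // bit i holds element i of the v8i1 vector
  uint8_t Reversed = BitReverse8(Mask);
  for (int i = 0; i < 8; ++i)
    assert(((Reversed >> i) & 1) == ((Mask >> (7 - i)) & 1));
  return 0;
}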
5477
5478static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5479 const RISCVSubtarget &Subtarget,
5480 MVT &RotateVT, unsigned &RotateAmt) {
5481 unsigned NumElts = VT.getVectorNumElements();
5482 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5483 unsigned NumSubElts;
5484 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5485 NumElts, NumSubElts, RotateAmt))
5486 return false;
5487 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5488 NumElts / NumSubElts);
5489
5490 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5491 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5492}
5493
5494// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5495// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5496// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5498 SelectionDAG &DAG,
5499 const RISCVSubtarget &Subtarget) {
5500 SDLoc DL(SVN);
5501
5502 EVT VT = SVN->getValueType(0);
5503 unsigned RotateAmt;
5504 MVT RotateVT;
5505 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5506 return SDValue();
5507
5508 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5509
5510 SDValue Rotate;
5511 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5512 // so canonicalize to vrev8.
5513 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5514 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5515 else
5516 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5517 DAG.getConstant(RotateAmt, DL, RotateVT));
5518
5519 return DAG.getBitcast(VT, Rotate);
5520}
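// Illustrative sketch (editor-added, standalone C++17, little-endian host
// assumed; not taken from this file): the example mask from the comment
// above, <3, 0, 1, 2, 7, 4, 5, 6> on v8i8, regroups into two i32 lanes each
// rotated by one byte (ROTL by 8, equivalently ROTR by 24).
#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  std::array<uint8_t, 8> Src = {0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 8> Mask = {3, 0, 1, 2, 7, 4, 5, 6};
  std::array<uint8_t, 8> Expected;
  for (int i = 0; i < 8; ++i)
    Expected[i] = Src[Mask[i]];
  std::array<uint8_t, 8> Rotated;
  for (int Lane = 0; Lane < 2; ++Lane) {
    uint32_t W;
    std::memcpy(&W, Src.data() + 4 * Lane, 4);
    W = (W << 8) | (W >> 24); // rotate each i32 lane left by 8 bits
    std::memcpy(Rotated.data() + 4 * Lane, &W, 4);
  }
  assert(Rotated == Expected);
  return 0;
}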
5521
5522// If compiling with an exactly known VLEN, see if we can split a
5523// shuffle on m2 or larger into a small number of m1 sized shuffles
5524// which write each destination register exactly once.
5525static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5526 SelectionDAG &DAG,
5527 const RISCVSubtarget &Subtarget) {
5528 SDLoc DL(SVN);
5529 MVT VT = SVN->getSimpleValueType(0);
5530 SDValue V1 = SVN->getOperand(0);
5531 SDValue V2 = SVN->getOperand(1);
5532 ArrayRef<int> Mask = SVN->getMask();
5533
5534 // If we don't know exact data layout, not much we can do. If this
5535 // is already m1 or smaller, no point in splitting further.
5536 const auto VLen = Subtarget.getRealVLen();
5537 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5538 return SDValue();
5539
5540 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5541 // expansion for.
5542 unsigned RotateAmt;
5543 MVT RotateVT;
5544 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5545 return SDValue();
5546
5547 MVT ElemVT = VT.getVectorElementType();
5548 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5549
5550 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5551 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5552 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5553 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5554 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5555 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5556 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5557 unsigned NumOfDestRegs = NumElts / NumOpElts;
5558 // The following semantically builds up a fixed length concat_vector
5559 // of the component shuffle_vectors. We eagerly lower to scalable here
5560 // to avoid DAG combining it back to a large shuffle_vector again.
5561 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5562 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5563 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5564 Operands;
5565 processShuffleMasks(
5566 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5567 [&]() { Operands.emplace_back(); },
5568 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5569 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5570 SmallVector<int>(SrcSubMask));
5571 },
5572 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5573 if (NewReg)
5574 Operands.emplace_back();
5575 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5576 });
5577 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5578 // Note: check that we do not emit too many shuffles here to prevent code
5579 // size explosion.
5580 // TODO: investigate, if it can be improved by extra analysis of the masks to
5581 // check if the code is more profitable.
5582 unsigned NumShuffles = std::accumulate(
5583 Operands.begin(), Operands.end(), 0u,
5584 [&](unsigned N,
5585 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5586 if (Data.empty())
5587 return N;
5588 N += Data.size();
5589 for (const auto &P : Data) {
5590 unsigned Idx2 = std::get<1>(P);
5591 ArrayRef<int> Mask = std::get<2>(P);
5592 if (Idx2 != UINT_MAX)
5593 ++N;
5594 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5595 --N;
5596 }
5597 return N;
5598 });
5599 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5600 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5601 return SDValue();
5602 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5603 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5604 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5605 return SubVec;
5606 };
5607 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5608 ArrayRef<int> Mask) {
5609 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5610 return SubVec;
5611 };
5612 SDValue Vec = DAG.getUNDEF(ContainerVT);
5613 for (auto [I, Data] : enumerate(Operands)) {
5614 if (Data.empty())
5615 continue;
5616 SmallDenseMap<unsigned, SDValue> Values;
5617 for (unsigned I : seq<unsigned>(Data.size())) {
5618 const auto &[Idx1, Idx2, _] = Data[I];
5619 // If the shuffle contains permutation of odd number of elements,
5620 // Idx1 might be used already in the first iteration.
5621 //
5622 // Idx1 = shuffle Idx1, Idx2
5623 // Idx1 = shuffle Idx1, Idx3
5624 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5625 if (!V)
5626 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5627 (Idx1 % NumOfSrcRegs) * NumOpElts);
5628 if (Idx2 != UINT_MAX) {
5629 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5630 if (!V)
5631 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5632 (Idx2 % NumOfSrcRegs) * NumOpElts);
5633 }
5634 }
5635 SDValue V;
5636 for (const auto &[Idx1, Idx2, Mask] : Data) {
5637 SDValue V1 = Values.at(Idx1);
5638 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5639 V = PerformShuffle(V1, V2, Mask);
5640 Values[Idx1] = V;
5641 }
5642
5643 unsigned InsertIdx = I * NumOpElts;
5644 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5645 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5646 }
5647 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5648}
5649
5650// Matches a subset of compress masks with a contiguous prefix of output
5651// elements. This could be extended to allow gaps by deciding which
5652// source elements to spuriously demand.
5653static bool isCompressMask(ArrayRef<int> Mask) {
5654 int Last = -1;
5655 bool SawUndef = false;
5656 for (const auto &[Idx, M] : enumerate(Mask)) {
5657 if (M == -1) {
5658 SawUndef = true;
5659 continue;
5660 }
5661 if (SawUndef)
5662 return false;
5663 if (Idx > (unsigned)M)
5664 return false;
5665 if (M <= Last)
5666 return false;
5667 Last = M;
5668 }
5669 return true;
5670}
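// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): a compress-style mask keeps a strictly increasing subset of source
// elements packed at the front, with only trailing lanes allowed to be undef,
// e.g. <0, 2, 5, -1>. This mirrors the checks in isCompressMask above.
#include <array>
#include <cassert>
int main() {
  std::array<int, 4> Mask = {0, 2, 5, -1};
  int Last = -1;
  bool SawUndef = false, IsCompress = true;
  for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
    int M = Mask[Idx];
    if (M == -1) {
      SawUndef = true;
      continue;
    }
    if (SawUndef || Idx > (unsigned)M || M <= Last) {
      IsCompress = false;
      break;
    }
    Last = M;
  }
  assert(IsCompress);
  return 0;
}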
5671
5672/// Given a shuffle where the indices are disjoint between the two sources,
5673/// e.g.:
5674///
5675/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5676///
5677/// Merge the two sources into one and do a single source shuffle:
5678///
5679/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5680/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5681///
5682/// A vselect will either be merged into a masked instruction or be lowered as a
5683/// vmerge.vvm, which is cheaper than a vrgather.vv.
5684static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5685 SelectionDAG &DAG,
5686 const RISCVSubtarget &Subtarget) {
5687 MVT VT = SVN->getSimpleValueType(0);
5688 MVT XLenVT = Subtarget.getXLenVT();
5689 SDLoc DL(SVN);
5690
5691 const ArrayRef<int> Mask = SVN->getMask();
5692
5693 // Work out which source each lane will come from.
5694 SmallVector<int, 16> Srcs(Mask.size(), -1);
5695
5696 for (int Idx : Mask) {
5697 if (Idx == -1)
5698 continue;
5699 unsigned SrcIdx = Idx % Mask.size();
5700 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5701 if (Srcs[SrcIdx] == -1)
5702 // Mark this source as using this lane.
5703 Srcs[SrcIdx] = Src;
5704 else if (Srcs[SrcIdx] != Src)
5705 // The other source is using this lane: not disjoint.
5706 return SDValue();
5707 }
5708
5709 SmallVector<SDValue> SelectMaskVals;
5710 for (int Lane : Srcs) {
5711 if (Lane == -1)
5712 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5713 else
5714 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5715 }
5716 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5717 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5718 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5719 SVN->getOperand(0), SVN->getOperand(1));
5720
5721 // Move all indices relative to the first source.
5722 SmallVector<int> NewMask(Mask.size());
5723 for (unsigned I = 0; I < Mask.size(); I++) {
5724 if (Mask[I] == -1)
5725 NewMask[I] = -1;
5726 else
5727 NewMask[I] = Mask[I] % Mask.size();
5728 }
5729
5730 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5731}
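// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the worked transform from the comment above. For mask <2, 7, 1, 4>
// each source lane is claimed by exactly one operand, so a per-lane select
// merges the operands and a single-source shuffle (indices reduced modulo 4)
// finishes the job.
#include <array>
#include <cassert>
int main() {
  std::array<int, 4> T0 = {10, 11, 12, 13}, T1 = {20, 21, 22, 23};
  std::array<int, 4> Mask = {2, 7, 1, 4};
  // Merge: lane j takes T1[j] iff some mask entry equals j + 4, else T0[j].
  std::array<int, 4> Merged = T0;
  for (int M : Mask)
    if (M >= 4)
      Merged[M - 4] = T1[M - 4];
  // Single-source shuffle of the merged vector with indices modulo 4.
  std::array<int, 4> Result;
  for (int i = 0; i < 4; ++i)
    Result[i] = Merged[Mask[i] % 4];
  for (int i = 0; i < 4; ++i)
    assert(Result[i] == (Mask[i] < 4 ? T0[Mask[i]] : T1[Mask[i] - 4]));
  return 0;
}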
5732
5733/// Is this mask local (i.e. elements only move within their local span), and
5734/// repeating (that is, the same rearrangement is being done within each span)?
5735static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5736 // Require a prefix from the original mask until the consumer code
5737 // is adjusted to rewrite the mask instead of just taking a prefix.
5738 for (auto [I, M] : enumerate(Mask)) {
5739 if (M == -1)
5740 continue;
5741 if ((M / Span) != (int)(I / Span))
5742 return false;
5743 int SpanIdx = I % Span;
5744 int Expected = M % Span;
5745 if (Mask[SpanIdx] != Expected)
5746 return false;
5747 }
5748 return true;
5749}
5750
5751/// Is this mask only using elements from the first span of the input?
5752static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5753 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5754}
5755
5756/// Return true for a mask which performs an arbitrary shuffle within the first
5757/// span, and then repeats that same result across all remaining spans. Note
5758/// that this doesn't check if all the inputs come from a single span!
5759static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5760 // Require a prefix from the original mask until the consumer code
5761 // is adjusted to rewrite the mask instead of just taking a prefix.
5762 for (auto [I, M] : enumerate(Mask)) {
5763 if (M == -1)
5764 continue;
5765 int SpanIdx = I % Span;
5766 if (Mask[SpanIdx] != M)
5767 return false;
5768 }
5769 return true;
5770}
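// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): the two mask shapes distinguished above, with Span = 4 (one vector
// register worth of elements).
#include <array>
#include <cassert>
int main() {
  const int Span = 4;
  // Locally repeating: each span permutes only its own elements, and every
  // span applies the same permutation as the first, e.g. <1,0,3,2, 5,4,7,6>.
  std::array<int, 8> Local = {1, 0, 3, 2, 5, 4, 7, 6};
  for (int I = 0; I < 8; ++I) {
    int M = Local[I];
    assert(M / Span == I / Span && Local[I % Span] == M % Span);
  }
  // Span splat: every span repeats the first span's result and reads the same
  // absolute (low-span) indices, e.g. <2,0,3,1, 2,0,3,1>.
  std::array<int, 8> Splat = {2, 0, 3, 1, 2, 0, 3, 1};
  for (int I = 0; I < 8; ++I)
    assert(Splat[I % Span] == Splat[I] && Splat[I] < Span);
  return 0;
}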
5771
5772/// Try to widen element type to get a new mask value for a better permutation
5773/// sequence. This doesn't try to inspect the widened mask for profitability;
5774/// we speculate the widened form is equal or better. This has the effect of
5775/// reducing mask constant sizes - allowing cheaper materialization sequences
5776/// - and index sequence sizes - reducing register pressure and materialization
5777/// cost, at the cost of (possibly) an extra VTYPE toggle.
5778static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5779 SDLoc DL(Op);
5780 MVT VT = Op.getSimpleValueType();
5781 MVT ScalarVT = VT.getVectorElementType();
5782 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5783 SDValue V0 = Op.getOperand(0);
5784 SDValue V1 = Op.getOperand(1);
5785 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5786
5787 // Avoid wasted work leading to isTypeLegal check failing below
5788 if (ElementSize > 32)
5789 return SDValue();
5790
5791 SmallVector<int, 8> NewMask;
5792 if (!widenShuffleMaskElts(Mask, NewMask))
5793 return SDValue();
5794
5795 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5796 : MVT::getIntegerVT(ElementSize * 2);
5797 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5798 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5799 return SDValue();
5800 V0 = DAG.getBitcast(NewVT, V0);
5801 V1 = DAG.getBitcast(NewVT, V1);
5802 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5803}
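// Illustrative sketch (editor-added, standalone C++17; not taken from this
// file): mask widening as used above. If every aligned pair of indices is
// contiguous and even-aligned, a v4 (e.g. i32) mask can be rewritten as a v2
// mask on elements of twice the width, e.g. <2, 3, 0, 1> -> <1, 0>.
#include <array>
#include <cassert>
int main() {
  std::array<int, 4> Mask = {2, 3, 0, 1};
  std::array<int, 2> WideMask;
  for (int i = 0; i < 2; ++i) {
    // Pair must start on an even source index and be consecutive.
    assert(Mask[2 * i] % 2 == 0 && Mask[2 * i + 1] == Mask[2 * i] + 1);
    WideMask[i] = Mask[2 * i] / 2;
  }
  assert(WideMask[0] == 1 && WideMask[1] == 0);
  return 0;
}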
5804
5805static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5806 const RISCVSubtarget &Subtarget) {
5807 SDValue V1 = Op.getOperand(0);
5808 SDValue V2 = Op.getOperand(1);
5809 SDLoc DL(Op);
5810 MVT XLenVT = Subtarget.getXLenVT();
5811 MVT VT = Op.getSimpleValueType();
5812 unsigned NumElts = VT.getVectorNumElements();
5813 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5814
5815 if (VT.getVectorElementType() == MVT::i1) {
5816 // Lower to a vror.vi of a larger element type if possible before we promote
5817 // i1s to i8s.
5818 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5819 return V;
5820 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5821 return V;
5822
5823 // Promote i1 shuffle to i8 shuffle.
5824 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5825 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5826 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5827 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5828 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5829 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5830 ISD::SETNE);
5831 }
5832
5833 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5834
5835 // Store the return value in a single variable instead of structured bindings
5836 // so that we can pass it to GetSlide below, which cannot capture structured
5837 // bindings until C++20.
5838 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5839 auto [TrueMask, VL] = TrueMaskVL;
5840
5841 if (SVN->isSplat()) {
5842 const int Lane = SVN->getSplatIndex();
5843 if (Lane >= 0) {
5844 MVT SVT = VT.getVectorElementType();
5845
5846 // Turn splatted vector load into a strided load with an X0 stride.
5847 SDValue V = V1;
5848 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5849 // with undef.
5850 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5851 int Offset = Lane;
5852 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5853 int OpElements =
5854 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5855 V = V.getOperand(Offset / OpElements);
5856 Offset %= OpElements;
5857 }
5858
5859 // We need to ensure the load isn't atomic or volatile.
5860 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5861 auto *Ld = cast<LoadSDNode>(V);
5862 Offset *= SVT.getStoreSize();
5863 SDValue NewAddr = DAG.getMemBasePlusOffset(
5864 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5865
5866 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5867 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5868 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5869 SDValue IntID =
5870 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5871 SDValue Ops[] = {Ld->getChain(),
5872 IntID,
5873 DAG.getUNDEF(ContainerVT),
5874 NewAddr,
5875 DAG.getRegister(RISCV::X0, XLenVT),
5876 VL};
5877 SDValue NewLoad = DAG.getMemIntrinsicNode(
5878 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5879 DAG.getMachineFunction().getMachineMemOperand(
5880 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5881 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5882 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5883 }
5884
5885 MVT SplatVT = ContainerVT;
5886
5887 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5888 if (SVT == MVT::bf16 ||
5889 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5890 SVT = MVT::i16;
5891 SplatVT = ContainerVT.changeVectorElementType(SVT);
5892 }
5893
5894 // Otherwise use a scalar load and splat. This will give the best
5895 // opportunity to fold a splat into the operation. ISel can turn it into
5896 // the x0 strided load if we aren't able to fold away the select.
5897 if (SVT.isFloatingPoint())
5898 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5899 Ld->getPointerInfo().getWithOffset(Offset),
5900 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5901 else
5902 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5903 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5904 Ld->getBaseAlign(),
5905 Ld->getMemOperand()->getFlags());
5906 DAG.makeEquivalentMemoryOrdering(Ld, V);
5907
5908 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5909 : RISCVISD::VMV_V_X_VL;
5910 SDValue Splat =
5911 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5912 Splat = DAG.getBitcast(ContainerVT, Splat);
5913 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5914 }
5915
5916 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5917 assert(Lane < (int)NumElts && "Unexpected lane!");
5918 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5919 V1, DAG.getConstant(Lane, DL, XLenVT),
5920 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5921 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5922 }
5923 }
5924
5925 // For exact VLEN m2 or greater, try to split to m1 operations if we
5926 // can split cleanly.
5927 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5928 return V;
5929
5930 ArrayRef<int> Mask = SVN->getMask();
5931
5932 if (SDValue V =
5933 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5934 return V;
5935
5936 if (SDValue V =
5937 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5938 return V;
5939
5940 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5941 // available.
5942 if (Subtarget.hasStdExtZvkb())
5943 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5944 return V;
5945
5946 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5947 NumElts != 2)
5948 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5949
5950 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5951 // use shift and truncate to perform the shuffle.
5952 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5953 // shift-and-trunc reducing total cost for everything except an mf8 result.
5954 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5955 // to do the entire operation.
5956 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5957 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5958 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5959 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5960 unsigned Index = 0;
5961 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5962 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5963 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5964 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5965 if (1 < count_if(Mask,
5966 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5967 1 < count_if(Mask, [&Mask](int Idx) {
5968 return Idx >= (int)Mask.size();
5969 })) {
5970 // Narrow each source and concatenate them.
5971 // FIXME: For small LMUL it is better to concatenate first.
5972 MVT EltVT = VT.getVectorElementType();
5973 auto EltCnt = VT.getVectorElementCount();
5974 MVT SubVT =
5975 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5976
5977 SDValue Lo =
5978 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5979 SDValue Hi =
5980 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5981
5982 SDValue Concat =
5983 DAG.getNode(ISD::CONCAT_VECTORS, DL,
5984 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5985 if (Factor == 2)
5986 return Concat;
5987
5988 SDValue Vec = DAG.getUNDEF(VT);
5989 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
5990 }
5991 }
5992 }
5993 }
5994
5995 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
5996 // e64 which can't match above.
5997 unsigned Index = 0;
5998 if (Subtarget.hasVendorXRivosVizip() &&
5999 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
6000 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6001 unsigned Opc =
6002 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6003 if (V2.isUndef())
6004 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6005 if (auto VLEN = Subtarget.getRealVLen();
6006 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6007 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6008 if (SDValue Src = foldConcatVector(V1, V2)) {
6009 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6010 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6011 SDValue Res =
6012 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6013 return DAG.getExtractSubvector(DL, VT, Res, 0);
6014 }
6015 // Deinterleave each source and concatenate them, or concat first, then
6016 // deinterleave.
6017 if (1 < count_if(Mask,
6018 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6019 1 < count_if(Mask,
6020 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6021
6022 const unsigned EltSize = VT.getScalarSizeInBits();
6023 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6024 if (NumElts < MinVLMAX) {
6025 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6026 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6027 SDValue Res =
6028 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6029 return DAG.getExtractSubvector(DL, VT, Res, 0);
6030 }
6031
6032 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6033 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6034
6035 MVT SubVT = VT.getHalfNumVectorElementsVT();
6036 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6037 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6038 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6039 }
6040 }
6041
6042 if (SDValue V =
6043 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6044 return V;
6045
6046 // Detect an interleave shuffle and lower to
6047 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6048 int EvenSrc, OddSrc;
6049 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6050 !(NumElts == 2 &&
6051 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6052 // Extract the halves of the vectors.
6053 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6054
6055 // Recognize if one half is actually undef; the matching above will
6056 // otherwise reuse the even stream for the undef one. This improves
6057 // spread(2) shuffles.
6058 bool LaneIsUndef[2] = { true, true};
6059 for (const auto &[Idx, M] : enumerate(Mask))
6060 LaneIsUndef[Idx % 2] &= (M == -1);
6061
6062 int Size = Mask.size();
6063 SDValue EvenV, OddV;
6064 if (LaneIsUndef[0]) {
6065 EvenV = DAG.getUNDEF(HalfVT);
6066 } else {
6067 assert(EvenSrc >= 0 && "Undef source?");
6068 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6069 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6070 }
6071
6072 if (LaneIsUndef[1]) {
6073 OddV = DAG.getUNDEF(HalfVT);
6074 } else {
6075 assert(OddSrc >= 0 && "Undef source?");
6076 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6077 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6078 }
6079
6080 // Prefer vzip2a if available.
6081 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6082 if (Subtarget.hasVendorXRivosVizip()) {
6083 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6084 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6085 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6086 }
6087 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6088 }
6089
6090 // Recognize a pattern which can handled via a pair of vslideup/vslidedown
6091 // instructions (in any combination) with masking on the second instruction.
6092 // Also handles masked slides into an identity source, and single slides
6093 // without masking. Avoid matching bit rotates (which are not also element
6094 // rotates) as slide pairs. This is a performance heuristic, not a
6095 // functional check.
6096 std::array<std::pair<int, int>, 2> SrcInfo;
6097 unsigned RotateAmt;
6098 MVT RotateVT;
6099 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6100 (isElementRotate(SrcInfo, NumElts) ||
6101 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6102 SDValue Sources[2];
6103 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6104 int SrcIdx = Info.first;
6105 assert(SrcIdx == 0 || SrcIdx == 1);
6106 SDValue &Src = Sources[SrcIdx];
6107 if (!Src) {
6108 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6109 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6110 }
6111 return Src;
6112 };
6113 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6114 SDValue Passthru) {
6115 auto [TrueMask, VL] = TrueMaskVL;
6116 SDValue SrcV = GetSourceFor(Src);
6117 int SlideAmt = Src.second;
6118 if (SlideAmt == 0) {
6119 // Should never be second operation
6120 assert(Mask == TrueMask);
6121 return SrcV;
6122 }
6123 if (SlideAmt < 0)
6124 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6125 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6127 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6128 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6130 };
6131
6132 if (SrcInfo[1].first == -1) {
6133 SDValue Res = DAG.getUNDEF(ContainerVT);
6134 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6135 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6136 }
6137
6138 if (Subtarget.hasVendorXRivosVizip()) {
6139 bool TryWiden = false;
6140 unsigned Factor;
6141 if (isZipEven(SrcInfo, Mask, Factor)) {
6142 if (Factor == 1) {
6143 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6144 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6145 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6146 Subtarget);
6147 }
6148 TryWiden = true;
6149 }
6150 if (isZipOdd(SrcInfo, Mask, Factor)) {
6151 if (Factor == 1) {
6152 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6153 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6154 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6155 Subtarget);
6156 }
6157 TryWiden = true;
6158 }
6159 // If we found a widening opportunity which would let us form a
6160 // zipeven or zipodd, use the generic code to widen the shuffle
6161 // and recurse through this logic.
6162 if (TryWiden)
6163 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6164 return V;
6165 }
6166
6167 // Build the mask. Note that vslideup unconditionally preserves elements
6168 // below the slide amount in the destination, and thus those elements are
6169 // undefined in the mask. If the mask ends up all true (or undef), it
6170 // will be folded away by general logic.
6171 SmallVector<SDValue> MaskVals;
6172 for (const auto &[Idx, M] : enumerate(Mask)) {
6173 if (M < 0 ||
6174 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6175 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6176 continue;
6177 }
6178 int Src = M >= (int)NumElts;
6179 int Diff = (int)Idx - (M % NumElts);
6180 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6181 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6182 "Must match exactly one of the two slides");
6183 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6184 }
6185 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6186 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6187 SDValue SelectMask = convertToScalableVector(
6188 ContainerVT.changeVectorElementType(MVT::i1),
6189 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6190
6191 SDValue Res = DAG.getUNDEF(ContainerVT);
6192 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6193 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6194 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6195 }
6196
6197 // Handle any remaining single source shuffles
6198 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6199 if (V2.isUndef()) {
6200 // We might be able to express the shuffle as a bitrotate. But even if we
6201 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6202 // shifts and a vor will have a higher throughput than a vrgather.
6203 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6204 return V;
6205
6206 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6207 return V;
6208
6209 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6210 // is fully covered in interleave(2) above, so it is ignored here.
6211 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6212 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6213 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6214 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6215 unsigned Index;
6216 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6217 MVT NarrowVT =
6218 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6219 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6220 return getWideningSpread(Src, Factor, Index, DL, DAG);
6221 }
6222 }
6223 }
6224
6225 // If only a prefix of the source elements influence a prefix of the
6226 // destination elements, try to see if we can reduce the required LMUL
6227 unsigned MinVLen = Subtarget.getRealMinVLen();
6228 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6229 if (NumElts > MinVLMAX) {
6230 unsigned MaxIdx = 0;
6231 for (auto [I, M] : enumerate(Mask)) {
6232 if (M == -1)
6233 continue;
6234 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6235 }
6236 unsigned NewNumElts =
6237 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6238 if (NewNumElts != NumElts) {
6239 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6240 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6241 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6242 Mask.take_front(NewNumElts));
6243 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6244 }
6245 }
6246
6247 // Before hitting generic lowering fallbacks, try to widen the mask
6248 // to a wider SEW.
6249 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6250 return V;
6251
6252 // Can we generate a vcompress instead of a vrgather? These scale better
6253 // at high LMUL, at the cost of not being able to fold a following select
6254 // into them. The mask constants are also smaller than the index vector
6255 // constants, and thus easier to materialize.
6256 if (isCompressMask(Mask)) {
6257 SmallVector<SDValue> MaskVals(NumElts,
6258 DAG.getConstant(false, DL, XLenVT));
6259 for (auto Idx : Mask) {
6260 if (Idx == -1)
6261 break;
6262 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6263 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6264 }
6265 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6266 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6267 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6268 DAG.getUNDEF(VT));
6269 }
6270
6271 if (VT.getScalarSizeInBits() == 8 &&
6272 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6273 // On such a vector we're unable to use i8 as the index type.
6274 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6275 // may involve vector splitting if we're already at LMUL=8, or our
6276 // user-supplied maximum fixed-length LMUL.
6277 return SDValue();
6278 }
6279
6280 // Base case for the two operand recursion below - handle the worst case
6281 // single source shuffle.
6282 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6283 MVT IndexVT = VT.changeTypeToInteger();
6284 // Since we can't introduce illegal index types at this stage, use i16 and
6285 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6286 // than XLenVT.
6287 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6288 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6289 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6290 }
6291
6292 // If the mask allows, we can do all the index computation in 16 bits. This
6293 // requires less work and less register pressure at high LMUL, and creates
6294 // smaller constants which may be cheaper to materialize.
6295 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6296 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6297 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6298 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6299 }
6300
6301 MVT IndexContainerVT =
6302 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6303
6304 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6305 SmallVector<SDValue> GatherIndicesLHS;
6306 for (int MaskIndex : Mask) {
6307 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6308 GatherIndicesLHS.push_back(IsLHSIndex
6309 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6310 : DAG.getUNDEF(XLenVT));
6311 }
6312 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6313 LHSIndices =
6314 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6315 // At m1 and less, there's no point trying any of the high LMUL splitting
6316 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6317 if (NumElts <= MinVLMAX) {
6318 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6319 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6320 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6321 }
6322
6323 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6324 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6325 auto [InnerTrueMask, InnerVL] =
6326 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6327 int N =
6328 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6329 assert(isPowerOf2_32(N) && N <= 8);
6330
6331 // If we have a locally repeating mask, then we can reuse the first
6332 // register in the index register group for all registers within the
6333 // source register group. TODO: This generalizes to m2, and m4.
6334 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6335 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6336 SDValue Gather = DAG.getUNDEF(ContainerVT);
6337 for (int i = 0; i < N; i++) {
6338 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6339 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6340 SDValue SubVec =
6341 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6342 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6343 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6344 }
6345 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6346 }
6347
6348 // If we have a shuffle which only uses the first register in our source
6349 // register group, and repeats the same index across all spans, we can
6350 // use a single vrgather (and possibly some register moves).
6351 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6352 // which we can do a linear number of shuffles to form an m1 which
6353 // contains all the output elements.
6354 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6355 isSpanSplatShuffle(Mask, MinVLMAX)) {
6356 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6357 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6358 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6359 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6360 SDValue Gather = DAG.getUNDEF(ContainerVT);
6361 for (int i = 0; i < N; i++)
6362 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6363 M1VT.getVectorMinNumElements() * i);
6364 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6365 }
6366
6367 // If we have a shuffle which only uses the first register in our
6368 // source register group, we can do a linear number of m1 vrgathers
6369 // reusing the same source register (but with different indices)
6370 // TODO: This can be generalized for m2 or m4, or for any shuffle
6371 // for which we can do a vslidedown followed by this expansion.
6372 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6373 SDValue SlideAmt =
6374 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6375 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6376 SDValue Gather = DAG.getUNDEF(ContainerVT);
6377 for (int i = 0; i < N; i++) {
6378 if (i != 0)
6379 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6380 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6381 SlideAmt, TrueMask, VL);
6382 SDValue SubIndex =
6383 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6384 SDValue SubVec =
6385 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6386 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6387 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6388 M1VT.getVectorMinNumElements() * i);
6389 }
6390 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6391 }
6392
6393 // Fallback to generic vrgather if we can't find anything better.
6394 // On many machines, this will be O(LMUL^2)
6395 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6396 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6397 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6398 }
6399
6400 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6401 // merged with a second vrgather.
6402 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6403
6404 // Now construct the mask that will be used by the blended vrgather operation.
6405 // Construct the appropriate indices into each vector.
6406 for (int MaskIndex : Mask) {
6407 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6408 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6409 ? MaskIndex : -1);
6410 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6411 }
6412
6413 // If the mask indices are disjoint between the two sources, we can lower it
6414 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6415 // operands may end up being lowered to something cheaper than a vrgather.vv.
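// For example (illustrative), for mask <1,4,3,6> on two v4 sources, V1
// supplies indices {1,3} and V2 supplies {0,2}; the sources can be merged
// with a vselect and a single vrgather.vv with indices <1,0,3,2> then
// produces the result.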
6416 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6417 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6418 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6419 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6420 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6421 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6422 return V;
6423
6424 // Before hitting generic lowering fallbacks, try to widen the mask
6425 // to a wider SEW.
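// For example, a v4i8 shuffle with mask <2,3,0,1> is equivalent to a v2i16
// shuffle with mask <1,0>.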
6426 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6427 return V;
6428
6429 // Try to pick a profitable operand order.
6430 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6431 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6432
6433 // Recursively invoke lowering for each operand if we had two
6434 // independent single source shuffles, and then combine the result via a
6435 // vselect. Note that the vselect will likely be folded back into the
6436 // second permute (vrgather, or other) by the post-isel combine.
6437 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6438 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6439
6440 SmallVector<SDValue> MaskVals;
6441 for (int MaskIndex : Mask) {
6442 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6443 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6444 }
6445
6446 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6447 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6448 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6449
6450 if (SwapOps)
6451 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6452 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6453}
6454
6455bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6456 // Only support legal VTs for other shuffles for now.
6457 if (!isTypeLegal(VT))
6458 return false;
6459
6460 // Support splats for any type. These should type legalize well.
6461 if (ShuffleVectorSDNode::isSplatMask(M))
6462 return true;
6463
6464 const unsigned NumElts = M.size();
6465 MVT SVT = VT.getSimpleVT();
6466
6467 // Not for i1 vectors.
6468 if (SVT.getScalarType() == MVT::i1)
6469 return false;
6470
6471 std::array<std::pair<int, int>, 2> SrcInfo;
6472 int Dummy1, Dummy2;
6473 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6474 (::isMaskedSlidePair(M, SrcInfo) &&
6475 isElementRotate(SrcInfo, NumElts)) ||
6476 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6477}
6478
6479// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6480// the exponent.
6481SDValue
6482RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6483 SelectionDAG &DAG) const {
6484 MVT VT = Op.getSimpleValueType();
6485 unsigned EltSize = VT.getScalarSizeInBits();
6486 SDValue Src = Op.getOperand(0);
6487 SDLoc DL(Op);
6488 MVT ContainerVT = VT;
6489
6490 SDValue Mask, VL;
6491 if (Op->isVPOpcode()) {
6492 Mask = Op.getOperand(1);
6493 if (VT.isFixedLengthVector())
6494 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6495 Subtarget);
6496 VL = Op.getOperand(2);
6497 }
6498
6499 // We choose an FP type that can represent the value exactly if possible.
6500 // Otherwise, we use a round-toward-zero conversion so the exponent of the result stays correct.
6501 // TODO: Use f16 for i8 when possible?
6502 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6503 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6504 FloatEltVT = MVT::f32;
6505 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6506
6507 // Legal types should have been checked in the RISCVTargetLowering
6508 // constructor.
6509 // TODO: Splitting may make sense in some cases.
6510 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6511 "Expected legal float type!");
6512
6513 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6514 // The trailing zero count is equal to log2 of this single bit value.
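// For example, for x = 12 (0b1100), x & -x = 4 and log2(4) = 2 == cttz(12).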
6515 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6516 SDValue Neg = DAG.getNegative(Src, DL, VT);
6517 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6518 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6519 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6520 Src, Mask, VL);
6521 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6522 }
6523
6524 // We have a legal FP type, convert to it.
6525 SDValue FloatVal;
6526 if (FloatVT.bitsGT(VT)) {
6527 if (Op->isVPOpcode())
6528 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6529 else
6530 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6531 } else {
6532 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6533 if (VT.isFixedLengthVector()) {
6534 ContainerVT = getContainerForFixedLengthVector(VT);
6535 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6536 }
6537 if (!Op->isVPOpcode())
6538 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6539 SDValue RTZRM =
6540 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6541 MVT ContainerFloatVT =
6542 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6543 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6544 Src, Mask, RTZRM, VL);
6545 if (VT.isFixedLengthVector())
6546 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6547 }
6548 // Bitcast to integer and shift the exponent to the LSB.
6549 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6550 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6551 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6552
6553 SDValue Exp;
6554 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6555 if (Op->isVPOpcode()) {
6556 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6557 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6558 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6559 } else {
6560 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6561 DAG.getConstant(ShiftAmt, DL, IntVT));
6562 if (IntVT.bitsLT(VT))
6563 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6564 else if (IntVT.bitsGT(VT))
6565 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6566 }
6567
6568 // The exponent contains log2 of the value in biased form.
6569 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6570 // For trailing zeros, we just need to subtract the bias.
6571 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6572 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6573 DAG.getConstant(ExponentBias, DL, VT));
6574 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6575 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6576 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6577
6578 // For leading zeros, we need to remove the bias and convert from log2 to
6579 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
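// For example, for an i32 input with only bit 20 set, the f32 exponent field
// is 127 + 20 = 147, so the leading zero count is (127 + 31) - 147 = 11.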
6580 unsigned Adjust = ExponentBias + (EltSize - 1);
6581 SDValue Res;
6582 if (Op->isVPOpcode())
6583 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6584 Mask, VL);
6585 else
6586 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6587
6588 // For a zero input the result above equals Adjust, which is greater than
6589 // EltSize. Hence, we can use min(Res, EltSize) to compute CTLZ.
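// For example, for i32, Adjust = 127 + 31 = 158, so a zero input yields 158
// and the UMIN below clamps it to 32.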
6590 if (Op.getOpcode() == ISD::CTLZ)
6591 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6592 else if (Op.getOpcode() == ISD::VP_CTLZ)
6593 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6594 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6595 return Res;
6596}
6597
6598SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6599 SelectionDAG &DAG) const {
6600 SDLoc DL(Op);
6601 MVT XLenVT = Subtarget.getXLenVT();
6602 SDValue Source = Op->getOperand(0);
6603 MVT SrcVT = Source.getSimpleValueType();
6604 SDValue Mask = Op->getOperand(1);
6605 SDValue EVL = Op->getOperand(2);
6606
6607 if (SrcVT.isFixedLengthVector()) {
6608 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6609 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6610 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6611 Subtarget);
6612 SrcVT = ContainerVT;
6613 }
6614
6615 // Convert to boolean vector.
6616 if (SrcVT.getScalarType() != MVT::i1) {
6617 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6618 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6619 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6620 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6621 DAG.getUNDEF(SrcVT), Mask, EVL});
6622 }
6623
6624 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6625 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6626 // In this case, we can interpret poison as -1, so there is nothing further to do.
6627 return Res;
6628
6629 // Convert -1 to VL.
6630 SDValue SetCC =
6631 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6632 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6633 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6634}
6635
6636// While RVV has alignment restrictions, we should always be able to load as a
6637// legal equivalently-sized byte-typed vector instead. This method is
6638// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6639// the load is already correctly-aligned, it returns SDValue().
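// For example, an underaligned nxv4i16 load is re-expressed as an nxv8i8 load
// of the same number of bytes, and the result is bitcast back to nxv4i16.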
6640SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6641 SelectionDAG &DAG) const {
6642 auto *Load = cast<LoadSDNode>(Op);
6643 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6644
6645 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6646 Load->getMemoryVT(),
6647 *Load->getMemOperand()))
6648 return SDValue();
6649
6650 SDLoc DL(Op);
6651 MVT VT = Op.getSimpleValueType();
6652 unsigned EltSizeBits = VT.getScalarSizeInBits();
6653 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6654 "Unexpected unaligned RVV load type");
6655 MVT NewVT =
6656 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6657 assert(NewVT.isValid() &&
6658 "Expecting equally-sized RVV vector types to be legal");
6659 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6660 Load->getPointerInfo(), Load->getBaseAlign(),
6661 Load->getMemOperand()->getFlags());
6662 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6663}
6664
6665// While RVV has alignment restrictions, we should always be able to store as a
6666// legal equivalently-sized byte-typed vector instead. This method is
6667// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6668// returns SDValue() if the store is already correctly aligned.
6669SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6670 SelectionDAG &DAG) const {
6671 auto *Store = cast<StoreSDNode>(Op);
6672 assert(Store && Store->getValue().getValueType().isVector() &&
6673 "Expected vector store");
6674
6675 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6676 Store->getMemoryVT(),
6677 *Store->getMemOperand()))
6678 return SDValue();
6679
6680 SDLoc DL(Op);
6681 SDValue StoredVal = Store->getValue();
6682 MVT VT = StoredVal.getSimpleValueType();
6683 unsigned EltSizeBits = VT.getScalarSizeInBits();
6684 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6685 "Unexpected unaligned RVV store type");
6686 MVT NewVT =
6687 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6688 assert(NewVT.isValid() &&
6689 "Expecting equally-sized RVV vector types to be legal");
6690 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6691 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6692 Store->getPointerInfo(), Store->getBaseAlign(),
6693 Store->getMemOperand()->getFlags());
6694}
6695
6696static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6697 const RISCVSubtarget &Subtarget) {
6698 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6699
6700 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6701
6702 // All simm32 constants should be handled by isel.
6703 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6704 // this check redundant, but small immediates are common so this check
6705 // should have better compile time.
6706 if (isInt<32>(Imm))
6707 return Op;
6708
6709 // We only need to cost the immediate, if constant pool lowering is enabled.
6710 if (!Subtarget.useConstantPoolForLargeInts())
6711 return Op;
6712
6714 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6715 return Op;
6716
6717 // Optimizations below are disabled for opt size. If we're optimizing for
6718 // size, use a constant pool.
6719 if (DAG.shouldOptForSize())
6720 return SDValue();
6721
6722 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
6723 // do that if it will avoid a constant pool.
6724 // It will require an extra temporary register though.
6725 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6726 // low and high 32 bits are the same and bit 31 and 63 are set.
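// For example, Imm = 0x0012345600123456 is (X << 32) + X with X = 0x123456,
// so X can be built with lui+addi and combined with one shift and one add.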
6727 unsigned ShiftAmt, AddOpc;
6728 RISCVMatInt::InstSeq SeqLo =
6729 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6730 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6731 return Op;
6732
6733 return SDValue();
6734}
6735
6736SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6737 SelectionDAG &DAG) const {
6738 MVT VT = Op.getSimpleValueType();
6739 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6740
6741 // Can this constant be selected by a Zfa FLI instruction?
6742 bool Negate = false;
6743 int Index = getLegalZfaFPImm(Imm, VT);
6744
6745 // If the constant is negative, try negating.
6746 if (Index < 0 && Imm.isNegative()) {
6747 Index = getLegalZfaFPImm(-Imm, VT);
6748 Negate = true;
6749 }
6750
6751 // If we couldn't find a FLI lowering, fall back to generic code.
6752 if (Index < 0)
6753 return SDValue();
6754
6755 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6756 SDLoc DL(Op);
6757 SDValue Const =
6758 DAG.getNode(RISCVISD::FLI, DL, VT,
6759 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6760 if (!Negate)
6761 return Const;
6762
6763 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6764}
6765
6766static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6767 SelectionDAG &DAG) {
6768
6769 unsigned IsData = Op.getConstantOperandVal(4);
6770
6771 // On mips-p8700 only data prefetches are supported for now; drop the rest.
6772 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6773 return Op.getOperand(0);
6774 return Op;
6775}
6776
6777static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6778 const RISCVSubtarget &Subtarget) {
6779 SDLoc dl(Op);
6780 AtomicOrdering FenceOrdering =
6781 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6782 SyncScope::ID FenceSSID =
6783 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6784
6785 if (Subtarget.hasStdExtZtso()) {
6786 // The only fence that needs an instruction is a sequentially-consistent
6787 // cross-thread fence.
6788 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6789 FenceSSID == SyncScope::System)
6790 return Op;
6791
6792 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6793 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6794 }
6795
6796 // singlethread fences only synchronize with signal handlers on the same
6797 // thread and thus only need to preserve instruction order, not actually
6798 // enforce memory ordering.
6799 if (FenceSSID == SyncScope::SingleThread)
6800 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6801 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6802
6803 return Op;
6804}
6805
6806SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6807 SelectionDAG &DAG) const {
6808 SDLoc DL(Op);
6809 MVT VT = Op.getSimpleValueType();
6810 MVT XLenVT = Subtarget.getXLenVT();
6811 unsigned Check = Op.getConstantOperandVal(1);
6812 unsigned TDCMask = 0;
6813 if (Check & fcSNan)
6814 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6815 if (Check & fcQNan)
6816 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6817 if (Check & fcPosInf)
6818 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6819 if (Check & fcNegInf)
6820 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6821 if (Check & fcPosNormal)
6822 TDCMask |= RISCV::FPMASK_Positive_Normal;
6823 if (Check & fcNegNormal)
6824 TDCMask |= RISCV::FPMASK_Negative_Normal;
6825 if (Check & fcPosSubnormal)
6826 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6827 if (Check & fcNegSubnormal)
6828 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6829 if (Check & fcPosZero)
6830 TDCMask |= RISCV::FPMASK_Positive_Zero;
6831 if (Check & fcNegZero)
6832 TDCMask |= RISCV::FPMASK_Negative_Zero;
6833
6834 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6835
6836 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6837
6838 if (VT.isVector()) {
6839 SDValue Op0 = Op.getOperand(0);
6840 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6841
6842 if (VT.isScalableVector()) {
6843 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6844 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6845 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6846 Mask = Op.getOperand(2);
6847 VL = Op.getOperand(3);
6848 }
6849 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6850 VL, Op->getFlags());
6851 if (IsOneBitMask)
6852 return DAG.getSetCC(DL, VT, FPCLASS,
6853 DAG.getConstant(TDCMask, DL, DstVT),
6854 ISD::SETEQ);
6855 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6856 DAG.getConstant(TDCMask, DL, DstVT));
6857 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6858 ISD::SETNE);
6859 }
6860
6861 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6862 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6863 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6864 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6865 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6866 Mask = Op.getOperand(2);
6867 MVT MaskContainerVT =
6868 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6869 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6870 VL = Op.getOperand(3);
6871 }
6872 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6873
6874 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6875 Mask, VL, Op->getFlags());
6876
6877 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6878 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6879 if (IsOneBitMask) {
6880 SDValue VMSEQ =
6881 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6882 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6883 DAG.getUNDEF(ContainerVT), Mask, VL});
6884 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6885 }
6886 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6887 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6888
6889 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6890 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6891 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6892
6893 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6894 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6895 DAG.getUNDEF(ContainerVT), Mask, VL});
6896 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6897 }
6898
6899 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6900 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6901 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6902 ISD::SETNE);
6903 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6904}
6905
6906// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6907// operations propagate nans.
6908static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6909 const RISCVSubtarget &Subtarget) {
6910 SDLoc DL(Op);
6911 MVT VT = Op.getSimpleValueType();
6912
6913 SDValue X = Op.getOperand(0);
6914 SDValue Y = Op.getOperand(1);
6915
6916 if (!VT.isVector()) {
6917 MVT XLenVT = Subtarget.getXLenVT();
6918
6919 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6920 // ensures that when one input is a nan, the other will also be a nan
6921 // allowing the nan to propagate. If both inputs are nan, this will swap the
6922 // inputs which is harmless.
6923
6924 SDValue NewY = Y;
6925 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6926 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6927 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6928 }
6929
6930 SDValue NewX = X;
6931 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6932 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6933 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6934 }
6935
6936 unsigned Opc =
6937 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6938 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6939 }
6940
6941 // Check for NaNs before converting the fixed-length vectors to scalable vectors.
6942 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6943 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6944
6945 MVT ContainerVT = VT;
6946 if (VT.isFixedLengthVector()) {
6947 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6948 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6949 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6950 }
6951
6952 SDValue Mask, VL;
6953 if (Op->isVPOpcode()) {
6954 Mask = Op.getOperand(2);
6955 if (VT.isFixedLengthVector())
6956 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6957 Subtarget);
6958 VL = Op.getOperand(3);
6959 } else {
6960 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6961 }
6962
6963 SDValue NewY = Y;
6964 if (!XIsNeverNan) {
6965 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6966 {X, X, DAG.getCondCode(ISD::SETOEQ),
6967 DAG.getUNDEF(ContainerVT), Mask, VL});
6968 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6969 DAG.getUNDEF(ContainerVT), VL);
6970 }
6971
6972 SDValue NewX = X;
6973 if (!YIsNeverNan) {
6974 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6975 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6976 DAG.getUNDEF(ContainerVT), Mask, VL});
6977 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6978 DAG.getUNDEF(ContainerVT), VL);
6979 }
6980
6981 unsigned Opc =
6982 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6983 ? RISCVISD::VFMAX_VL
6984 : RISCVISD::VFMIN_VL;
6985 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6986 DAG.getUNDEF(ContainerVT), Mask, VL);
6987 if (VT.isFixedLengthVector())
6988 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6989 return Res;
6990}
6991
6992static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6993 const RISCVSubtarget &Subtarget) {
6994 bool IsFABS = Op.getOpcode() == ISD::FABS;
6995 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6996 "Wrong opcode for lowering FABS or FNEG.");
6997
6998 MVT XLenVT = Subtarget.getXLenVT();
6999 MVT VT = Op.getSimpleValueType();
7000 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7001
7002 SDLoc DL(Op);
7003 SDValue Fmv =
7004 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7005
7006 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7007 Mask = Mask.sext(Subtarget.getXLen());
7008
7009 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7010 SDValue Logic =
7011 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7012 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7013}
7014
7015static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
7016 const RISCVSubtarget &Subtarget) {
7017 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7018
7019 MVT XLenVT = Subtarget.getXLenVT();
7020 MVT VT = Op.getSimpleValueType();
7021 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7022
7023 SDValue Mag = Op.getOperand(0);
7024 SDValue Sign = Op.getOperand(1);
7025
7026 SDLoc DL(Op);
7027
7028 // Get sign bit into an integer value.
7029 unsigned SignSize = Sign.getValueSizeInBits();
7030 SDValue SignAsInt = [&]() {
7031 if (SignSize == Subtarget.getXLen())
7032 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7033 switch (SignSize) {
7034 case 16:
7035 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7036 case 32:
7037 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7038 case 64: {
7039 assert(XLenVT == MVT::i32 && "Unexpected type");
7040 // Copy the upper word to integer.
7041 SignSize = 32;
7042 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7043 .getValue(1);
7044 }
7045 default:
7046 llvm_unreachable("Unexpected sign size");
7047 }
7048 }();
7049
7050 // Get the signbit at the right position for MagAsInt.
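// For example, when Sign is f32 (SignSize = 32) and Mag is f16, ShiftAmount
// is 32 - 16 = 16, so an SRL by 16 moves f32 bit 31 down to bit 15.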
7051 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7052 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7053 SignAsInt,
7054 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7055
7056 // Mask the sign bit and any bits above it. The extra bits will be dropped
7057 // when we convert back to FP.
7058 SDValue SignMask = DAG.getConstant(
7059 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7060 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7061
7062 // Transform Mag value to integer, and clear the sign bit.
7063 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7064 SDValue ClearSignMask = DAG.getConstant(
7065 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7066 SDValue ClearedSign =
7067 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7068
7069 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7070 SDNodeFlags::Disjoint);
7071
7072 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7073}
7074
7075/// Get a RISC-V target specified VL op for a given SDNode.
7076static unsigned getRISCVVLOp(SDValue Op) {
7077#define OP_CASE(NODE) \
7078 case ISD::NODE: \
7079 return RISCVISD::NODE##_VL;
7080#define VP_CASE(NODE) \
7081 case ISD::VP_##NODE: \
7082 return RISCVISD::NODE##_VL;
7083 // clang-format off
7084 switch (Op.getOpcode()) {
7085 default:
7086 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7087 OP_CASE(ADD)
7088 OP_CASE(SUB)
7089 OP_CASE(MUL)
7090 OP_CASE(MULHS)
7091 OP_CASE(MULHU)
7092 OP_CASE(SDIV)
7093 OP_CASE(SREM)
7094 OP_CASE(UDIV)
7095 OP_CASE(UREM)
7096 OP_CASE(SHL)
7097 OP_CASE(SRA)
7098 OP_CASE(SRL)
7099 OP_CASE(ROTL)
7100 OP_CASE(ROTR)
7101 OP_CASE(BSWAP)
7102 OP_CASE(CTTZ)
7103 OP_CASE(CTLZ)
7104 OP_CASE(CTPOP)
7105 OP_CASE(BITREVERSE)
7106 OP_CASE(SADDSAT)
7107 OP_CASE(UADDSAT)
7108 OP_CASE(SSUBSAT)
7109 OP_CASE(USUBSAT)
7110 OP_CASE(AVGFLOORS)
7111 OP_CASE(AVGFLOORU)
7112 OP_CASE(AVGCEILS)
7113 OP_CASE(AVGCEILU)
7114 OP_CASE(FADD)
7115 OP_CASE(FSUB)
7116 OP_CASE(FMUL)
7117 OP_CASE(FDIV)
7118 OP_CASE(FNEG)
7119 OP_CASE(FABS)
7120 OP_CASE(FCOPYSIGN)
7121 OP_CASE(FSQRT)
7122 OP_CASE(SMIN)
7123 OP_CASE(SMAX)
7124 OP_CASE(UMIN)
7125 OP_CASE(UMAX)
7126 OP_CASE(STRICT_FADD)
7127 OP_CASE(STRICT_FSUB)
7128 OP_CASE(STRICT_FMUL)
7129 OP_CASE(STRICT_FDIV)
7130 OP_CASE(STRICT_FSQRT)
7131 VP_CASE(ADD) // VP_ADD
7132 VP_CASE(SUB) // VP_SUB
7133 VP_CASE(MUL) // VP_MUL
7134 VP_CASE(SDIV) // VP_SDIV
7135 VP_CASE(SREM) // VP_SREM
7136 VP_CASE(UDIV) // VP_UDIV
7137 VP_CASE(UREM) // VP_UREM
7138 VP_CASE(SHL) // VP_SHL
7139 VP_CASE(FADD) // VP_FADD
7140 VP_CASE(FSUB) // VP_FSUB
7141 VP_CASE(FMUL) // VP_FMUL
7142 VP_CASE(FDIV) // VP_FDIV
7143 VP_CASE(FNEG) // VP_FNEG
7144 VP_CASE(FABS) // VP_FABS
7145 VP_CASE(SMIN) // VP_SMIN
7146 VP_CASE(SMAX) // VP_SMAX
7147 VP_CASE(UMIN) // VP_UMIN
7148 VP_CASE(UMAX) // VP_UMAX
7149 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7150 VP_CASE(SETCC) // VP_SETCC
7151 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7152 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7153 VP_CASE(BITREVERSE) // VP_BITREVERSE
7154 VP_CASE(SADDSAT) // VP_SADDSAT
7155 VP_CASE(UADDSAT) // VP_UADDSAT
7156 VP_CASE(SSUBSAT) // VP_SSUBSAT
7157 VP_CASE(USUBSAT) // VP_USUBSAT
7158 VP_CASE(BSWAP) // VP_BSWAP
7159 VP_CASE(CTLZ) // VP_CTLZ
7160 VP_CASE(CTTZ) // VP_CTTZ
7161 VP_CASE(CTPOP) // VP_CTPOP
7162 case ISD::CTLZ_ZERO_UNDEF:
7163 case ISD::VP_CTLZ_ZERO_UNDEF:
7164 return RISCVISD::CTLZ_VL;
7165 case ISD::CTTZ_ZERO_UNDEF:
7166 case ISD::VP_CTTZ_ZERO_UNDEF:
7167 return RISCVISD::CTTZ_VL;
7168 case ISD::FMA:
7169 case ISD::VP_FMA:
7170 return RISCVISD::VFMADD_VL;
7171 case ISD::STRICT_FMA:
7172 return RISCVISD::STRICT_VFMADD_VL;
7173 case ISD::AND:
7174 case ISD::VP_AND:
7175 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7176 return RISCVISD::VMAND_VL;
7177 return RISCVISD::AND_VL;
7178 case ISD::OR:
7179 case ISD::VP_OR:
7180 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7181 return RISCVISD::VMOR_VL;
7182 return RISCVISD::OR_VL;
7183 case ISD::XOR:
7184 case ISD::VP_XOR:
7185 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7186 return RISCVISD::VMXOR_VL;
7187 return RISCVISD::XOR_VL;
7188 case ISD::ANY_EXTEND:
7189 case ISD::ZERO_EXTEND:
7190 return RISCVISD::VZEXT_VL;
7191 case ISD::SIGN_EXTEND:
7192 return RISCVISD::VSEXT_VL;
7193 case ISD::SETCC:
7194 return RISCVISD::SETCC_VL;
7195 case ISD::VSELECT:
7196 return RISCVISD::VMERGE_VL;
7197 case ISD::VP_SELECT:
7198 case ISD::VP_MERGE:
7199 return RISCVISD::VMERGE_VL;
7200 case ISD::VP_SRA:
7201 return RISCVISD::SRA_VL;
7202 case ISD::VP_SRL:
7203 return RISCVISD::SRL_VL;
7204 case ISD::VP_SQRT:
7205 return RISCVISD::FSQRT_VL;
7206 case ISD::VP_SIGN_EXTEND:
7207 return RISCVISD::VSEXT_VL;
7208 case ISD::VP_ZERO_EXTEND:
7209 return RISCVISD::VZEXT_VL;
7210 case ISD::VP_FP_TO_SINT:
7211 return RISCVISD::VFCVT_RTZ_X_F_VL;
7212 case ISD::VP_FP_TO_UINT:
7213 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7214 case ISD::FMINNUM:
7215 case ISD::FMINIMUMNUM:
7216 case ISD::VP_FMINNUM:
7217 return RISCVISD::VFMIN_VL;
7218 case ISD::FMAXNUM:
7219 case ISD::FMAXIMUMNUM:
7220 case ISD::VP_FMAXNUM:
7221 return RISCVISD::VFMAX_VL;
7222 case ISD::LRINT:
7223 case ISD::VP_LRINT:
7224 case ISD::LLRINT:
7225 case ISD::VP_LLRINT:
7226 return RISCVISD::VFCVT_RM_X_F_VL;
7227 }
7228 // clang-format on
7229#undef OP_CASE
7230#undef VP_CASE
7231}
7232
7233static bool isPromotedOpNeedingSplit(SDValue Op,
7234 const RISCVSubtarget &Subtarget) {
7235 return (Op.getValueType() == MVT::nxv32f16 &&
7236 (Subtarget.hasVInstructionsF16Minimal() &&
7237 !Subtarget.hasVInstructionsF16())) ||
7238 Op.getValueType() == MVT::nxv32bf16;
7239}
7240
7241static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7242 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7243 SDLoc DL(Op);
7244
7245 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7246 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7247
7248 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7249 if (!Op.getOperand(j).getValueType().isVector()) {
7250 LoOperands[j] = Op.getOperand(j);
7251 HiOperands[j] = Op.getOperand(j);
7252 continue;
7253 }
7254 std::tie(LoOperands[j], HiOperands[j]) =
7255 DAG.SplitVector(Op.getOperand(j), DL);
7256 }
7257
7258 SDValue LoRes =
7259 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7260 SDValue HiRes =
7261 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7262
7263 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7264}
7265
7266static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7267 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7268 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7269 SDLoc DL(Op);
7270
7271 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7272 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7273
7274 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7275 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7276 std::tie(LoOperands[j], HiOperands[j]) =
7277 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7278 continue;
7279 }
7280 if (!Op.getOperand(j).getValueType().isVector()) {
7281 LoOperands[j] = Op.getOperand(j);
7282 HiOperands[j] = Op.getOperand(j);
7283 continue;
7284 }
7285 std::tie(LoOperands[j], HiOperands[j]) =
7286 DAG.SplitVector(Op.getOperand(j), DL);
7287 }
7288
7289 SDValue LoRes =
7290 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7291 SDValue HiRes =
7292 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7293
7294 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7295}
7296
7297static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7298 SDLoc DL(Op);
7299
7300 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7301 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7302 auto [EVLLo, EVLHi] =
7303 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7304
7305 SDValue ResLo =
7306 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7307 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7308 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7309 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7310}
7311
7312static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7313
7314 assert(Op->isStrictFPOpcode());
7315
7316 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7317
7318 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7319 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7320
7321 SDLoc DL(Op);
7322
7323 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7324 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7325
7326 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7327 if (!Op.getOperand(j).getValueType().isVector()) {
7328 LoOperands[j] = Op.getOperand(j);
7329 HiOperands[j] = Op.getOperand(j);
7330 continue;
7331 }
7332 std::tie(LoOperands[j], HiOperands[j]) =
7333 DAG.SplitVector(Op.getOperand(j), DL);
7334 }
7335
7336 SDValue LoRes =
7337 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7338 HiOperands[0] = LoRes.getValue(1);
7339 SDValue HiRes =
7340 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7341
7342 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7343 LoRes.getValue(0), HiRes.getValue(0));
7344 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7345}
7346
7347SDValue
7348RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7349 SelectionDAG &DAG) const {
7350 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7351 "Unexpected bfloat16 load lowering");
7352
7353 SDLoc DL(Op);
7354 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7355 EVT MemVT = LD->getMemoryVT();
7356 SDValue Load = DAG.getExtLoad(
7357 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7358 LD->getBasePtr(),
7359 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7360 LD->getMemOperand());
7361 // Use a mask to keep the bf16 value properly nan-boxed when we don't have
7362 // the flh instruction. -65536 has zero low 12 bits, so a single lui can
7363 // materialize the constant directly.
7364 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7365 SDValue OrSixteenOne =
7366 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7367 SDValue ConvertedResult =
7368 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7369 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7370}
7371
7372SDValue
7373RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7374 SelectionDAG &DAG) const {
7375 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7376 "Unexpected bfloat16 store lowering");
7377
7378 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7379 SDLoc DL(Op);
7380 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7381 Subtarget.getXLenVT(), ST->getValue());
7382 return DAG.getTruncStore(
7383 ST->getChain(), DL, FMV, ST->getBasePtr(),
7384 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7385 ST->getMemOperand());
7386}
7387
7388SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7389 SelectionDAG &DAG) const {
7390 switch (Op.getOpcode()) {
7391 default:
7393 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7394 case ISD::PREFETCH:
7395 return LowerPREFETCH(Op, Subtarget, DAG);
7396 case ISD::ATOMIC_FENCE:
7397 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7398 case ISD::GlobalAddress:
7399 return lowerGlobalAddress(Op, DAG);
7400 case ISD::BlockAddress:
7401 return lowerBlockAddress(Op, DAG);
7402 case ISD::ConstantPool:
7403 return lowerConstantPool(Op, DAG);
7404 case ISD::JumpTable:
7405 return lowerJumpTable(Op, DAG);
7406 case ISD::GlobalTLSAddress:
7407 return lowerGlobalTLSAddress(Op, DAG);
7408 case ISD::Constant:
7409 return lowerConstant(Op, DAG, Subtarget);
7410 case ISD::ConstantFP:
7411 return lowerConstantFP(Op, DAG);
7412 case ISD::SELECT:
7413 return lowerSELECT(Op, DAG);
7414 case ISD::BRCOND:
7415 return lowerBRCOND(Op, DAG);
7416 case ISD::VASTART:
7417 return lowerVASTART(Op, DAG);
7418 case ISD::FRAMEADDR:
7419 return lowerFRAMEADDR(Op, DAG);
7420 case ISD::RETURNADDR:
7421 return lowerRETURNADDR(Op, DAG);
7422 case ISD::SHL_PARTS:
7423 return lowerShiftLeftParts(Op, DAG);
7424 case ISD::SRA_PARTS:
7425 return lowerShiftRightParts(Op, DAG, true);
7426 case ISD::SRL_PARTS:
7427 return lowerShiftRightParts(Op, DAG, false);
7428 case ISD::ROTL:
7429 case ISD::ROTR:
7430 if (Op.getValueType().isFixedLengthVector()) {
7431 assert(Subtarget.hasStdExtZvkb());
7432 return lowerToScalableOp(Op, DAG);
7433 }
7434 assert(Subtarget.hasVendorXTHeadBb() &&
7435 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7436 "Unexpected custom legalization");
7437 // XTHeadBb only supports rotate by constant.
7438 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7439 return SDValue();
7440 return Op;
7441 case ISD::BITCAST: {
7442 SDLoc DL(Op);
7443 EVT VT = Op.getValueType();
7444 SDValue Op0 = Op.getOperand(0);
7445 EVT Op0VT = Op0.getValueType();
7446 MVT XLenVT = Subtarget.getXLenVT();
7447 if (Op0VT == MVT::i16 &&
7448 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7449 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7450 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7451 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7452 }
7453 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7454 Subtarget.hasStdExtFOrZfinx()) {
7455 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7456 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7457 }
7458 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7459 Subtarget.hasStdExtDOrZdinx()) {
7460 SDValue Lo, Hi;
7461 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7462 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7463 }
7464
7465 // Consider other scalar<->scalar casts as legal if the types are legal.
7466 // Otherwise expand them.
7467 if (!VT.isVector() && !Op0VT.isVector()) {
7468 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7469 return Op;
7470 return SDValue();
7471 }
7472
7473 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7474 "Unexpected types");
7475
7476 if (VT.isFixedLengthVector()) {
7477 // We can handle fixed length vector bitcasts with a simple replacement
7478 // in isel.
7479 if (Op0VT.isFixedLengthVector())
7480 return Op;
7481 // When bitcasting from scalar to fixed-length vector, insert the scalar
7482 // into a one-element vector of the result type, and perform a vector
7483 // bitcast.
7484 if (!Op0VT.isVector()) {
7485 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7486 if (!isTypeLegal(BVT))
7487 return SDValue();
7488 return DAG.getBitcast(
7489 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7490 }
7491 return SDValue();
7492 }
7493 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7494 // thus: bitcast the vector to a one-element vector type whose element type
7495 // is the same as the result type, and extract the first element.
7496 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7497 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7498 if (!isTypeLegal(BVT))
7499 return SDValue();
7500 SDValue BVec = DAG.getBitcast(BVT, Op0);
7501 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7502 }
7503 return SDValue();
7504 }
7505 case ISD::INTRINSIC_WO_CHAIN:
7506 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7507 case ISD::INTRINSIC_W_CHAIN:
7508 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7509 case ISD::INTRINSIC_VOID:
7510 return LowerINTRINSIC_VOID(Op, DAG);
7511 case ISD::IS_FPCLASS:
7512 return LowerIS_FPCLASS(Op, DAG);
7513 case ISD::BITREVERSE: {
7514 MVT VT = Op.getSimpleValueType();
7515 if (VT.isFixedLengthVector()) {
7516 assert(Subtarget.hasStdExtZvbb());
7517 return lowerToScalableOp(Op, DAG);
7518 }
7519 SDLoc DL(Op);
7520 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7521 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7522 // Expand bitreverse to a bswap(rev8) followed by brev8.
7523 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7524 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7525 }
7526 case ISD::TRUNCATE:
7527 case ISD::TRUNCATE_SSAT_S:
7528 case ISD::TRUNCATE_USAT_U:
7529 // Only custom-lower vector truncates
7530 if (!Op.getSimpleValueType().isVector())
7531 return Op;
7532 return lowerVectorTruncLike(Op, DAG);
7533 case ISD::ANY_EXTEND:
7534 case ISD::ZERO_EXTEND:
7535 if (Op.getOperand(0).getValueType().isVector() &&
7536 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7537 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7538 if (Op.getValueType().isScalableVector())
7539 return Op;
7540 return lowerToScalableOp(Op, DAG);
7541 case ISD::SIGN_EXTEND:
7542 if (Op.getOperand(0).getValueType().isVector() &&
7543 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7544 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7545 if (Op.getValueType().isScalableVector())
7546 return Op;
7547 return lowerToScalableOp(Op, DAG);
7548 case ISD::SPLAT_VECTOR_PARTS:
7549 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7550 case ISD::INSERT_VECTOR_ELT:
7551 return lowerINSERT_VECTOR_ELT(Op, DAG);
7552 case ISD::EXTRACT_VECTOR_ELT:
7553 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7554 case ISD::SCALAR_TO_VECTOR: {
7555 MVT VT = Op.getSimpleValueType();
7556 SDLoc DL(Op);
7557 SDValue Scalar = Op.getOperand(0);
7558 if (VT.getVectorElementType() == MVT::i1) {
7559 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7560 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7561 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7562 }
7563 MVT ContainerVT = VT;
7564 if (VT.isFixedLengthVector())
7565 ContainerVT = getContainerForFixedLengthVector(VT);
7566 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7567
7568 SDValue V;
7569 if (VT.isFloatingPoint()) {
7570 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7571 DAG.getUNDEF(ContainerVT), Scalar, VL);
7572 } else {
7573 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7574 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7575 DAG.getUNDEF(ContainerVT), Scalar, VL);
7576 }
7577 if (VT.isFixedLengthVector())
7578 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7579 return V;
7580 }
7581 case ISD::VSCALE: {
7582 MVT XLenVT = Subtarget.getXLenVT();
7583 MVT VT = Op.getSimpleValueType();
7584 SDLoc DL(Op);
7585 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7586 // We define our scalable vector types for lmul=1 to use a 64 bit known
7587 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7588 // vscale as VLENB / 8.
7589 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7590 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7591 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7592 // We assume VLENB is a multiple of 8. We manually choose the best shift
7593 // here because SimplifyDemandedBits isn't always able to simplify it.
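// For example, vscale * 4 becomes VLENB >> 1 (3 - log2(4) = 1), and
// vscale * 24 becomes VLENB * 3 since 24 is a multiple of 8.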
7594 uint64_t Val = Op.getConstantOperandVal(0);
7595 if (isPowerOf2_64(Val)) {
7596 uint64_t Log2 = Log2_64(Val);
7597 if (Log2 < 3) {
7598 SDNodeFlags Flags;
7599 Flags.setExact(true);
7600 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7601 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7602 } else if (Log2 > 3) {
7603 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7604 DAG.getConstant(Log2 - 3, DL, XLenVT));
7605 }
7606 } else if ((Val % 8) == 0) {
7607 // If the multiplier is a multiple of 8, scale it down to avoid needing
7608 // to shift the VLENB value.
7609 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7610 DAG.getConstant(Val / 8, DL, XLenVT));
7611 } else {
7612 SDNodeFlags Flags;
7613 Flags.setExact(true);
7614 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7615 DAG.getConstant(3, DL, XLenVT), Flags);
7616 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7617 DAG.getConstant(Val, DL, XLenVT));
7618 }
7619 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7620 }
7621 case ISD::FPOWI: {
7622 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7623 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7624 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7625 Op.getOperand(1).getValueType() == MVT::i32) {
7626 SDLoc DL(Op);
7627 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7628 SDValue Powi =
7629 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7630 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7631 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7632 }
7633 return SDValue();
7634 }
7635 case ISD::FMAXIMUM:
7636 case ISD::FMINIMUM:
7637 if (isPromotedOpNeedingSplit(Op, Subtarget))
7638 return SplitVectorOp(Op, DAG);
7639 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7640 case ISD::FP_EXTEND:
7641 case ISD::FP_ROUND:
7642 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7643 case ISD::STRICT_FP_EXTEND:
7644 case ISD::STRICT_FP_ROUND:
7645 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7646 case ISD::SINT_TO_FP:
7647 case ISD::UINT_TO_FP:
7648 if (Op.getValueType().isVector() &&
7649 ((Op.getValueType().getScalarType() == MVT::f16 &&
7650 (Subtarget.hasVInstructionsF16Minimal() &&
7651 !Subtarget.hasVInstructionsF16())) ||
7652 Op.getValueType().getScalarType() == MVT::bf16)) {
7653 if (isPromotedOpNeedingSplit(Op, Subtarget))
7654 return SplitVectorOp(Op, DAG);
7655 // int -> f32
7656 SDLoc DL(Op);
7657 MVT NVT =
7658 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7659 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7660 // f32 -> [b]f16
7661 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7662 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7663 }
7664 [[fallthrough]];
7665 case ISD::FP_TO_SINT:
7666 case ISD::FP_TO_UINT:
7667 if (SDValue Op1 = Op.getOperand(0);
7668 Op1.getValueType().isVector() &&
7669 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7670 (Subtarget.hasVInstructionsF16Minimal() &&
7671 !Subtarget.hasVInstructionsF16())) ||
7672 Op1.getValueType().getScalarType() == MVT::bf16)) {
7673 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7674 return SplitVectorOp(Op, DAG);
7675 // [b]f16 -> f32
7676 SDLoc DL(Op);
7677 MVT NVT = MVT::getVectorVT(MVT::f32,
7678 Op1.getValueType().getVectorElementCount());
7679 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7680 // f32 -> int
7681 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7682 }
7683 [[fallthrough]];
7684 case ISD::STRICT_FP_TO_SINT:
7685 case ISD::STRICT_FP_TO_UINT:
7686 case ISD::STRICT_SINT_TO_FP:
7687 case ISD::STRICT_UINT_TO_FP: {
7688 // RVV can only do fp<->int conversions to types half/double the size as
7689 // the source. We custom-lower any conversions that do two hops into
7690 // sequences.
7691 MVT VT = Op.getSimpleValueType();
7692 if (VT.isScalarInteger())
7693 return lowerFP_TO_INT(Op, DAG, Subtarget);
7694 bool IsStrict = Op->isStrictFPOpcode();
7695 SDValue Src = Op.getOperand(0 + IsStrict);
7696 MVT SrcVT = Src.getSimpleValueType();
7697 if (SrcVT.isScalarInteger())
7698 return lowerINT_TO_FP(Op, DAG, Subtarget);
7699 if (!VT.isVector())
7700 return Op;
7701 SDLoc DL(Op);
7702 MVT EltVT = VT.getVectorElementType();
7703 MVT SrcEltVT = SrcVT.getVectorElementType();
7704 unsigned EltSize = EltVT.getSizeInBits();
7705 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7706 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7707 "Unexpected vector element types");
7708
7709 bool IsInt2FP = SrcEltVT.isInteger();
7710 // Widening conversions
7711 if (EltSize > (2 * SrcEltSize)) {
7712 if (IsInt2FP) {
7713 // Do a regular integer sign/zero extension then convert to float.
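// For example, nxv2i8 -> nxv2f64 extends i8 to i32 first, so a single
// widening convert then handles i32 -> f64.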
7714 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7715 VT.getVectorElementCount());
7716 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7717 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7718 ? ISD::ZERO_EXTEND
7719 : ISD::SIGN_EXTEND;
7720 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7721 if (IsStrict)
7722 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7723 Op.getOperand(0), Ext);
7724 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7725 }
7726 // FP2Int
7727 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7728 // Do one doubling fp_extend then complete the operation by converting
7729 // to int.
7730 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7731 if (IsStrict) {
7732 auto [FExt, Chain] =
7733 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7734 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7735 }
7736 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7737 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7738 }
7739
7740 // Narrowing conversions
7741 if (SrcEltSize > (2 * EltSize)) {
7742 if (IsInt2FP) {
7743 // One narrowing int_to_fp, then an fp_round.
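// For example, nxv2i64 -> nxv2f16 converts i64 -> f32 first and then rounds
// f32 -> f16.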
7744 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7745 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7746 if (IsStrict) {
7747 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7748 DAG.getVTList(InterimFVT, MVT::Other),
7749 Op.getOperand(0), Src);
7750 SDValue Chain = Int2FP.getValue(1);
7751 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7752 }
7753 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7754 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7755 }
7756 // FP2Int
7757 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7758 // representable by the integer, the result is poison.
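// For example, nxv2f64 -> nxv2i8 converts f64 -> i32 first and then
// truncates i32 -> i8.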
7759 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7760 VT.getVectorElementCount());
7761 if (IsStrict) {
7762 SDValue FP2Int =
7763 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7764 Op.getOperand(0), Src);
7765 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7766 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7767 }
7768 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7769 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7770 }
7771
7772 // Scalable vectors can exit here. Patterns will handle equally-sized
7773 // conversions halving/doubling ones.
7774 if (!VT.isFixedLengthVector())
7775 return Op;
7776
7777 // For fixed-length vectors we lower to a custom "VL" node.
7778 unsigned RVVOpc = 0;
7779 switch (Op.getOpcode()) {
7780 default:
7781 llvm_unreachable("Impossible opcode");
7782 case ISD::FP_TO_SINT:
7783 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7784 break;
7785 case ISD::FP_TO_UINT:
7786 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7787 break;
7788 case ISD::SINT_TO_FP:
7789 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7790 break;
7791 case ISD::UINT_TO_FP:
7792 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7793 break;
7794 case ISD::STRICT_FP_TO_SINT:
7795 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7796 break;
7797 case ISD::STRICT_FP_TO_UINT:
7798 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7799 break;
7800 case ISD::STRICT_SINT_TO_FP:
7801 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7802 break;
7803 case ISD::STRICT_UINT_TO_FP:
7804 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7805 break;
7806 }
7807
7808 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7809 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7810 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7811 "Expected same element count");
7812
7813 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7814
7815 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7816 if (IsStrict) {
7817 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7818 Op.getOperand(0), Src, Mask, VL);
7819 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7820 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7821 }
7822 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7823 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7824 }
7825 case ISD::FP_TO_SINT_SAT:
7826 case ISD::FP_TO_UINT_SAT:
7827 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7828 case ISD::FP_TO_BF16: {
7829 // Custom lower to ensure the libcall return is passed in an FPR on hard
7830 // float ABIs.
7831 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7832 SDLoc DL(Op);
7833 MakeLibCallOptions CallOptions;
7834 RTLIB::Libcall LC =
7835 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7836 SDValue Res =
7837 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7838 if (Subtarget.is64Bit())
7839 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7840 return DAG.getBitcast(MVT::i32, Res);
7841 }
7842 case ISD::BF16_TO_FP: {
7843 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7844 MVT VT = Op.getSimpleValueType();
7845 SDLoc DL(Op);
7846 Op = DAG.getNode(
7847 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7848 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7849 SDValue Res = Subtarget.is64Bit()
7850 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7851 : DAG.getBitcast(MVT::f32, Op);
7852 // fp_extend if the target VT is bigger than f32.
7853 if (VT != MVT::f32)
7854 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7855 return Res;
7856 }
7857 case ISD::STRICT_FP_TO_FP16:
7858 case ISD::FP_TO_FP16: {
7859 // Custom lower to ensure the libcall return is passed in an FPR on hard
7860 // float ABIs.
7861 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7862 SDLoc DL(Op);
7863 MakeLibCallOptions CallOptions;
7864 bool IsStrict = Op->isStrictFPOpcode();
7865 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7866 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7867 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7868 SDValue Res;
7869 std::tie(Res, Chain) =
7870 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7871 if (Subtarget.is64Bit())
7872 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7873 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7874 if (IsStrict)
7875 return DAG.getMergeValues({Result, Chain}, DL);
7876 return Result;
7877 }
7878 case ISD::STRICT_FP16_TO_FP:
7879 case ISD::FP16_TO_FP: {
7880 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7881 // float ABIs.
7882 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7883 SDLoc DL(Op);
7884 MakeLibCallOptions CallOptions;
7885 bool IsStrict = Op->isStrictFPOpcode();
7886 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7887 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7888 SDValue Arg = Subtarget.is64Bit()
7889 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7890 : DAG.getBitcast(MVT::f32, Op0);
7891 SDValue Res;
7892 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7893 CallOptions, DL, Chain);
7894 if (IsStrict)
7895 return DAG.getMergeValues({Res, Chain}, DL);
7896 return Res;
7897 }
7898 case ISD::FTRUNC:
7899 case ISD::FCEIL:
7900 case ISD::FFLOOR:
7901 case ISD::FNEARBYINT:
7902 case ISD::FRINT:
7903 case ISD::FROUND:
7904 case ISD::FROUNDEVEN:
7905 if (isPromotedOpNeedingSplit(Op, Subtarget))
7906 return SplitVectorOp(Op, DAG);
7907 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7908 case ISD::LRINT:
7909 case ISD::LLRINT:
7910 case ISD::LROUND:
7911 case ISD::LLROUND: {
7912 if (Op.getValueType().isVector())
7913 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7914 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7915 "Unexpected custom legalisation");
7916 SDLoc DL(Op);
7917 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7918 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7919 }
7920 case ISD::STRICT_LRINT:
7921 case ISD::STRICT_LLRINT:
7922 case ISD::STRICT_LROUND:
7923 case ISD::STRICT_LLROUND: {
7924 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7925 "Unexpected custom legalisation");
7926 SDLoc DL(Op);
7927 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7928 {Op.getOperand(0), Op.getOperand(1)});
7929 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7930 {Ext.getValue(1), Ext.getValue(0)});
7931 }
7932 case ISD::VECREDUCE_ADD:
7933 case ISD::VECREDUCE_UMAX:
7934 case ISD::VECREDUCE_SMAX:
7935 case ISD::VECREDUCE_UMIN:
7936 case ISD::VECREDUCE_SMIN:
7937 return lowerVECREDUCE(Op, DAG);
7938 case ISD::VECREDUCE_AND:
7939 case ISD::VECREDUCE_OR:
7940 case ISD::VECREDUCE_XOR:
7941 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7942 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7943 return lowerVECREDUCE(Op, DAG);
7944 case ISD::VECREDUCE_FADD:
7945 case ISD::VECREDUCE_SEQ_FADD:
7946 case ISD::VECREDUCE_FMIN:
7947 case ISD::VECREDUCE_FMAX:
7948 case ISD::VECREDUCE_FMAXIMUM:
7949 case ISD::VECREDUCE_FMINIMUM:
7950 return lowerFPVECREDUCE(Op, DAG);
7951 case ISD::VP_REDUCE_ADD:
7952 case ISD::VP_REDUCE_UMAX:
7953 case ISD::VP_REDUCE_SMAX:
7954 case ISD::VP_REDUCE_UMIN:
7955 case ISD::VP_REDUCE_SMIN:
7956 case ISD::VP_REDUCE_FADD:
7957 case ISD::VP_REDUCE_SEQ_FADD:
7958 case ISD::VP_REDUCE_FMIN:
7959 case ISD::VP_REDUCE_FMAX:
7960 case ISD::VP_REDUCE_FMINIMUM:
7961 case ISD::VP_REDUCE_FMAXIMUM:
7962 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7963 return SplitVectorReductionOp(Op, DAG);
7964 return lowerVPREDUCE(Op, DAG);
7965 case ISD::VP_REDUCE_AND:
7966 case ISD::VP_REDUCE_OR:
7967 case ISD::VP_REDUCE_XOR:
7968 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7969 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7970 return lowerVPREDUCE(Op, DAG);
7971 case ISD::VP_CTTZ_ELTS:
7972 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7973 return lowerVPCttzElements(Op, DAG);
7974 case ISD::UNDEF: {
7975 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7976 return convertFromScalableVector(Op.getSimpleValueType(),
7977 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7978 }
7979 case ISD::INSERT_SUBVECTOR:
7980 return lowerINSERT_SUBVECTOR(Op, DAG);
7981 case ISD::EXTRACT_SUBVECTOR:
7982 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7983 case ISD::VECTOR_DEINTERLEAVE:
7984 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7985 case ISD::VECTOR_INTERLEAVE:
7986 return lowerVECTOR_INTERLEAVE(Op, DAG);
7987 case ISD::STEP_VECTOR:
7988 return lowerSTEP_VECTOR(Op, DAG);
7989 case ISD::VECTOR_REVERSE:
7990 return lowerVECTOR_REVERSE(Op, DAG);
7991 case ISD::VECTOR_SPLICE:
7992 return lowerVECTOR_SPLICE(Op, DAG);
7993 case ISD::BUILD_VECTOR: {
7994 MVT VT = Op.getSimpleValueType();
7995 MVT EltVT = VT.getVectorElementType();
7996 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
7997 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
7998 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7999 }
8000 case ISD::SPLAT_VECTOR: {
8001 MVT VT = Op.getSimpleValueType();
8002 MVT EltVT = VT.getVectorElementType();
8003 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8004 EltVT == MVT::bf16) {
8005 SDLoc DL(Op);
8006 SDValue Elt;
8007 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8008 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8009 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8010 Op.getOperand(0));
8011 else
8012 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8013 MVT IVT = VT.changeVectorElementType(MVT::i16);
8014 return DAG.getNode(ISD::BITCAST, DL, VT,
8015 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8016 }
8017
8018 if (EltVT == MVT::i1)
8019 return lowerVectorMaskSplat(Op, DAG);
8020 return SDValue();
8021 }
8022 case ISD::VECTOR_SHUFFLE:
8023 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8024 case ISD::CONCAT_VECTORS: {
8025 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8026 // better than going through the stack, as the default expansion does.
8027 SDLoc DL(Op);
8028 MVT VT = Op.getSimpleValueType();
8029 MVT ContainerVT = VT;
8030 if (VT.isFixedLengthVector())
8031 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8032
8033 // Recursively split concat_vectors with more than 2 operands:
8034 //
8035 // concat_vector op1, op2, op3, op4
8036 // ->
8037 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8038 //
8039 // This reduces the length of the chain of vslideups and allows us to
8040 // perform the vslideups at a smaller LMUL, limited to MF2.
8041 if (Op.getNumOperands() > 2 &&
8042 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8043 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8044 assert(isPowerOf2_32(Op.getNumOperands()));
8045 size_t HalfNumOps = Op.getNumOperands() / 2;
8046 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8047 Op->ops().take_front(HalfNumOps));
8048 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8049 Op->ops().drop_front(HalfNumOps));
8050 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8051 }
8052
8053 unsigned NumOpElts =
8054 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8055 SDValue Vec = DAG.getUNDEF(VT);
8056 for (const auto &OpIdx : enumerate(Op->ops())) {
8057 SDValue SubVec = OpIdx.value();
8058 // Don't insert undef subvectors.
8059 if (SubVec.isUndef())
8060 continue;
8061 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8062 }
8063 return Vec;
8064 }
8065 case ISD::LOAD: {
8066 auto *Load = cast<LoadSDNode>(Op);
8067 EVT VT = Load->getValueType(0);
8068 if (VT == MVT::f64) {
8069 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8070 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8071
8072 // Replace a double precision load with two i32 loads and a BuildPairF64.
8073 SDLoc DL(Op);
8074 SDValue BasePtr = Load->getBasePtr();
8075 SDValue Chain = Load->getChain();
8076
8077 SDValue Lo =
8078 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8079 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8080 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8081 SDValue Hi = DAG.getLoad(
8082 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8083 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8084 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8085 Hi.getValue(1));
8086
8087 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8088 return DAG.getMergeValues({Pair, Chain}, DL);
8089 }
8090
8091 if (VT == MVT::bf16)
8092 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8093
8094 // Handle normal vector tuple load.
8095 if (VT.isRISCVVectorTuple()) {
8096 SDLoc DL(Op);
8097 MVT XLenVT = Subtarget.getXLenVT();
8098 unsigned NF = VT.getRISCVVectorTupleNumFields();
8099 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8100 unsigned NumElts = Sz / (NF * 8);
8101 int Log2LMUL = Log2_64(NumElts) - 3;
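// Illustrative example: a 2-field tuple of nxv8i8 has a known minimum size
// of 128 bits, so NumElts = 128 / (2 * 8) = 8 and Log2LMUL = 0, i.e. each
// field occupies one LMUL=1 register.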
8102
8103 auto Flag = SDNodeFlags();
8104 Flag.setNoUnsignedWrap(true);
8105 SDValue Ret = DAG.getUNDEF(VT);
8106 SDValue BasePtr = Load->getBasePtr();
8107 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8108 VROffset =
8109 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8110 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8111 SmallVector<SDValue, 8> OutChains;
8112
8113 // Load NF vector registers and combine them to a vector tuple.
8114 for (unsigned i = 0; i < NF; ++i) {
8115 SDValue LoadVal = DAG.getLoad(
8116 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8117 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8118 OutChains.push_back(LoadVal.getValue(1));
8119 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8120 DAG.getTargetConstant(i, DL, MVT::i32));
8121 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8122 }
8123 return DAG.getMergeValues(
8124 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8125 }
8126
8127 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8128 return V;
8129 if (Op.getValueType().isFixedLengthVector())
8130 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8131 return Op;
8132 }
8133 case ISD::STORE: {
8134 auto *Store = cast<StoreSDNode>(Op);
8135 SDValue StoredVal = Store->getValue();
8136 EVT VT = StoredVal.getValueType();
8137 if (VT == MVT::f64) {
8138 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8139 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8140
8141 // Replace a double precision store with a SplitF64 and i32 stores.
8142 SDLoc DL(Op);
8143 SDValue BasePtr = Store->getBasePtr();
8144 SDValue Chain = Store->getChain();
8145 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8146 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8147
8148 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8149 Store->getPointerInfo(), Store->getBaseAlign(),
8150 Store->getMemOperand()->getFlags());
8151 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8152 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8153 Store->getPointerInfo().getWithOffset(4),
8154 Store->getBaseAlign(),
8155 Store->getMemOperand()->getFlags());
8156 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8157 }
8158 if (VT == MVT::i64) {
8159 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8160 "Unexpected custom legalisation");
8161 if (Store->isTruncatingStore())
8162 return SDValue();
8163
8164 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8165 return SDValue();
8166
8167 SDLoc DL(Op);
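// Zilsd provides a paired 64-bit store (sd) on RV32, so split the i64
// value into its low and high 32-bit halves and emit a single SD_RV32.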
8168 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8169 DAG.getTargetConstant(0, DL, MVT::i32));
8170 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8171 DAG.getTargetConstant(1, DL, MVT::i32));
8172
8173 return DAG.getMemIntrinsicNode(
8174 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8175 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8176 Store->getMemOperand());
8177 }
8178
8179 if (VT == MVT::bf16)
8180 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8181
8182 // Handle normal vector tuple store.
8183 if (VT.isRISCVVectorTuple()) {
8184 SDLoc DL(Op);
8185 MVT XLenVT = Subtarget.getXLenVT();
8186 unsigned NF = VT.getRISCVVectorTupleNumFields();
8187 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8188 unsigned NumElts = Sz / (NF * 8);
8189 int Log2LMUL = Log2_64(NumElts) - 3;
8190
8191 auto Flag = SDNodeFlags();
8192 Flag.setNoUnsignedWrap(true);
8193 SDValue Ret;
8194 SDValue Chain = Store->getChain();
8195 SDValue BasePtr = Store->getBasePtr();
8196 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8197 VROffset =
8198 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8199 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8200
8201 // Extract subregisters in a vector tuple and store them individually.
8202 for (unsigned i = 0; i < NF; ++i) {
8203 auto Extract =
8204 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8205 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8206 DAG.getTargetConstant(i, DL, MVT::i32));
8207 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8208 MachinePointerInfo(Store->getAddressSpace()),
8209 Store->getBaseAlign(),
8210 Store->getMemOperand()->getFlags());
8211 Chain = Ret.getValue(0);
8212 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8213 }
8214 return Ret;
8215 }
8216
8217 if (auto V = expandUnalignedRVVStore(Op, DAG))
8218 return V;
8219 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8220 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8221 return Op;
8222 }
8223 case ISD::MLOAD:
8224 case ISD::VP_LOAD:
8225 return lowerMaskedLoad(Op, DAG);
8226 case ISD::VP_LOAD_FF:
8227 return lowerLoadFF(Op, DAG);
8228 case ISD::MSTORE:
8229 case ISD::VP_STORE:
8230 return lowerMaskedStore(Op, DAG);
8231 case ISD::VECTOR_COMPRESS:
8232 return lowerVectorCompress(Op, DAG);
8233 case ISD::SELECT_CC: {
8234 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8235 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8236 // into separate SETCC+SELECT just like LegalizeDAG.
8237 SDValue Tmp1 = Op.getOperand(0);
8238 SDValue Tmp2 = Op.getOperand(1);
8239 SDValue True = Op.getOperand(2);
8240 SDValue False = Op.getOperand(3);
8241 EVT VT = Op.getValueType();
8242 SDValue CC = Op.getOperand(4);
8243 EVT CmpVT = Tmp1.getValueType();
8244 EVT CCVT =
8245 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8246 SDLoc DL(Op);
8247 SDValue Cond =
8248 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8249 return DAG.getSelect(DL, VT, Cond, True, False);
8250 }
8251 case ISD::SETCC: {
8252 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8253 if (OpVT.isScalarInteger()) {
8254 MVT VT = Op.getSimpleValueType();
8255 SDValue LHS = Op.getOperand(0);
8256 SDValue RHS = Op.getOperand(1);
8257 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8258 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8259 "Unexpected CondCode");
8260
8261 SDLoc DL(Op);
8262
8263 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8264 // convert this to the equivalent of (set(u)ge X, C+1) by using
8265 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8266 // in a register.
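// For example, (setgt X, 5) becomes (xori (slti X, 6), 1): slti computes
// X < 6, i.e. X <= 5, and the xori inverts that to give X > 5.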
8267 if (isa<ConstantSDNode>(RHS)) {
8268 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8269 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8270 // If this is an unsigned compare and the constant is -1, incrementing
8271 // the constant would change behavior. The result should be false.
8272 if (CCVal == ISD::SETUGT && Imm == -1)
8273 return DAG.getConstant(0, DL, VT);
8274 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8275 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8276 SDValue SetCC = DAG.getSetCC(
8277 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8278 return DAG.getLogicalNOT(DL, SetCC, VT);
8279 }
8280 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
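// (X u> 2047 holds exactly when some bit above bit 10 is set, so the
// unsigned shift right by 11 is non-zero iff the comparison is true.)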
8281 if (CCVal == ISD::SETUGT && Imm == 2047) {
8282 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8283 DAG.getShiftAmountConstant(11, OpVT, DL));
8284 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8285 ISD::SETNE);
8286 }
8287 }
8288
8289 // Not a constant we could handle, swap the operands and condition code to
8290 // SETLT/SETULT.
8291 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8292 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8293 }
8294
8295 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8296 return SplitVectorOp(Op, DAG);
8297
8298 return lowerToScalableOp(Op, DAG);
8299 }
8300 case ISD::ADD:
8301 case ISD::SUB:
8302 case ISD::MUL:
8303 case ISD::MULHS:
8304 case ISD::MULHU:
8305 case ISD::AND:
8306 case ISD::OR:
8307 case ISD::XOR:
8308 case ISD::SDIV:
8309 case ISD::SREM:
8310 case ISD::UDIV:
8311 case ISD::UREM:
8312 case ISD::BSWAP:
8313 case ISD::CTPOP:
8314 case ISD::VSELECT:
8315 return lowerToScalableOp(Op, DAG);
8316 case ISD::SHL:
8317 case ISD::SRA:
8318 case ISD::SRL:
8319 if (Op.getSimpleValueType().isFixedLengthVector())
8320 return lowerToScalableOp(Op, DAG);
8321 // This can be called for an i32 shift amount that needs to be promoted.
8322 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8323 "Unexpected custom legalisation");
8324 return SDValue();
8325 case ISD::FABS:
8326 case ISD::FNEG:
8327 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8328 return lowerFABSorFNEG(Op, DAG, Subtarget);
8329 [[fallthrough]];
8330 case ISD::FADD:
8331 case ISD::FSUB:
8332 case ISD::FMUL:
8333 case ISD::FDIV:
8334 case ISD::FSQRT:
8335 case ISD::FMA:
8336 case ISD::FMINNUM:
8337 case ISD::FMAXNUM:
8338 case ISD::FMINIMUMNUM:
8339 case ISD::FMAXIMUMNUM:
8340 if (isPromotedOpNeedingSplit(Op, Subtarget))
8341 return SplitVectorOp(Op, DAG);
8342 [[fallthrough]];
8343 case ISD::AVGFLOORS:
8344 case ISD::AVGFLOORU:
8345 case ISD::AVGCEILS:
8346 case ISD::AVGCEILU:
8347 case ISD::SMIN:
8348 case ISD::SMAX:
8349 case ISD::UMIN:
8350 case ISD::UMAX:
8351 case ISD::UADDSAT:
8352 case ISD::USUBSAT:
8353 case ISD::SADDSAT:
8354 case ISD::SSUBSAT:
8355 return lowerToScalableOp(Op, DAG);
8356 case ISD::ABDS:
8357 case ISD::ABDU: {
8358 SDLoc dl(Op);
8359 EVT VT = Op->getValueType(0);
8360 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8361 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8362 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8363
8364 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8365 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8366 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8367 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8368 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8369 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8370 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8371 }
8372 case ISD::ABS:
8373 case ISD::VP_ABS:
8374 return lowerABS(Op, DAG);
8375 case ISD::CTLZ:
8376 case ISD::CTLZ_ZERO_UNDEF:
8377 case ISD::CTTZ:
8378 case ISD::CTTZ_ZERO_UNDEF:
8379 if (Subtarget.hasStdExtZvbb())
8380 return lowerToScalableOp(Op, DAG);
8381 assert(Op.getOpcode() != ISD::CTTZ);
8382 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8383 case ISD::FCOPYSIGN:
8384 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8385 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8386 if (isPromotedOpNeedingSplit(Op, Subtarget))
8387 return SplitVectorOp(Op, DAG);
8388 return lowerToScalableOp(Op, DAG);
8389 case ISD::STRICT_FADD:
8390 case ISD::STRICT_FSUB:
8391 case ISD::STRICT_FMUL:
8392 case ISD::STRICT_FDIV:
8393 case ISD::STRICT_FSQRT:
8394 case ISD::STRICT_FMA:
8395 if (isPromotedOpNeedingSplit(Op, Subtarget))
8396 return SplitStrictFPVectorOp(Op, DAG);
8397 return lowerToScalableOp(Op, DAG);
8398 case ISD::STRICT_FSETCC:
8399 case ISD::STRICT_FSETCCS:
8400 return lowerVectorStrictFSetcc(Op, DAG);
8401 case ISD::STRICT_FCEIL:
8402 case ISD::STRICT_FRINT:
8403 case ISD::STRICT_FFLOOR:
8404 case ISD::STRICT_FTRUNC:
8405 case ISD::STRICT_FNEARBYINT:
8406 case ISD::STRICT_FROUND:
8407 case ISD::STRICT_FROUNDEVEN:
8408 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8409 case ISD::MGATHER:
8410 case ISD::VP_GATHER:
8411 return lowerMaskedGather(Op, DAG);
8412 case ISD::MSCATTER:
8413 case ISD::VP_SCATTER:
8414 return lowerMaskedScatter(Op, DAG);
8415 case ISD::GET_ROUNDING:
8416 return lowerGET_ROUNDING(Op, DAG);
8417 case ISD::SET_ROUNDING:
8418 return lowerSET_ROUNDING(Op, DAG);
8419 case ISD::GET_FPENV:
8420 return lowerGET_FPENV(Op, DAG);
8421 case ISD::SET_FPENV:
8422 return lowerSET_FPENV(Op, DAG);
8423 case ISD::RESET_FPENV:
8424 return lowerRESET_FPENV(Op, DAG);
8425 case ISD::GET_FPMODE:
8426 return lowerGET_FPMODE(Op, DAG);
8427 case ISD::SET_FPMODE:
8428 return lowerSET_FPMODE(Op, DAG);
8429 case ISD::RESET_FPMODE:
8430 return lowerRESET_FPMODE(Op, DAG);
8431 case ISD::EH_DWARF_CFA:
8432 return lowerEH_DWARF_CFA(Op, DAG);
8433 case ISD::VP_MERGE:
8434 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8435 return lowerVPMergeMask(Op, DAG);
8436 [[fallthrough]];
8437 case ISD::VP_SELECT:
8438 case ISD::VP_ADD:
8439 case ISD::VP_SUB:
8440 case ISD::VP_MUL:
8441 case ISD::VP_SDIV:
8442 case ISD::VP_UDIV:
8443 case ISD::VP_SREM:
8444 case ISD::VP_UREM:
8445 case ISD::VP_UADDSAT:
8446 case ISD::VP_USUBSAT:
8447 case ISD::VP_SADDSAT:
8448 case ISD::VP_SSUBSAT:
8449 case ISD::VP_LRINT:
8450 case ISD::VP_LLRINT:
8451 return lowerVPOp(Op, DAG);
8452 case ISD::VP_AND:
8453 case ISD::VP_OR:
8454 case ISD::VP_XOR:
8455 return lowerLogicVPOp(Op, DAG);
8456 case ISD::VP_FADD:
8457 case ISD::VP_FSUB:
8458 case ISD::VP_FMUL:
8459 case ISD::VP_FDIV:
8460 case ISD::VP_FNEG:
8461 case ISD::VP_FABS:
8462 case ISD::VP_SQRT:
8463 case ISD::VP_FMA:
8464 case ISD::VP_FMINNUM:
8465 case ISD::VP_FMAXNUM:
8466 case ISD::VP_FCOPYSIGN:
8467 if (isPromotedOpNeedingSplit(Op, Subtarget))
8468 return SplitVPOp(Op, DAG);
8469 [[fallthrough]];
8470 case ISD::VP_SRA:
8471 case ISD::VP_SRL:
8472 case ISD::VP_SHL:
8473 return lowerVPOp(Op, DAG);
8474 case ISD::VP_IS_FPCLASS:
8475 return LowerIS_FPCLASS(Op, DAG);
8476 case ISD::VP_SIGN_EXTEND:
8477 case ISD::VP_ZERO_EXTEND:
8478 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8479 return lowerVPExtMaskOp(Op, DAG);
8480 return lowerVPOp(Op, DAG);
8481 case ISD::VP_TRUNCATE:
8482 return lowerVectorTruncLike(Op, DAG);
8483 case ISD::VP_FP_EXTEND:
8484 case ISD::VP_FP_ROUND:
8485 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8486 case ISD::VP_SINT_TO_FP:
8487 case ISD::VP_UINT_TO_FP:
8488 if (Op.getValueType().isVector() &&
8489 ((Op.getValueType().getScalarType() == MVT::f16 &&
8490 (Subtarget.hasVInstructionsF16Minimal() &&
8491 !Subtarget.hasVInstructionsF16())) ||
8492 Op.getValueType().getScalarType() == MVT::bf16)) {
8493 if (isPromotedOpNeedingSplit(Op, Subtarget))
8494 return SplitVectorOp(Op, DAG);
8495 // int -> f32
8496 SDLoc DL(Op);
8497 MVT NVT =
8498 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8499 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8500 // f32 -> [b]f16
8501 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8502 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8503 }
8504 [[fallthrough]];
8505 case ISD::VP_FP_TO_SINT:
8506 case ISD::VP_FP_TO_UINT:
8507 if (SDValue Op1 = Op.getOperand(0);
8508 Op1.getValueType().isVector() &&
8509 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8510 (Subtarget.hasVInstructionsF16Minimal() &&
8511 !Subtarget.hasVInstructionsF16())) ||
8512 Op1.getValueType().getScalarType() == MVT::bf16)) {
8513 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8514 return SplitVectorOp(Op, DAG);
8515 // [b]f16 -> f32
8516 SDLoc DL(Op);
8517 MVT NVT = MVT::getVectorVT(MVT::f32,
8518 Op1.getValueType().getVectorElementCount());
8519 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8520 // f32 -> int
8521 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8522 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8523 }
8524 return lowerVPFPIntConvOp(Op, DAG);
8525 case ISD::VP_SETCC:
8526 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8527 return SplitVPOp(Op, DAG);
8528 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8529 return lowerVPSetCCMaskOp(Op, DAG);
8530 [[fallthrough]];
8531 case ISD::VP_SMIN:
8532 case ISD::VP_SMAX:
8533 case ISD::VP_UMIN:
8534 case ISD::VP_UMAX:
8535 case ISD::VP_BITREVERSE:
8536 case ISD::VP_BSWAP:
8537 return lowerVPOp(Op, DAG);
8538 case ISD::VP_CTLZ:
8539 case ISD::VP_CTLZ_ZERO_UNDEF:
8540 if (Subtarget.hasStdExtZvbb())
8541 return lowerVPOp(Op, DAG);
8542 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8543 case ISD::VP_CTTZ:
8544 case ISD::VP_CTTZ_ZERO_UNDEF:
8545 if (Subtarget.hasStdExtZvbb())
8546 return lowerVPOp(Op, DAG);
8547 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8548 case ISD::VP_CTPOP:
8549 return lowerVPOp(Op, DAG);
8550 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8551 return lowerVPStridedLoad(Op, DAG);
8552 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8553 return lowerVPStridedStore(Op, DAG);
8554 case ISD::VP_FCEIL:
8555 case ISD::VP_FFLOOR:
8556 case ISD::VP_FRINT:
8557 case ISD::VP_FNEARBYINT:
8558 case ISD::VP_FROUND:
8559 case ISD::VP_FROUNDEVEN:
8560 case ISD::VP_FROUNDTOZERO:
8561 if (isPromotedOpNeedingSplit(Op, Subtarget))
8562 return SplitVPOp(Op, DAG);
8563 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8564 case ISD::VP_FMAXIMUM:
8565 case ISD::VP_FMINIMUM:
8566 if (isPromotedOpNeedingSplit(Op, Subtarget))
8567 return SplitVPOp(Op, DAG);
8568 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8569 case ISD::EXPERIMENTAL_VP_SPLICE:
8570 return lowerVPSpliceExperimental(Op, DAG);
8571 case ISD::EXPERIMENTAL_VP_REVERSE:
8572 return lowerVPReverseExperimental(Op, DAG);
8573 case ISD::EXPERIMENTAL_VP_SPLAT:
8574 return lowerVPSplatExperimental(Op, DAG);
8575 case ISD::CLEAR_CACHE: {
8576 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8577 "llvm.clear_cache only needs custom lower on Linux targets");
8578 SDLoc DL(Op);
8579 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8580 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8581 Op.getOperand(2), Flags, DL);
8582 }
8583 case ISD::DYNAMIC_STACKALLOC:
8584 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8585 case ISD::INIT_TRAMPOLINE:
8586 return lowerINIT_TRAMPOLINE(Op, DAG);
8587 case ISD::ADJUST_TRAMPOLINE:
8588 return lowerADJUST_TRAMPOLINE(Op, DAG);
8589 case ISD::PARTIAL_REDUCE_UMLA:
8590 case ISD::PARTIAL_REDUCE_SMLA:
8591 case ISD::PARTIAL_REDUCE_SUMLA:
8592 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8593 }
8594}
8595
8596SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8597 SDValue Start, SDValue End,
8598 SDValue Flags, SDLoc DL) const {
8599 MakeLibCallOptions CallOptions;
8600 std::pair<SDValue, SDValue> CallResult =
8601 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8602 {Start, End, Flags}, CallOptions, DL, InChain);
8603
8604 // This function returns void so only the out chain matters.
8605 return CallResult.second;
8606}
8607
8608SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8609 SelectionDAG &DAG) const {
8610 if (!Subtarget.is64Bit())
8611 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8612
8613 // Create an MCCodeEmitter to encode instructions.
8614 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8615 assert(TLO);
8616 MCContext &MCCtx = TLO->getContext();
8617
8618 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8619 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8620
8621 SDValue Root = Op.getOperand(0);
8622 SDValue Trmp = Op.getOperand(1); // trampoline
8623 SDLoc dl(Op);
8624
8625 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8626
8627 // We store in the trampoline buffer the following instructions and data.
8628 // Offset:
8629 // 0: auipc t2, 0
8630 // 4: ld t0, 24(t2)
8631 // 8: ld t2, 16(t2)
8632 // 12: jalr t0
8633 // 16: <StaticChainOffset>
8634 // 24: <FunctionAddressOffset>
8635 // 32:
8636 // Offset with branch control flow protection enabled:
8637 // 0: lpad <imm20>
8638 // 4: auipc t3, 0
8639 // 8: ld t2, 28(t3)
8640 // 12: ld t3, 20(t3)
8641 // 16: jalr t2
8642 // 20: <StaticChainOffset>
8643 // 28: <FunctionAddressOffset>
8644 // 36:
8645
8646 const bool HasCFBranch =
8647 Subtarget.hasStdExtZicfilp() &&
8648 DAG.getMachineFunction().getFunction().getParent()->getModuleFlag(
8649 "cf-protection-branch");
8650 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8651 const unsigned StaticChainOffset = StaticChainIdx * 4;
8652 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
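// For the layout above without branch protection this gives
// StaticChainOffset = 16 and FunctionAddressOffset = 24; with Zicfilp it
// gives 20 and 28 respectively.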
8653
8654 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8655 assert(STI);
8656 auto GetEncoding = [&](const MCInst &MC) {
8657 SmallVector<char, 32> CB;
8658 SmallVector<MCFixup> Fixups;
8659 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8660 uint32_t Encoding = support::endian::read32le(CB.data());
8661 return Encoding;
8662 };
8663
8664 SmallVector<SDValue> OutChains;
8665
8666 SmallVector<uint32_t> Encodings;
8667 if (!HasCFBranch) {
8668 Encodings.append(
8669 {// auipc t2, 0
8670 // Loads the current PC into t2.
8671 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8672 // ld t0, 24(t2)
8673 // Loads the function address into t0. Note that we are using offsets
8674 // pc-relative to the first instruction of the trampoline.
8675 GetEncoding(MCInstBuilder(RISCV::LD)
8676 .addReg(RISCV::X5)
8677 .addReg(RISCV::X7)
8678 .addImm(FunctionAddressOffset)),
8679 // ld t2, 16(t2)
8680 // Load the value of the static chain.
8681 GetEncoding(MCInstBuilder(RISCV::LD)
8682 .addReg(RISCV::X7)
8683 .addReg(RISCV::X7)
8684 .addImm(StaticChainOffset)),
8685 // jalr t0
8686 // Jump to the function.
8687 GetEncoding(MCInstBuilder(RISCV::JALR)
8688 .addReg(RISCV::X0)
8689 .addReg(RISCV::X5)
8690 .addImm(0))});
8691 } else {
8692 Encodings.append(
8693 {// auipc x0, <imm20> (lpad <imm20>)
8694 // Landing pad.
8695 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8696 // auipc t3, 0
8697 // Loads the current PC into t3.
8698 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8699 // ld t2, (FunctionAddressOffset - 4)(t3)
8700 // Loads the function address into t2. Note that we are using offsets
8701 // pc-relative to the SECOND instruction of the trampoline.
8702 GetEncoding(MCInstBuilder(RISCV::LD)
8703 .addReg(RISCV::X7)
8704 .addReg(RISCV::X28)
8705 .addImm(FunctionAddressOffset - 4)),
8706 // ld t3, (StaticChainOffset - 4)(t3)
8707 // Load the value of the static chain.
8708 GetEncoding(MCInstBuilder(RISCV::LD)
8709 .addReg(RISCV::X28)
8710 .addReg(RISCV::X28)
8711 .addImm(StaticChainOffset - 4)),
8712 // jalr t2
8713 // Software-guarded jump to the function.
8714 GetEncoding(MCInstBuilder(RISCV::JALR)
8715 .addReg(RISCV::X0)
8716 .addReg(RISCV::X7)
8717 .addImm(0))});
8718 }
8719
8720 // Store encoded instructions.
8721 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8722 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8723 DAG.getConstant(Idx * 4, dl, MVT::i64))
8724 : Trmp;
8725 OutChains.push_back(DAG.getTruncStore(
8726 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8727 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8728 }
8729
8730 // Now store the variable part of the trampoline.
8731 SDValue FunctionAddress = Op.getOperand(2);
8732 SDValue StaticChain = Op.getOperand(3);
8733
8734 // Store the given static chain and function pointer in the trampoline buffer.
8735 struct OffsetValuePair {
8736 const unsigned Offset;
8737 const SDValue Value;
8738 SDValue Addr = SDValue(); // Used to cache the address.
8739 } OffsetValues[] = {
8740 {StaticChainOffset, StaticChain},
8741 {FunctionAddressOffset, FunctionAddress},
8742 };
8743 for (auto &OffsetValue : OffsetValues) {
8744 SDValue Addr =
8745 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8746 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8747 OffsetValue.Addr = Addr;
8748 OutChains.push_back(
8749 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8750 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8751 }
8752
8753 assert(OutChains.size() == StaticChainIdx + 2 &&
8754 "Size of OutChains mismatch");
8755 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8756
8757 // The end of the trampoline's instructions coincides with the static chain
8758 // address that we computed earlier.
8759 SDValue EndOfTrmp = OffsetValues[0].Addr;
8760
8761 // Call clear cache on the trampoline instructions.
8762 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8763 Trmp, EndOfTrmp);
8764
8765 return Chain;
8766}
8767
8768SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8769 SelectionDAG &DAG) const {
8770 if (!Subtarget.is64Bit())
8771 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8772
8773 return Op.getOperand(0);
8774}
8775
8776SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8777 SelectionDAG &DAG) const {
8778 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8779 // TODO: There are many other sub-cases we could potentially lower, are
8780 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
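// Conceptually, each i32 lane of the result is Accum plus the dot product
// of the four i8 lanes of A and B that occupy the same 32 bits
// (vqdot[u/su].vv).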
8781 SDLoc DL(Op);
8782 MVT VT = Op.getSimpleValueType();
8783 SDValue Accum = Op.getOperand(0);
8784 assert(Accum.getSimpleValueType() == VT &&
8785 VT.getVectorElementType() == MVT::i32);
8786 SDValue A = Op.getOperand(1);
8787 SDValue B = Op.getOperand(2);
8788 MVT ArgVT = A.getSimpleValueType();
8789 assert(ArgVT == B.getSimpleValueType() &&
8790 ArgVT.getVectorElementType() == MVT::i8);
8791 (void)ArgVT;
8792
8793 // The zvqdotq pseudos are defined with sources and destination both
8794 // being i32. This cast is needed for correctness to avoid incorrect
8795 // .vx matching of i8 splats.
8796 A = DAG.getBitcast(VT, A);
8797 B = DAG.getBitcast(VT, B);
8798
8799 MVT ContainerVT = VT;
8800 if (VT.isFixedLengthVector()) {
8801 ContainerVT = getContainerForFixedLengthVector(VT);
8802 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8803 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8804 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8805 }
8806
8807 unsigned Opc;
8808 switch (Op.getOpcode()) {
8809 case ISD::PARTIAL_REDUCE_SMLA:
8810 Opc = RISCVISD::VQDOT_VL;
8811 break;
8812 case ISD::PARTIAL_REDUCE_UMLA:
8813 Opc = RISCVISD::VQDOTU_VL;
8814 break;
8815 case ISD::PARTIAL_REDUCE_SUMLA:
8816 Opc = RISCVISD::VQDOTSU_VL;
8817 break;
8818 default:
8819 llvm_unreachable("Unexpected opcode");
8820 }
8821 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8822 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8823 if (VT.isFixedLengthVector())
8824 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8825 return Res;
8826}
8827
8828 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8829 SelectionDAG &DAG, unsigned Flags) {
8830 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8831}
8832
8833 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8834 SelectionDAG &DAG, unsigned Flags) {
8835 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8836 Flags);
8837}
8838
8839 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8840 SelectionDAG &DAG, unsigned Flags) {
8841 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8842 N->getOffset(), Flags);
8843}
8844
8845 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8846 SelectionDAG &DAG, unsigned Flags) {
8847 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8848}
8849
8850 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8851 EVT Ty, SelectionDAG &DAG) {
8852 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8853 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8854 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8855 return DAG.getLoad(
8856 Ty, DL, DAG.getEntryNode(), LC,
8858}
8859
8860 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8861 EVT Ty, SelectionDAG &DAG) {
8862 RISCVConstantPoolValue *CPV =
8863 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8864 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8865 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8866 return DAG.getLoad(
8867 Ty, DL, DAG.getEntryNode(), LC,
8869}
8870
8871template <class NodeTy>
8872SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8873 bool IsLocal, bool IsExternWeak) const {
8874 SDLoc DL(N);
8875 EVT Ty = getPointerTy(DAG.getDataLayout());
8876
8877 // When HWASAN is used and tagging of global variables is enabled
8878 // they should be accessed via the GOT, since the tagged address of a global
8879 // is incompatible with existing code models. This also applies to non-pic
8880 // mode.
8881 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8882 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8883 if (IsLocal && !Subtarget.allowTaggedGlobals())
8884 // Use PC-relative addressing to access the symbol. This generates the
8885 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8886 // %pcrel_lo(auipc)).
8887 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8888
8889 // Use PC-relative addressing to access the GOT for this symbol, then load
8890 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8891 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8892 SDValue Load =
8893 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8894 MachineFunction &MF = DAG.getMachineFunction();
8895 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8896 MachinePointerInfo::getGOT(MF),
8897 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8898 MachineMemOperand::MOInvariant,
8899 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8900 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8901 return Load;
8902 }
8903
8904 switch (getTargetMachine().getCodeModel()) {
8905 default:
8906 reportFatalUsageError("Unsupported code model for lowering");
8907 case CodeModel::Small: {
8908 // Generate a sequence for accessing addresses within the first 2 GiB of
8909 // address space.
8910 if (Subtarget.hasVendorXqcili()) {
8911 // Use QC.E.LI to generate the address, as this is easier to relax than
8912 // LUI/ADDI.
8913 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8914 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8915 }
8916
8917 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8918 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8919 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8920 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8921 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8922 }
8923 case CodeModel::Medium: {
8924 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8925 if (IsExternWeak) {
8926 // An extern weak symbol may be undefined, i.e. have value 0, which may
8927 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8928 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8929 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8930 SDValue Load =
8931 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8932 MachineFunction &MF = DAG.getMachineFunction();
8933 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8934 MachinePointerInfo::getGOT(MF),
8935 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8936 MachineMemOperand::MOInvariant,
8937 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8938 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8939 return Load;
8940 }
8941
8942 // Generate a sequence for accessing addresses within any 2GiB range within
8943 // the address space. This generates the pattern (PseudoLLA sym), which
8944 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8945 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8946 }
8947 case CodeModel::Large: {
8948 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8949 return getLargeGlobalAddress(G, DL, Ty, DAG);
8950
8951 // Use PC-relative addressing for other node types.
8952 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8953 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8954 }
8955 }
8956}
8957
8958SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8959 SelectionDAG &DAG) const {
8960 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8961 assert(N->getOffset() == 0 && "unexpected offset in global node");
8962 const GlobalValue *GV = N->getGlobal();
8963 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8964}
8965
8966SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8967 SelectionDAG &DAG) const {
8968 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8969
8970 return getAddr(N, DAG);
8971}
8972
8973SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8974 SelectionDAG &DAG) const {
8975 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8976
8977 return getAddr(N, DAG);
8978}
8979
8980SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8981 SelectionDAG &DAG) const {
8982 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8983
8984 return getAddr(N, DAG);
8985}
8986
8987SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8988 SelectionDAG &DAG,
8989 bool UseGOT) const {
8990 SDLoc DL(N);
8991 EVT Ty = getPointerTy(DAG.getDataLayout());
8992 const GlobalValue *GV = N->getGlobal();
8993 MVT XLenVT = Subtarget.getXLenVT();
8994
8995 if (UseGOT) {
8996 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8997 // load the address from the GOT and add the thread pointer. This generates
8998 // the pattern (PseudoLA_TLS_IE sym), which expands to
8999 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9000 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9001 SDValue Load =
9002 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9003 MachineFunction &MF = DAG.getMachineFunction();
9004 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9005 MachinePointerInfo::getGOT(MF),
9006 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
9007 MachineMemOperand::MOInvariant,
9008 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9009 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9010
9011 // Add the thread pointer.
9012 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9013 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9014 }
9015
9016 // Generate a sequence for accessing the address relative to the thread
9017 // pointer, with the appropriate adjustment for the thread pointer offset.
9018 // This generates the pattern
9019 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9020 SDValue AddrHi =
9021 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9022 SDValue AddrAdd =
9023 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9024 SDValue AddrLo =
9025 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9026
9027 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9028 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9029 SDValue MNAdd =
9030 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9031 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9032}
9033
9034SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9035 SelectionDAG &DAG) const {
9036 SDLoc DL(N);
9037 EVT Ty = getPointerTy(DAG.getDataLayout());
9038 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9039 const GlobalValue *GV = N->getGlobal();
9040
9041 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9042 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9043 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9044 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9045 SDValue Load =
9046 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9047
9048 // Prepare argument list to generate call.
9049 ArgListTy Args;
9050 Args.emplace_back(Load, CallTy);
9051
9052 // Setup call to __tls_get_addr.
9053 TargetLowering::CallLoweringInfo CLI(DAG);
9054 CLI.setDebugLoc(DL)
9055 .setChain(DAG.getEntryNode())
9056 .setLibCallee(CallingConv::C, CallTy,
9057 DAG.getExternalSymbol("__tls_get_addr", Ty),
9058 std::move(Args));
9059
9060 return LowerCallTo(CLI).first;
9061}
9062
9063SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9064 SelectionDAG &DAG) const {
9065 SDLoc DL(N);
9066 EVT Ty = getPointerTy(DAG.getDataLayout());
9067 const GlobalValue *GV = N->getGlobal();
9068
9069 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9070 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9071 //
9072 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9073 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9074 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9075 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9076 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9077 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9078}
9079
9080SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9081 SelectionDAG &DAG) const {
9082 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9083 assert(N->getOffset() == 0 && "unexpected offset in global node");
9084
9085 if (DAG.getTarget().useEmulatedTLS())
9086 return LowerToTLSEmulatedModel(N, DAG);
9087
9088 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9089
9090 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9091 CallingConv::GHC)
9092 reportFatalUsageError("In GHC calling convention TLS is not supported");
9093
9094 SDValue Addr;
9095 switch (Model) {
9096 case TLSModel::LocalExec:
9097 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9098 break;
9099 case TLSModel::InitialExec:
9100 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9101 break;
9102 case TLSModel::LocalDynamic:
9103 case TLSModel::GeneralDynamic:
9104 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9105 : getDynamicTLSAddr(N, DAG);
9106 break;
9107 }
9108
9109 return Addr;
9110}
9111
9112// Return true if Val is equal to (setcc LHS, RHS, CC).
9113// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9114// Otherwise, return std::nullopt.
9115static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9116 ISD::CondCode CC, SDValue Val) {
9117 assert(Val->getOpcode() == ISD::SETCC);
9118 SDValue LHS2 = Val.getOperand(0);
9119 SDValue RHS2 = Val.getOperand(1);
9120 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9121
9122 if (LHS == LHS2 && RHS == RHS2) {
9123 if (CC == CC2)
9124 return true;
9125 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9126 return false;
9127 } else if (LHS == RHS2 && RHS == LHS2) {
9128 CC2 = ISD::getSetCCSwappedOperands(CC2);
9129 if (CC == CC2)
9130 return true;
9131 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9132 return false;
9133 }
9134
9135 return std::nullopt;
9136}
9137
9138 static bool isSimm12Constant(SDValue V) {
9139 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9140}
9141
9142 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9143 const RISCVSubtarget &Subtarget) {
9144 SDValue CondV = N->getOperand(0);
9145 SDValue TrueV = N->getOperand(1);
9146 SDValue FalseV = N->getOperand(2);
9147 MVT VT = N->getSimpleValueType(0);
9148 SDLoc DL(N);
9149
9150 if (!Subtarget.hasConditionalMoveFusion()) {
9151 // (select c, -1, y) -> -c | y
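// (The condition is expected to be 0 or 1 here, following RISC-V's 0/1
// boolean contents, so its negation is either 0 or all-ones and acts as a
// select mask.)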
9152 if (isAllOnesConstant(TrueV)) {
9153 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9154 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9155 }
9156 // (select c, y, -1) -> (c-1) | y
9157 if (isAllOnesConstant(FalseV)) {
9158 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9159 DAG.getAllOnesConstant(DL, VT));
9160 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9161 }
9162
9163 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9164
9165 // (select c, 0, y) -> (c-1) & y
9166 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9167 SDValue Neg =
9168 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9169 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9170 }
9171 if (isNullConstant(FalseV)) {
9172 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9173 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9174 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9175 if (isPowerOf2_64(TrueM1)) {
9176 unsigned ShAmount = Log2_64(TrueM1);
9177 if (Subtarget.hasShlAdd(ShAmount))
9178 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9179 DAG.getConstant(ShAmount, DL, VT), CondV);
9180 }
9181 }
9182 // (select c, y, 0) -> -c & y
9183 if (!HasCZero || isSimm12Constant(TrueV)) {
9184 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9185 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9186 }
9187 }
9188 }
9189
9190 // select c, ~x, x --> xor -c, x
9191 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9192 const APInt &TrueVal = TrueV->getAsAPIntVal();
9193 const APInt &FalseVal = FalseV->getAsAPIntVal();
9194 if (~TrueVal == FalseVal) {
9195 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9196 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9197 }
9198 }
9199
9200 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9201 // when both truev and falsev are also setcc.
9202 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9203 FalseV.getOpcode() == ISD::SETCC) {
9204 SDValue LHS = CondV.getOperand(0);
9205 SDValue RHS = CondV.getOperand(1);
9206 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9207
9208 // (select x, x, y) -> x | y
9209 // (select !x, x, y) -> x & y
9210 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9211 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9212 DAG.getFreeze(FalseV));
9213 }
9214 // (select x, y, x) -> x & y
9215 // (select !x, y, x) -> x | y
9216 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9217 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9218 DAG.getFreeze(TrueV), FalseV);
9219 }
9220 }
9221
9222 return SDValue();
9223}
9224
9225// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9226// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9227 // For now we only consider the transformation profitable if `binOp(c0, c1)`
9228 // ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
9229// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9230// than `c0`?
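// Illustrative example: (add (select cond, x, 5), -5) becomes
// (select cond, (add x, -5), 0), which can then be lowered without a
// branch as a mask-and of (add x, -5).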
9231static SDValue
9232 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9233 const RISCVSubtarget &Subtarget) {
9234 if (Subtarget.hasShortForwardBranchOpt())
9235 return SDValue();
9236
9237 unsigned SelOpNo = 0;
9238 SDValue Sel = BO->getOperand(0);
9239 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9240 SelOpNo = 1;
9241 Sel = BO->getOperand(1);
9242 }
9243
9244 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9245 return SDValue();
9246
9247 unsigned ConstSelOpNo = 1;
9248 unsigned OtherSelOpNo = 2;
9249 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9250 ConstSelOpNo = 2;
9251 OtherSelOpNo = 1;
9252 }
9253 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9254 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9255 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9256 return SDValue();
9257
9258 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9259 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9260 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9261 return SDValue();
9262
9263 SDLoc DL(Sel);
9264 EVT VT = BO->getValueType(0);
9265
9266 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9267 if (SelOpNo == 1)
9268 std::swap(NewConstOps[0], NewConstOps[1]);
9269
9270 SDValue NewConstOp =
9271 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9272 if (!NewConstOp)
9273 return SDValue();
9274
9275 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9276 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9277 return SDValue();
9278
9279 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9280 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9281 if (SelOpNo == 1)
9282 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9283 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9284
9285 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9286 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9287 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9288}
9289
9290SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9291 SDValue CondV = Op.getOperand(0);
9292 SDValue TrueV = Op.getOperand(1);
9293 SDValue FalseV = Op.getOperand(2);
9294 SDLoc DL(Op);
9295 MVT VT = Op.getSimpleValueType();
9296 MVT XLenVT = Subtarget.getXLenVT();
9297
9298 // Lower vector SELECTs to VSELECTs by splatting the condition.
9299 if (VT.isVector()) {
9300 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9301 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9302 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9303 }
9304
9305 // Try some other optimizations before falling back to generic lowering.
9306 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9307 return V;
9308
9309 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9310 // nodes to implement the SELECT. Performing the lowering here allows for
9311 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9312 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9313 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9314
9315 // (select c, t, 0) -> (czero_eqz t, c)
9316 if (isNullConstant(FalseV))
9317 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9318 // (select c, 0, f) -> (czero_nez f, c)
9319 if (isNullConstant(TrueV))
9320 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9321
9322 // Check whether a given operation is a 'NOT'; if so, return the negated
9323 // operand.
9324 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9325 using namespace llvm::SDPatternMatch;
9326 SDValue Xor;
9327 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9328 return Xor;
9329 }
9330 return std::nullopt;
9331 };
9332 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9333 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9334 if (TrueV.getOpcode() == ISD::AND &&
9335 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9336 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9337 ? getNotOperand(TrueV.getOperand(1))
9338 : getNotOperand(TrueV.getOperand(0));
9339 if (NotOperand) {
9340 SDValue CMOV =
9341 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9342 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9343 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9344 }
9345 return DAG.getNode(
9346 ISD::OR, DL, VT, TrueV,
9347 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9348 }
9349
9350 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9351 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9352 if (FalseV.getOpcode() == ISD::AND &&
9353 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9354 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9355 ? getNotOperand(FalseV.getOperand(1))
9356 : getNotOperand(FalseV.getOperand(0));
9357 if (NotOperand) {
9358 SDValue CMOV =
9359 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9360 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9361 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9362 }
9363 return DAG.getNode(
9364 ISD::OR, DL, VT, FalseV,
9365 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9366 }
9367
9368 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9369 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9370 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9371 const APInt &TrueVal = TrueV->getAsAPIntVal();
9372 const APInt &FalseVal = FalseV->getAsAPIntVal();
9373
9374 // Prefer these over Zicond to avoid materializing an immediate:
9375 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9376 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
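// (x >> (XLEN - 1) is an arithmetic shift that yields all-ones when x is
// negative and zero otherwise, so the AND keeps either (y - z) or 0
// before the final add of z.)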
9377 if (CondV.getOpcode() == ISD::SETCC &&
9378 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9379 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9380 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9381 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9382 int64_t TrueImm = TrueVal.getSExtValue();
9383 int64_t FalseImm = FalseVal.getSExtValue();
9384 if (CCVal == ISD::SETGT)
9385 std::swap(TrueImm, FalseImm);
9386 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9387 isInt<12>(TrueImm - FalseImm)) {
9388 SDValue SRA =
9389 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9390 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9391 SDValue AND =
9392 DAG.getNode(ISD::AND, DL, VT, SRA,
9393 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9394 return DAG.getNode(ISD::ADD, DL, VT, AND,
9395 DAG.getSignedConstant(FalseImm, DL, VT));
9396 }
9397 }
9398 }
9399
9400 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9401 // a constant in a register.
9402 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9403 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9404 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9405 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9406 }
9407 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9408 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9409 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9410 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9411 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9412 }
9413
9414 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9415 const int DeltaCost = RISCVMatInt::getIntMatCost(
9416 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9417 // Does the addend fold into an ADDI
9418 if (Addend.isSignedIntN(12))
9419 return DeltaCost;
9420 const int AddendCost = RISCVMatInt::getIntMatCost(
9421 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9422 return AddendCost + DeltaCost;
9423 };
9424 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9425 getCost(TrueVal - FalseVal, FalseVal);
9426 SDValue LHSVal = DAG.getConstant(
9427 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9428 SDValue CMOV =
9429 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9430 DL, VT, LHSVal, CondV);
9431 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9432 }
9433
9434 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9435 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9436 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9437 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9438 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9439 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9440 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9441 // Fall back to XORI if Const == -0x800
9442 if (RawConstVal == -0x800) {
9443 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9444 SDValue CMOV =
9445 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9446 DL, VT, XorOp, CondV);
9447 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9448 }
9449 // Efficient only if the constant and its negation fit into an `ADDI`.
9450 // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9451 if (isInt<12>(RawConstVal)) {
9452 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9453 SDValue CMOV =
9454 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9455 DL, VT, SubOp, CondV);
9456 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9457 }
9458 }
9459
9460 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9461 // Unless we have the short forward branch optimization.
9462 if (!Subtarget.hasConditionalMoveFusion())
9463 return DAG.getNode(
9464 ISD::OR, DL, VT,
9465 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9466 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9467 SDNodeFlags::Disjoint);
9468 }
9469
9470 if (Op.hasOneUse()) {
9471 unsigned UseOpc = Op->user_begin()->getOpcode();
9472 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9473 SDNode *BinOp = *Op->user_begin();
9474 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9475 DAG, Subtarget)) {
9476 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9477 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9478 // may return a constant node and cause crash in lowerSELECT.
9479 if (NewSel.getOpcode() == ISD::SELECT)
9480 return lowerSELECT(NewSel, DAG);
9481 return NewSel;
9482 }
9483 }
9484 }
9485
9486 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9487 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9488 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9489 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9490 if (FPTV && FPFV) {
9491 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9492 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9493 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9494 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9495 DAG.getConstant(1, DL, XLenVT));
9496 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9497 }
9498 }
9499
9500 // If the condition is not an integer SETCC which operates on XLenVT, we need
9501 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9502 // (select condv, truev, falsev)
9503 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9504 if (CondV.getOpcode() != ISD::SETCC ||
9505 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9506 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9507 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9508
9509 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9510
9511 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9512 }
9513
9514 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9515 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9516 // advantage of the integer compare+branch instructions. i.e.:
9517 // (select (setcc lhs, rhs, cc), truev, falsev)
9518 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9519 SDValue LHS = CondV.getOperand(0);
9520 SDValue RHS = CondV.getOperand(1);
9521 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9522
9523 // Special case for a select of 2 constants that have a difference of 1.
9524 // Normally this is done by DAGCombine, but if the select is introduced by
9525 // type legalization or op legalization, we miss it. Restricting to SETLT
9526 // case for now because that is what signed saturating add/sub need.
9527 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9528 // but we would probably want to swap the true/false values if the condition
9529 // is SETGE/SETLE to avoid an XORI.
9530 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9531 CCVal == ISD::SETLT) {
9532 const APInt &TrueVal = TrueV->getAsAPIntVal();
9533 const APInt &FalseVal = FalseV->getAsAPIntVal();
9534 if (TrueVal - 1 == FalseVal)
9535 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9536 if (TrueVal + 1 == FalseVal)
9537 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9538 }
9539
9540 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9541 // 1 < x ? x : 1 -> 0 < x ? x : 1
9542 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9543 RHS == TrueV && LHS == FalseV) {
9544 LHS = DAG.getConstant(0, DL, VT);
9545 // 0 <u x is the same as x != 0.
9546 if (CCVal == ISD::SETULT) {
9547 std::swap(LHS, RHS);
9548 CCVal = ISD::SETNE;
9549 }
9550 }
9551
9552 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9553 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9554 RHS == FalseV) {
9555 RHS = DAG.getConstant(0, DL, VT);
9556 }
9557
9558 SDValue TargetCC = DAG.getCondCode(CCVal);
9559
9560 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9561 // (select (setcc lhs, rhs, CC), constant, falsev)
9562 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9563 std::swap(TrueV, FalseV);
9564 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9565 }
9566
9567 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9568 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9569}
9570
9571SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9572 SDValue CondV = Op.getOperand(1);
9573 SDLoc DL(Op);
9574 MVT XLenVT = Subtarget.getXLenVT();
9575
9576 if (CondV.getOpcode() == ISD::SETCC &&
9577 CondV.getOperand(0).getValueType() == XLenVT) {
9578 SDValue LHS = CondV.getOperand(0);
9579 SDValue RHS = CondV.getOperand(1);
9580 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9581
9582 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9583
9584 SDValue TargetCC = DAG.getCondCode(CCVal);
9585 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9586 LHS, RHS, TargetCC, Op.getOperand(2));
9587 }
9588
9589 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9590 CondV, DAG.getConstant(0, DL, XLenVT),
9591 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9592}
9593
9594SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9595 MachineFunction &MF = DAG.getMachineFunction();
9596 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9597
9598 SDLoc DL(Op);
9599 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9600 getPointerTy(MF.getDataLayout()));
9601
9602 // vastart just stores the address of the VarArgsFrameIndex slot into the
9603 // memory location argument.
9604 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9605 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9606 MachinePointerInfo(SV));
9607}
9608
9609SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9610 SelectionDAG &DAG) const {
9611 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9612 MachineFunction &MF = DAG.getMachineFunction();
9613 MachineFrameInfo &MFI = MF.getFrameInfo();
9614 MFI.setFrameAddressIsTaken(true);
9615 Register FrameReg = RI.getFrameRegister(MF);
9616 int XLenInBytes = Subtarget.getXLen() / 8;
9617
9618 EVT VT = Op.getValueType();
9619 SDLoc DL(Op);
9620 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9621 unsigned Depth = Op.getConstantOperandVal(0);
9622 while (Depth--) {
9623 int Offset = -(XLenInBytes * 2);
9624 SDValue Ptr = DAG.getNode(
9625 ISD::ADD, DL, VT, FrameAddr,
9626 DAG.getSignedConstant(Offset, DL, VT));
9627 FrameAddr =
9628 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9629 }
9630 return FrameAddr;
9631}
9632
9633SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9634 SelectionDAG &DAG) const {
9635 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9636 MachineFunction &MF = DAG.getMachineFunction();
9637 MachineFrameInfo &MFI = MF.getFrameInfo();
9638 MFI.setReturnAddressIsTaken(true);
9639 MVT XLenVT = Subtarget.getXLenVT();
9640 int XLenInBytes = Subtarget.getXLen() / 8;
9641
9642 EVT VT = Op.getValueType();
9643 SDLoc DL(Op);
9644 unsigned Depth = Op.getConstantOperandVal(0);
9645 if (Depth) {
9646 int Off = -XLenInBytes;
9647 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9648 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9649 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9650 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9651 MachinePointerInfo());
9652 }
9653
9654 // Return the value of the return address register, marking it an implicit
9655 // live-in.
9656 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9657 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9658}
9659
9660SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9661 SelectionDAG &DAG) const {
9662 SDLoc DL(Op);
9663 SDValue Lo = Op.getOperand(0);
9664 SDValue Hi = Op.getOperand(1);
9665 SDValue Shamt = Op.getOperand(2);
9666 EVT VT = Lo.getValueType();
9667
9668 // if Shamt-XLEN < 0: // Shamt < XLEN
9669 // Lo = Lo << Shamt
9670 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9671 // else:
9672 // Lo = 0
9673 // Hi = Lo << (Shamt-XLEN)
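// For example, on RV32 (XLEN = 32) a shift amount of 40 takes the else
// branch: Lo = 0 and Hi = Lo << 8.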
9674
9675 SDValue Zero = DAG.getConstant(0, DL, VT);
9676 SDValue One = DAG.getConstant(1, DL, VT);
9677 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9678 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9679 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9680 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9681
9682 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9683 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9684 SDValue ShiftRightLo =
9685 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9686 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9687 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9688 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9689
9690 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9691
9692 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9693 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9694
9695 SDValue Parts[2] = {Lo, Hi};
9696 return DAG.getMergeValues(Parts, DL);
9697}
9698
9699SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9700 bool IsSRA) const {
9701 SDLoc DL(Op);
9702 SDValue Lo = Op.getOperand(0);
9703 SDValue Hi = Op.getOperand(1);
9704 SDValue Shamt = Op.getOperand(2);
9705 EVT VT = Lo.getValueType();
9706
9707 // SRA expansion:
9708 // if Shamt-XLEN < 0: // Shamt < XLEN
9709 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9710 // Hi = Hi >>s Shamt
9711 // else:
9712 // Lo = Hi >>s (Shamt-XLEN);
9713 // Hi = Hi >>s (XLEN-1)
9714 //
9715 // SRL expansion:
9716 // if Shamt-XLEN < 0: // Shamt < XLEN
9717 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9718 // Hi = Hi >>u Shamt
9719 // else:
9720 // Lo = Hi >>u (Shamt-XLEN);
9721 // Hi = 0;
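// For example, on RV32 an SRA with Shamt = 40 takes the else branch:
// Lo = Hi >>s 8 and Hi = Hi >>s 31.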
9722
9723 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9724
9725 SDValue Zero = DAG.getConstant(0, DL, VT);
9726 SDValue One = DAG.getConstant(1, DL, VT);
9727 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9728 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9729 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9730 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9731
9732 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9733 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9734 SDValue ShiftLeftHi =
9735 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9736 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9737 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9738 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9739 SDValue HiFalse =
9740 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9741
9742 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9743
9744 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9745 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9746
9747 SDValue Parts[2] = {Lo, Hi};
9748 return DAG.getMergeValues(Parts, DL);
9749}
9750
9751// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9752// legal equivalently-sized i8 type, so we can use that as a go-between.
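// For example, a non-constant (splat_vector i1 %x) is emitted as
// (setcc (splat_vector (and %x, 1)), (splat_vector 0), setne) over the
// equivalently-sized i8 vector type.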
9753SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9754 SelectionDAG &DAG) const {
9755 SDLoc DL(Op);
9756 MVT VT = Op.getSimpleValueType();
9757 SDValue SplatVal = Op.getOperand(0);
9758 // All-zeros or all-ones splats are handled specially.
9759 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9760 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9761 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9762 }
9763 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9764 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9765 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9766 }
9767 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9768 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9769 DAG.getConstant(1, DL, SplatVal.getValueType()));
9770 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9771 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9772 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9773}
9774
9775// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9776// illegal (currently only vXi64 RV32).
9777// FIXME: We could also catch non-constant sign-extended i32 values and lower
9778// them to VMV_V_X_VL.
9779SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9780 SelectionDAG &DAG) const {
9781 SDLoc DL(Op);
9782 MVT VecVT = Op.getSimpleValueType();
9783 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9784 "Unexpected SPLAT_VECTOR_PARTS lowering");
9785
9786 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9787 SDValue Lo = Op.getOperand(0);
9788 SDValue Hi = Op.getOperand(1);
9789
9790 MVT ContainerVT = VecVT;
9791 if (VecVT.isFixedLengthVector())
9792 ContainerVT = getContainerForFixedLengthVector(VecVT);
9793
9794 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9795
9796 SDValue Res =
9797 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9798
9799 if (VecVT.isFixedLengthVector())
9800 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9801
9802 return Res;
9803}
9804
9805// Custom-lower extensions from mask vectors by using a vselect either with 1
9806// for zero/any-extension or -1 for sign-extension:
9807// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9808// Note that any-extension is lowered identically to zero-extension.
9809SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9810 int64_t ExtTrueVal) const {
9811 SDLoc DL(Op);
9812 MVT VecVT = Op.getSimpleValueType();
9813 SDValue Src = Op.getOperand(0);
9814 // Only custom-lower extensions from mask types
9815 assert(Src.getValueType().isVector() &&
9816 Src.getValueType().getVectorElementType() == MVT::i1);
9817
9818 if (VecVT.isScalableVector()) {
9819 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9820 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9821 if (Src.getOpcode() == ISD::XOR &&
9822 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9823 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9824 SplatTrueVal);
9825 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9826 }
9827
9828 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9829 MVT I1ContainerVT =
9830 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9831
9832 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9833
9834 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9835
9836 MVT XLenVT = Subtarget.getXLenVT();
9837 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9838 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9839
9840 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9841 SDValue Xor = Src.getOperand(0);
9842 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9843 SDValue ScalableOnes = Xor.getOperand(1);
9844 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9845 ScalableOnes.getOperand(0).isUndef() &&
9846 ISD::isConstantSplatVectorAllOnes(
9847 ScalableOnes.getOperand(1).getNode())) {
9848 CC = Xor.getOperand(0);
9849 std::swap(SplatZero, SplatTrueVal);
9850 }
9851 }
9852 }
9853
9854 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9855 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9856 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9857 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9858 SDValue Select =
9859 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9860 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9861
9862 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9863}
9864
9865// Custom-lower truncations from vectors to mask vectors by using a mask and a
9866// setcc operation:
9867// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9868SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9869 SelectionDAG &DAG) const {
9870 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9871 SDLoc DL(Op);
9872 EVT MaskVT = Op.getValueType();
9873 // Only expect to custom-lower truncations to mask types
9874 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9875 "Unexpected type for vector mask lowering");
9876 SDValue Src = Op.getOperand(0);
9877 MVT VecVT = Src.getSimpleValueType();
9878 SDValue Mask, VL;
9879 if (IsVPTrunc) {
9880 Mask = Op.getOperand(1);
9881 VL = Op.getOperand(2);
9882 }
9883 // If this is a fixed vector, we need to convert it to a scalable vector.
9884 MVT ContainerVT = VecVT;
9885
9886 if (VecVT.isFixedLengthVector()) {
9887 ContainerVT = getContainerForFixedLengthVector(VecVT);
9888 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9889 if (IsVPTrunc) {
9890 MVT MaskContainerVT =
9891 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9892 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9893 }
9894 }
9895
9896 if (!IsVPTrunc) {
9897 std::tie(Mask, VL) =
9898 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9899 }
9900
9901 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9902 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9903
9904 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9905 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9906 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9907 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9908
9909 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9910 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9911 DAG.getUNDEF(ContainerVT), Mask, VL);
9912 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9913 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9914 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9915 if (MaskVT.isFixedLengthVector())
9916 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9917 return Trunc;
9918}
9919
9920SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9921 SelectionDAG &DAG) const {
9922 unsigned Opc = Op.getOpcode();
9923 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9924 SDLoc DL(Op);
9925
9926 MVT VT = Op.getSimpleValueType();
9927 // Only custom-lower vector truncates
9928 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9929
9930 // Truncates to mask types are handled differently
9931 if (VT.getVectorElementType() == MVT::i1)
9932 return lowerVectorMaskTruncLike(Op, DAG);
9933
9934 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9935 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9936 // truncate by one power of two at a time.
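// For example, a vXi64 -> vXi8 truncate becomes three such nodes:
// i64 -> i32 -> i16 -> i8, each step halving SEW.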
9937 MVT DstEltVT = VT.getVectorElementType();
9938
9939 SDValue Src = Op.getOperand(0);
9940 MVT SrcVT = Src.getSimpleValueType();
9941 MVT SrcEltVT = SrcVT.getVectorElementType();
9942
9943 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9944 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9945 "Unexpected vector truncate lowering");
9946
9947 MVT ContainerVT = SrcVT;
9948 SDValue Mask, VL;
9949 if (IsVPTrunc) {
9950 Mask = Op.getOperand(1);
9951 VL = Op.getOperand(2);
9952 }
9953 if (SrcVT.isFixedLengthVector()) {
9954 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9955 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9956 if (IsVPTrunc) {
9957 MVT MaskVT = getMaskTypeFor(ContainerVT);
9958 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9959 }
9960 }
9961
9962 SDValue Result = Src;
9963 if (!IsVPTrunc) {
9964 std::tie(Mask, VL) =
9965 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9966 }
9967
9968 unsigned NewOpc;
9969 if (Opc == ISD::TRUNCATE_SSAT_S)
9970 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9971 else if (Opc == ISD::TRUNCATE_USAT_U)
9972 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9973 else
9974 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9975
9976 do {
9977 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9978 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9979 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9980 } while (SrcEltVT != DstEltVT);
9981
9982 if (SrcVT.isFixedLengthVector())
9983 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9984
9985 return Result;
9986}
9987
9988SDValue
9989RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
9990 SelectionDAG &DAG) const {
9991 SDLoc DL(Op);
9992 SDValue Chain = Op.getOperand(0);
9993 SDValue Src = Op.getOperand(1);
9994 MVT VT = Op.getSimpleValueType();
9995 MVT SrcVT = Src.getSimpleValueType();
9996 MVT ContainerVT = VT;
9997 if (VT.isFixedLengthVector()) {
9998 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9999 ContainerVT =
10000 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10001 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10002 }
10003
10004 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10005
10006 // RVV can only widen/truncate fp to types double/half the size of the source.
10007 if ((VT.getVectorElementType() == MVT::f64 &&
10008 (SrcVT.getVectorElementType() == MVT::f16 ||
10009 SrcVT.getVectorElementType() == MVT::bf16)) ||
10010 ((VT.getVectorElementType() == MVT::f16 ||
10011 VT.getVectorElementType() == MVT::bf16) &&
10012 SrcVT.getVectorElementType() == MVT::f64)) {
10013 // For double rounding, the intermediate rounding should be round-to-odd.
10014 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10015 ? RISCVISD::STRICT_FP_EXTEND_VL
10016 : RISCVISD::STRICT_VFNCVT_ROD_VL;
10017 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10018 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
10019 Chain, Src, Mask, VL);
10020 Chain = Src.getValue(1);
10021 }
10022
10023 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10024 ? RISCVISD::STRICT_FP_EXTEND_VL
10025 : RISCVISD::STRICT_FP_ROUND_VL;
10026 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10027 Chain, Src, Mask, VL);
10028 if (VT.isFixedLengthVector()) {
10029 // StrictFP operations have two result values. Their lowered result should
10030 // have the same result count.
10031 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10032 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10033 }
10034 return Res;
10035}
10036
10037SDValue
10038RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10039 SelectionDAG &DAG) const {
10040 bool IsVP =
10041 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10042 bool IsExtend =
10043 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10044 // RVV can only truncate fp to types half the size of the source. We
10045 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10046 // conversion instruction.
10047 SDLoc DL(Op);
10048 MVT VT = Op.getSimpleValueType();
10049
10050 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10051
10052 SDValue Src = Op.getOperand(0);
10053 MVT SrcVT = Src.getSimpleValueType();
10054
10055 bool IsDirectExtend =
10056 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10057 (SrcVT.getVectorElementType() != MVT::f16 &&
10058 SrcVT.getVectorElementType() != MVT::bf16));
10059 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10060 VT.getVectorElementType() != MVT::bf16) ||
10061 SrcVT.getVectorElementType() != MVT::f64);
10062
10063 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10064
10065 // We have regular SD node patterns for direct non-VL extends.
10066 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10067 return Op;
10068
10069 // Prepare any fixed-length vector operands.
10070 MVT ContainerVT = VT;
10071 SDValue Mask, VL;
10072 if (IsVP) {
10073 Mask = Op.getOperand(1);
10074 VL = Op.getOperand(2);
10075 }
10076 if (VT.isFixedLengthVector()) {
10077 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10078 ContainerVT =
10079 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10080 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10081 if (IsVP) {
10082 MVT MaskVT = getMaskTypeFor(ContainerVT);
10083 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10084 }
10085 }
10086
10087 if (!IsVP)
10088 std::tie(Mask, VL) =
10089 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10090
10091 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10092
10093 if (IsDirectConv) {
10094 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10095 if (VT.isFixedLengthVector())
10096 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10097 return Src;
10098 }
10099
10100 unsigned InterConvOpc =
10101 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10102
10103 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10104 SDValue IntermediateConv =
10105 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10106 SDValue Result =
10107 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10108 if (VT.isFixedLengthVector())
10109 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10110 return Result;
10111}
10112
10113// Given a scalable vector type and an index into it, returns the type for the
10114// smallest subvector that the index fits in. This can be used to reduce LMUL
10115// for operations like vslidedown.
10116//
10117// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10118static std::optional<MVT>
10119getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10120 const RISCVSubtarget &Subtarget) {
10121 assert(VecVT.isScalableVector());
10122 const unsigned EltSize = VecVT.getScalarSizeInBits();
10123 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10124 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10125 MVT SmallerVT;
10126 if (MaxIdx < MinVLMAX)
10127 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10128 else if (MaxIdx < MinVLMAX * 2)
10129 SmallerVT =
10130 RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10131 else if (MaxIdx < MinVLMAX * 4)
10132 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10133 .getDoubleNumVectorElementsVT()
10134 .getDoubleNumVectorElementsVT();
10135 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10136 return std::nullopt;
10137 return SmallerVT;
10138}
10139
10141 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10142 if (!IdxC || isNullConstant(Idx))
10143 return false;
10144 return isUInt<5>(IdxC->getZExtValue());
10145}
10146
10147// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10148// first position of a vector, and that vector is slid up to the insert index.
10149// By limiting the active vector length to index+1 and merging with the
10150// original vector (with an undisturbed tail policy for elements >= VL), we
10151// achieve the desired result of leaving all elements untouched except the one
10152// at VL-1, which is replaced with the desired value.
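// For example, inserting a scalar at index 7 roughly becomes: set VL to 8,
// move the scalar into element 0 of a temporary (vmv.s.x / vfmv.s.f), then
// vslideup by 7 with a tail-undisturbed policy so elements 8 and above are
// preserved.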
10153SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10154 SelectionDAG &DAG) const {
10155 SDLoc DL(Op);
10156 MVT VecVT = Op.getSimpleValueType();
10157 MVT XLenVT = Subtarget.getXLenVT();
10158 SDValue Vec = Op.getOperand(0);
10159 SDValue Val = Op.getOperand(1);
10160 MVT ValVT = Val.getSimpleValueType();
10161 SDValue Idx = Op.getOperand(2);
10162
10163 if (VecVT.getVectorElementType() == MVT::i1) {
10164 // FIXME: For now we just promote to an i8 vector and insert into that,
10165 // but this is probably not optimal.
10166 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10167 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10168 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10169 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10170 }
10171
10172 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10173 ValVT == MVT::bf16) {
10174 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10175 MVT IntVT = VecVT.changeTypeToInteger();
10176 SDValue IntInsert = DAG.getNode(
10177 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10178 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10179 return DAG.getBitcast(VecVT, IntInsert);
10180 }
10181
10182 MVT ContainerVT = VecVT;
10183 // If the operand is a fixed-length vector, convert to a scalable one.
10184 if (VecVT.isFixedLengthVector()) {
10185 ContainerVT = getContainerForFixedLengthVector(VecVT);
10186 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10187 }
10188
10189 // If we know the index we're going to insert at, we can shrink Vec so that
10190 // we're performing the scalar inserts and slideup on a smaller LMUL.
10191 SDValue OrigVec = Vec;
10192 std::optional<unsigned> AlignedIdx;
10193 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10194 const unsigned OrigIdx = IdxC->getZExtValue();
10195 // Do we know an upper bound on LMUL?
10196 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10197 DL, DAG, Subtarget)) {
10198 ContainerVT = *ShrunkVT;
10199 AlignedIdx = 0;
10200 }
10201
10202 // If we're compiling for an exact VLEN value, we can always perform
10203 // the insert in m1 as we can determine the register corresponding to
10204 // the index in the register group.
10205 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10206 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10207 EVT ElemVT = VecVT.getVectorElementType();
10208 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10209 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10210 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10211 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10212 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10213 ContainerVT = M1VT;
10214 }
10215
10216 if (AlignedIdx)
10217 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10218 }
10219
10220 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10221 // Even i64-element vectors on RV32 can be lowered without scalar
10222 // legalization if the most-significant 32 bits of the value are not affected
10223 // by the sign-extension of the lower 32 bits.
10224 // TODO: We could also catch sign extensions of a 32-bit value.
10225 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10226 const auto *CVal = cast<ConstantSDNode>(Val);
10227 if (isInt<32>(CVal->getSExtValue())) {
10228 IsLegalInsert = true;
10229 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10230 }
10231 }
10232
10233 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10234
10235 SDValue ValInVec;
10236
10237 if (IsLegalInsert) {
10238 unsigned Opc =
10239 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10240 if (isNullConstant(Idx)) {
10241 if (!VecVT.isFloatingPoint())
10242 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10243 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10244
10245 if (AlignedIdx)
10246 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10247 if (!VecVT.isFixedLengthVector())
10248 return Vec;
10249 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10250 }
10251
10252 // Use ri.vinsert.v.x if available.
10253 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10255 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10256 SDValue PolicyOp =
10258 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10259 VL, PolicyOp);
10260 if (AlignedIdx)
10261 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10262 if (!VecVT.isFixedLengthVector())
10263 return Vec;
10264 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10265 }
10266
10267 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10268 } else {
10269 // On RV32, i64-element vectors must be specially handled to place the
10270 // value at element 0, by using two vslide1down instructions in sequence on
10271 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10272 // this.
10273 SDValue ValLo, ValHi;
10274 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10275 MVT I32ContainerVT =
10276 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10277 SDValue I32Mask =
10278 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10279 // Limit the active VL to two.
10280 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10281 // If the Idx is 0 we can insert directly into the vector.
10282 if (isNullConstant(Idx)) {
10283 // First slide in the lo value, then the hi in above it. We use slide1down
10284 // to avoid the register group overlap constraint of vslide1up.
10285 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10286 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10287 // If the source vector is undef don't pass along the tail elements from
10288 // the previous slide1down.
10289 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10290 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10291 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10292 // Bitcast back to the right container type.
10293 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10294
10295 if (AlignedIdx)
10296 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10297 if (!VecVT.isFixedLengthVector())
10298 return ValInVec;
10299 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10300 }
10301
10302 // First slide in the lo value, then the hi in above it. We use slide1down
10303 // to avoid the register group overlap constraint of vslide1up.
10304 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10305 DAG.getUNDEF(I32ContainerVT),
10306 DAG.getUNDEF(I32ContainerVT), ValLo,
10307 I32Mask, InsertI64VL);
10308 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10309 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10310 I32Mask, InsertI64VL);
10311 // Bitcast back to the right container type.
10312 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10313 }
10314
10315 // Now that the value is in a vector, slide it into position.
10316 SDValue InsertVL =
10317 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10318
10319 // Use tail agnostic policy if Idx is the last index of Vec.
10320 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10321 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10322 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10323 Policy = RISCVVType::TAIL_AGNOSTIC;
10324 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10325 Idx, Mask, InsertVL, Policy);
10326
10327 if (AlignedIdx)
10328 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10329 if (!VecVT.isFixedLengthVector())
10330 return Slideup;
10331 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10332}
10333
10334// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10335// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10336// types this is done using VMV_X_S to allow us to glean information about the
10337// sign bits of the result.
10338SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10339 SelectionDAG &DAG) const {
10340 SDLoc DL(Op);
10341 SDValue Idx = Op.getOperand(1);
10342 SDValue Vec = Op.getOperand(0);
10343 EVT EltVT = Op.getValueType();
10344 MVT VecVT = Vec.getSimpleValueType();
10345 MVT XLenVT = Subtarget.getXLenVT();
10346
10347 if (VecVT.getVectorElementType() == MVT::i1) {
10348 // Use vfirst.m to extract the first bit.
10349 if (isNullConstant(Idx)) {
10350 MVT ContainerVT = VecVT;
10351 if (VecVT.isFixedLengthVector()) {
10352 ContainerVT = getContainerForFixedLengthVector(VecVT);
10353 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10354 }
10355 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10356 SDValue Vfirst =
10357 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10358 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10359 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10360 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10361 }
10362 if (VecVT.isFixedLengthVector()) {
10363 unsigned NumElts = VecVT.getVectorNumElements();
10364 if (NumElts >= 8) {
10365 MVT WideEltVT;
10366 unsigned WidenVecLen;
10367 SDValue ExtractElementIdx;
10368 SDValue ExtractBitIdx;
10369 unsigned MaxEEW = Subtarget.getELen();
10370 MVT LargestEltVT = MVT::getIntegerVT(
10371 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10372 if (NumElts <= LargestEltVT.getSizeInBits()) {
10373 assert(isPowerOf2_32(NumElts) &&
10374 "the number of elements should be power of 2");
10375 WideEltVT = MVT::getIntegerVT(NumElts);
10376 WidenVecLen = 1;
10377 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10378 ExtractBitIdx = Idx;
10379 } else {
10380 WideEltVT = LargestEltVT;
10381 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10382 // extract element index = index / element width
10383 ExtractElementIdx = DAG.getNode(
10384 ISD::SRL, DL, XLenVT, Idx,
10385 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10386 // mask bit index = index % element width
10387 ExtractBitIdx = DAG.getNode(
10388 ISD::AND, DL, XLenVT, Idx,
10389 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10390 }
10391 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10392 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10393 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10394 Vec, ExtractElementIdx);
10395 // Extract the bit from GPR.
10396 SDValue ShiftRight =
10397 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10398 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10399 DAG.getConstant(1, DL, XLenVT));
10400 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10401 }
10402 }
10403 // Otherwise, promote to an i8 vector and extract from that.
10404 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10405 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10406 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10407 }
10408
10409 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10410 EltVT == MVT::bf16) {
10411 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10412 MVT IntVT = VecVT.changeTypeToInteger();
10413 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10414 SDValue IntExtract =
10415 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10416 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10417 }
10418
10419 // If this is a fixed vector, we need to convert it to a scalable vector.
10420 MVT ContainerVT = VecVT;
10421 if (VecVT.isFixedLengthVector()) {
10422 ContainerVT = getContainerForFixedLengthVector(VecVT);
10423 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10424 }
10425
10426 // If we're compiling for an exact VLEN value and we have a known
10427 // constant index, we can always perform the extract in m1 (or
10428 // smaller) as we can determine the register corresponding to
10429 // the index in the register group.
10430 const auto VLen = Subtarget.getRealVLen();
10431 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10432 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10433 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10434 unsigned OrigIdx = IdxC->getZExtValue();
10435 EVT ElemVT = VecVT.getVectorElementType();
10436 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10437 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10438 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10439 unsigned ExtractIdx =
10440 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10441 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10442 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10443 ContainerVT = M1VT;
10444 }
10445
10446 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10447 // contains our index.
10448 std::optional<uint64_t> MaxIdx;
10449 if (VecVT.isFixedLengthVector())
10450 MaxIdx = VecVT.getVectorNumElements() - 1;
10451 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10452 MaxIdx = IdxC->getZExtValue();
10453 if (MaxIdx) {
10454 if (auto SmallerVT =
10455 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10456 ContainerVT = *SmallerVT;
10457 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10458 }
10459 }
10460
10461 // Use ri.vextract.x.v if available.
10462 // TODO: Avoid index 0 and just use the vmv.x.s
10463 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10465 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10466 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10467 }
10468
10469 // If after narrowing, the required slide is still greater than LMUL2,
10470 // fallback to generic expansion and go through the stack. This is done
10471 // for a subtle reason: extracting *all* elements out of a vector is
10472 // widely expected to be linear in vector size, but because vslidedown
10473 // is linear in LMUL, performing N extracts using vslidedown becomes
10474 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10475 // seems to have the same problem (the store is linear in LMUL), but the
10476 // generic expansion *memoizes* the store, and thus for many extracts of
10477 // the same vector we end up with one store and a bunch of loads.
10478 // TODO: We don't have the same code for insert_vector_elt because we
10479 // have BUILD_VECTOR and handle the degenerate case there. Should we
10480 // consider adding an inverse BUILD_VECTOR node?
10481 MVT LMUL2VT =
10482 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10483 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10484 return SDValue();
10485
10486 // If the index is 0, the vector is already in the right position.
10487 if (!isNullConstant(Idx)) {
10488 // Use a VL of 1 to avoid processing more elements than we need.
10489 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10490 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10491 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10492 }
10493
10494 if (!EltVT.isInteger()) {
10495 // Floating-point extracts are handled in TableGen.
10496 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10497 }
10498
10499 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10500 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10501}
10502
10503// Some RVV intrinsics may claim that they want an integer operand to be
10504// promoted or expanded.
10505 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10506 const RISCVSubtarget &Subtarget) {
10507 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10508 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10509 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10510 "Unexpected opcode");
10511
10512 if (!Subtarget.hasVInstructions())
10513 return SDValue();
10514
10515 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10516 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10517 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10518
10519 SDLoc DL(Op);
10520
10521 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10522 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10523 if (!II || !II->hasScalarOperand())
10524 return SDValue();
10525
10526 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10527 assert(SplatOp < Op.getNumOperands());
10528
10529 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
10530 SDValue &ScalarOp = Operands[SplatOp];
10531 MVT OpVT = ScalarOp.getSimpleValueType();
10532 MVT XLenVT = Subtarget.getXLenVT();
10533
10534 // If this isn't a scalar, or its type is XLenVT we're done.
10535 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10536 return SDValue();
10537
10538 // Simplest case is that the operand needs to be promoted to XLenVT.
10539 if (OpVT.bitsLT(XLenVT)) {
10540 // If the operand is a constant, sign extend to increase our chances
10541 // of being able to use a .vi instruction. ANY_EXTEND would become
10542 // a zero extend and the simm5 check in isel would fail.
10543 // FIXME: Should we ignore the upper bits in isel instead?
10544 unsigned ExtOpc =
10545 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10546 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10547 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10548 }
10549
10550 // Use the previous operand to get the vXi64 VT. The result might be a mask
10551 // VT for compares. Using the previous operand assumes that the previous
10552 // operand will never have a smaller element size than a scalar operand and
10553 // that a widening operation never uses SEW=64.
10554 // NOTE: If this fails the below assert, we can probably just find the
10555 // element count from any operand or result and use it to construct the VT.
10556 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10557 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10558
10559 // The more complex case is when the scalar is larger than XLenVT.
10560 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10561 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10562
10563 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10564 // instruction to sign-extend since SEW>XLEN.
10565 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10566 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10567 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10568 }
10569
10570 switch (IntNo) {
10571 case Intrinsic::riscv_vslide1up:
10572 case Intrinsic::riscv_vslide1down:
10573 case Intrinsic::riscv_vslide1up_mask:
10574 case Intrinsic::riscv_vslide1down_mask: {
10575 // We need to special case these when the scalar is larger than XLen.
10576 unsigned NumOps = Op.getNumOperands();
10577 bool IsMasked = NumOps == 7;
10578
10579 // Convert the vector source to the equivalent nxvXi32 vector.
10580 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10581 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10582 SDValue ScalarLo, ScalarHi;
10583 std::tie(ScalarLo, ScalarHi) =
10584 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10585
10586 // Double the VL since we halved SEW.
10587 SDValue AVL = getVLOperand(Op);
10588 SDValue I32VL;
10589
10590 // Optimize for constant AVL
10591 if (isa<ConstantSDNode>(AVL)) {
10592 const auto [MinVLMAX, MaxVLMAX] =
10593 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10594
10595 uint64_t AVLInt = AVL->getAsZExtVal();
10596 if (AVLInt <= MinVLMAX) {
10597 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10598 } else if (AVLInt >= 2 * MaxVLMAX) {
10599 // Just set vl to VLMAX in this situation
10600 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10601 } else {
10602 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10603 // is related to the hardware implementation.
10604 // So let the following code handle it.
10605 }
10606 }
10607 if (!I32VL) {
10608 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10609 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10610 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10611 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10612 SDValue SETVL =
10613 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10614 // Use the vsetvli instruction to get the actually-used length, which is
10615 // related to the hardware implementation.
10616 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10617 SEW, LMUL);
10618 I32VL =
10619 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10620 }
10621
10622 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10623
10624 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10625 // instructions.
10626 SDValue Passthru;
10627 if (IsMasked)
10628 Passthru = DAG.getUNDEF(I32VT);
10629 else
10630 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10631
10632 if (IntNo == Intrinsic::riscv_vslide1up ||
10633 IntNo == Intrinsic::riscv_vslide1up_mask) {
10634 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10635 ScalarHi, I32Mask, I32VL);
10636 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10637 ScalarLo, I32Mask, I32VL);
10638 } else {
10639 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10640 ScalarLo, I32Mask, I32VL);
10641 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10642 ScalarHi, I32Mask, I32VL);
10643 }
10644
10645 // Convert back to nxvXi64.
10646 Vec = DAG.getBitcast(VT, Vec);
10647
10648 if (!IsMasked)
10649 return Vec;
10650 // Apply mask after the operation.
10651 SDValue Mask = Operands[NumOps - 3];
10652 SDValue MaskedOff = Operands[1];
10653 // Assume Policy operand is the last operand.
10654 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10655 // We don't need to select maskedoff if it's undef.
10656 if (MaskedOff.isUndef())
10657 return Vec;
10658 // TAMU
10659 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10660 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10661 DAG.getUNDEF(VT), AVL);
10662 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10663 // It's fine because vmerge does not care about the mask policy.
10664 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10665 MaskedOff, AVL);
10666 }
10667 }
10668
10669 // We need to convert the scalar to a splat vector.
10670 SDValue VL = getVLOperand(Op);
10671 assert(VL.getValueType() == XLenVT);
10672 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10673 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10674}
10675
10676// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10677// scalable vector llvm.get.vector.length for now.
10678//
10679// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10680// (vscale * VF). The vscale and VF are independent of element width. We use
10681// SEW=8 for the vsetvli because it is the only element width that supports all
10682// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10683// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10684// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10685// SEW and LMUL are better for the surrounding vector instructions.
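// For example, with RVVBitsPerBlock = 64 a request for VF = 2 gives
// LMul1VF = 8, so LMUL = 2/8 = 1/4 (fractional); the emitted vsetvli with
// SEW=8, LMUL=mf4 yields VLMax = VLEN/32 = vscale * 2, as required.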
10686 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10687 const RISCVSubtarget &Subtarget) {
10688 MVT XLenVT = Subtarget.getXLenVT();
10689
10690 // The smallest LMUL is only valid for the smallest element width.
10691 const unsigned ElementWidth = 8;
10692
10693 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10694 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10695 // We don't support VF==1 with ELEN==32.
10696 [[maybe_unused]] unsigned MinVF =
10697 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10698
10699 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10700 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10701 "Unexpected VF");
10702
10703 bool Fractional = VF < LMul1VF;
10704 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10705 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10706 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10707
10708 SDLoc DL(N);
10709
10710 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10711 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10712
10713 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10714
10715 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10716 SDValue Res =
10717 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10718 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10719}
10720
10721 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10722 const RISCVSubtarget &Subtarget) {
10723 SDValue Op0 = N->getOperand(1);
10724 MVT OpVT = Op0.getSimpleValueType();
10725 MVT ContainerVT = OpVT;
10726 if (OpVT.isFixedLengthVector()) {
10727 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10728 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10729 }
10730 MVT XLenVT = Subtarget.getXLenVT();
10731 SDLoc DL(N);
10732 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10733 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10734 if (isOneConstant(N->getOperand(2)))
10735 return Res;
10736
10737 // Convert -1 to VL.
10738 SDValue Setcc =
10739 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10740 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10741 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10742}
10743
10744static inline void promoteVCIXScalar(SDValue Op,
10745 SmallVectorImpl<SDValue> &Operands,
10746 SelectionDAG &DAG) {
10747 const RISCVSubtarget &Subtarget =
10748 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10749
10750 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10752 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10753 SDLoc DL(Op);
10754
10755 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10756 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10757 if (!II || !II->hasScalarOperand())
10758 return;
10759
10760 unsigned SplatOp = II->ScalarOperand + 1;
10761 assert(SplatOp < Op.getNumOperands());
10762
10763 SDValue &ScalarOp = Operands[SplatOp];
10764 MVT OpVT = ScalarOp.getSimpleValueType();
10765 MVT XLenVT = Subtarget.getXLenVT();
10766
10767 // The code below is partially copied from lowerVectorIntrinsicScalars.
10768 // If this isn't a scalar, or its type is XLenVT we're done.
10769 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10770 return;
10771
10772 // Manually emit promote operation for scalar operation.
10773 if (OpVT.bitsLT(XLenVT)) {
10774 unsigned ExtOpc =
10775 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10776 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10777 }
10778}
10779
10780static void processVCIXOperands(SDValue OrigOp,
10781 SmallVectorImpl<SDValue> &Operands,
10782 SelectionDAG &DAG) {
10783 promoteVCIXScalar(OrigOp, Operands, DAG);
10784 const RISCVSubtarget &Subtarget =
10785 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10786 for (SDValue &V : Operands) {
10787 EVT ValType = V.getValueType();
10788 if (ValType.isVector() && ValType.isFloatingPoint()) {
10789 MVT InterimIVT =
10790 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10791 ValType.getVectorElementCount());
10792 V = DAG.getBitcast(InterimIVT, V);
10793 }
10794 if (ValType.isFixedLengthVector()) {
10795 MVT OpContainerVT = getContainerForFixedLengthVector(
10796 DAG, V.getSimpleValueType(), Subtarget);
10797 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10798 }
10799 }
10800}
10801
10802// LMUL * VLEN should be greater than or equal to EGS * SEW
10803static inline bool isValidEGW(int EGS, EVT VT,
10804 const RISCVSubtarget &Subtarget) {
10805 return (Subtarget.getRealMinVLen() *
10806 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10807 EGS * VT.getScalarSizeInBits();
10808}
10809
10810SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10811 SelectionDAG &DAG) const {
10812 unsigned IntNo = Op.getConstantOperandVal(0);
10813 SDLoc DL(Op);
10814 MVT XLenVT = Subtarget.getXLenVT();
10815
10816 switch (IntNo) {
10817 default:
10818 break; // Don't custom lower most intrinsics.
10819 case Intrinsic::riscv_tuple_insert: {
10820 SDValue Vec = Op.getOperand(1);
10821 SDValue SubVec = Op.getOperand(2);
10822 SDValue Index = Op.getOperand(3);
10823
10824 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10825 SubVec, Index);
10826 }
10827 case Intrinsic::riscv_tuple_extract: {
10828 SDValue Vec = Op.getOperand(1);
10829 SDValue Index = Op.getOperand(2);
10830
10831 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10832 Index);
10833 }
10834 case Intrinsic::thread_pointer: {
10835 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10836 return DAG.getRegister(RISCV::X4, PtrVT);
10837 }
10838 case Intrinsic::riscv_orc_b:
10839 case Intrinsic::riscv_brev8:
10840 case Intrinsic::riscv_sha256sig0:
10841 case Intrinsic::riscv_sha256sig1:
10842 case Intrinsic::riscv_sha256sum0:
10843 case Intrinsic::riscv_sha256sum1:
10844 case Intrinsic::riscv_sm3p0:
10845 case Intrinsic::riscv_sm3p1: {
10846 unsigned Opc;
10847 switch (IntNo) {
10848 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10849 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10850 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10851 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10852 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10853 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10854 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10855 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10856 }
10857
10858 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10859 }
10860 case Intrinsic::riscv_sm4ks:
10861 case Intrinsic::riscv_sm4ed: {
10862 unsigned Opc =
10863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10864
10865 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10866 Op.getOperand(3));
10867 }
10868 case Intrinsic::riscv_zip:
10869 case Intrinsic::riscv_unzip: {
10870 unsigned Opc =
10871 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10872 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10873 }
10874 case Intrinsic::riscv_mopr:
10875 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10876 Op.getOperand(2));
10877
10878 case Intrinsic::riscv_moprr: {
10879 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10880 Op.getOperand(2), Op.getOperand(3));
10881 }
10882 case Intrinsic::riscv_clmul:
10883 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10884 Op.getOperand(2));
10885 case Intrinsic::riscv_clmulh:
10886 case Intrinsic::riscv_clmulr: {
10887 unsigned Opc =
10888 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10889 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10890 }
10891 case Intrinsic::experimental_get_vector_length:
10892 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10893 case Intrinsic::experimental_cttz_elts:
10894 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10895 case Intrinsic::riscv_vmv_x_s: {
10896 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10897 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10898 }
10899 case Intrinsic::riscv_vfmv_f_s:
10900 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10901 case Intrinsic::riscv_vmv_v_x:
10902 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10903 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10904 Subtarget);
10905 case Intrinsic::riscv_vfmv_v_f:
10906 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10907 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10908 case Intrinsic::riscv_vmv_s_x: {
10909 SDValue Scalar = Op.getOperand(2);
10910
10911 if (Scalar.getValueType().bitsLE(XLenVT)) {
10912 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10913 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10914 Op.getOperand(1), Scalar, Op.getOperand(3));
10915 }
10916
10917 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10918
10919 // This is an i64 value that lives in two scalar registers. We have to
10920 // insert this in a convoluted way. First we build vXi64 splat containing
10921 // the two values that we assemble using some bit math. Next we'll use
10922 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10923 // to merge element 0 from our splat into the source vector.
10924 // FIXME: This is probably not the best way to do this, but it is
10925 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10926 // point.
10927 // sw lo, (a0)
10928 // sw hi, 4(a0)
10929 // vlse vX, (a0)
10930 //
10931 // vid.v vVid
10932 // vmseq.vx mMask, vVid, 0
10933 // vmerge.vvm vDest, vSrc, vVal, mMask
10934 MVT VT = Op.getSimpleValueType();
10935 SDValue Vec = Op.getOperand(1);
10936 SDValue VL = getVLOperand(Op);
10937
10938 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10939 if (Op.getOperand(1).isUndef())
10940 return SplattedVal;
10941 SDValue SplattedIdx =
10942 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10943 DAG.getConstant(0, DL, MVT::i32), VL);
10944
10945 MVT MaskVT = getMaskTypeFor(VT);
10946 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10947 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10948 SDValue SelectCond =
10949 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10950 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10951 DAG.getUNDEF(MaskVT), Mask, VL});
10952 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10953 Vec, DAG.getUNDEF(VT), VL);
10954 }
10955 case Intrinsic::riscv_vfmv_s_f:
10956 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10957 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10958 // EGS * EEW >= 128 bits
10959 case Intrinsic::riscv_vaesdf_vv:
10960 case Intrinsic::riscv_vaesdf_vs:
10961 case Intrinsic::riscv_vaesdm_vv:
10962 case Intrinsic::riscv_vaesdm_vs:
10963 case Intrinsic::riscv_vaesef_vv:
10964 case Intrinsic::riscv_vaesef_vs:
10965 case Intrinsic::riscv_vaesem_vv:
10966 case Intrinsic::riscv_vaesem_vs:
10967 case Intrinsic::riscv_vaeskf1:
10968 case Intrinsic::riscv_vaeskf2:
10969 case Intrinsic::riscv_vaesz_vs:
10970 case Intrinsic::riscv_vsm4k:
10971 case Intrinsic::riscv_vsm4r_vv:
10972 case Intrinsic::riscv_vsm4r_vs: {
10973 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10974 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10975 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10976 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10977 return Op;
10978 }
10979 // EGS * EEW >= 256 bits
10980 case Intrinsic::riscv_vsm3c:
10981 case Intrinsic::riscv_vsm3me: {
10982 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10983 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10984 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10985 return Op;
10986 }
10987 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10988 case Intrinsic::riscv_vsha2ch:
10989 case Intrinsic::riscv_vsha2cl:
10990 case Intrinsic::riscv_vsha2ms: {
10991 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
10992 !Subtarget.hasStdExtZvknhb())
10993 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
10994 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10995 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10996 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10997 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10998 return Op;
10999 }
11000 case Intrinsic::riscv_sf_vc_v_x:
11001 case Intrinsic::riscv_sf_vc_v_i:
11002 case Intrinsic::riscv_sf_vc_v_xv:
11003 case Intrinsic::riscv_sf_vc_v_iv:
11004 case Intrinsic::riscv_sf_vc_v_vv:
11005 case Intrinsic::riscv_sf_vc_v_fv:
11006 case Intrinsic::riscv_sf_vc_v_xvv:
11007 case Intrinsic::riscv_sf_vc_v_ivv:
11008 case Intrinsic::riscv_sf_vc_v_vvv:
11009 case Intrinsic::riscv_sf_vc_v_fvv:
11010 case Intrinsic::riscv_sf_vc_v_xvw:
11011 case Intrinsic::riscv_sf_vc_v_ivw:
11012 case Intrinsic::riscv_sf_vc_v_vvw:
11013 case Intrinsic::riscv_sf_vc_v_fvw: {
11014 MVT VT = Op.getSimpleValueType();
11015
11016 SmallVector<SDValue> Operands{Op->op_values()};
11017 processVCIXOperands(Op, Operands, DAG);
11018
11019 MVT RetVT = VT;
11020 if (VT.isFixedLengthVector())
11021 RetVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
11022 else if (VT.isFloatingPoint())
11023 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11024 VT.getVectorElementCount());
11025
11026 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11027
11028 if (VT.isFixedLengthVector())
11029 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11030 else if (VT.isFloatingPoint())
11031 NewNode = DAG.getBitcast(VT, NewNode);
11032
11033 if (Op == NewNode)
11034 break;
11035
11036 return NewNode;
11037 }
11038 }
11039
11040 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11041}
11042
11043 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11044 unsigned Type) {
11045 SDLoc DL(Op);
11046 SmallVector<SDValue> Operands{Op->op_values()};
11047 Operands.erase(Operands.begin() + 1);
11048
11049 const RISCVSubtarget &Subtarget =
11050 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11051 MVT VT = Op.getSimpleValueType();
11052 MVT RetVT = VT;
11053 MVT FloatVT = VT;
11054
11055 if (VT.isFloatingPoint()) {
11056 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11057 VT.getVectorElementCount());
11058 FloatVT = RetVT;
11059 }
11060 if (VT.isFixedLengthVector())
11061 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11062 Subtarget);
11063
11064 processVCIXOperands(Op, Operands, DAG);
11065
11066 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11067 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11068 SDValue Chain = NewNode.getValue(1);
11069
11070 if (VT.isFixedLengthVector())
11071 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11072 if (VT.isFloatingPoint())
11073 NewNode = DAG.getBitcast(VT, NewNode);
11074
11075 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11076
11077 return NewNode;
11078}
11079
11080 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11081 unsigned Type) {
11082 SmallVector<SDValue> Operands{Op->op_values()};
11083 Operands.erase(Operands.begin() + 1);
11084 processVCIXOperands(Op, Operands, DAG);
11085
11086 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11087}
11088
11089static SDValue
11090 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11091 const RISCVSubtarget &Subtarget,
11092 SelectionDAG &DAG) {
11093 bool IsStrided;
11094 switch (IntNo) {
11095 case Intrinsic::riscv_seg2_load_mask:
11096 case Intrinsic::riscv_seg3_load_mask:
11097 case Intrinsic::riscv_seg4_load_mask:
11098 case Intrinsic::riscv_seg5_load_mask:
11099 case Intrinsic::riscv_seg6_load_mask:
11100 case Intrinsic::riscv_seg7_load_mask:
11101 case Intrinsic::riscv_seg8_load_mask:
11102 IsStrided = false;
11103 break;
11104 case Intrinsic::riscv_sseg2_load_mask:
11105 case Intrinsic::riscv_sseg3_load_mask:
11106 case Intrinsic::riscv_sseg4_load_mask:
11107 case Intrinsic::riscv_sseg5_load_mask:
11108 case Intrinsic::riscv_sseg6_load_mask:
11109 case Intrinsic::riscv_sseg7_load_mask:
11110 case Intrinsic::riscv_sseg8_load_mask:
11111 IsStrided = true;
11112 break;
11113 default:
11114 llvm_unreachable("unexpected intrinsic ID");
11115 };
11116
11117 static const Intrinsic::ID VlsegInts[7] = {
11118 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11119 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11120 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11121 Intrinsic::riscv_vlseg8_mask};
11122 static const Intrinsic::ID VlssegInts[7] = {
11123 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11124 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11125 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11126 Intrinsic::riscv_vlsseg8_mask};
11127
11128 SDLoc DL(Op);
11129 unsigned NF = Op->getNumValues() - 1;
11130 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11131 MVT XLenVT = Subtarget.getXLenVT();
11132 MVT VT = Op->getSimpleValueType(0);
11133 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11134 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11135 ContainerVT.getScalarSizeInBits();
11136 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11137
11138 // Operands: (chain, int_id, pointer, mask, vl) or
11139 // (chain, int_id, pointer, offset, mask, vl)
11140 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11141 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11142 MVT MaskVT = Mask.getSimpleValueType();
11143 MVT MaskContainerVT =
11144 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11145 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11146
11147 SDValue IntID = DAG.getTargetConstant(
11148 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11149 auto *Load = cast<MemIntrinsicSDNode>(Op);
11150
11151 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11152 SmallVector<SDValue> Ops = {
11153 Load->getChain(),
11154 IntID,
11155 DAG.getUNDEF(VecTupTy),
11156 Op.getOperand(2),
11157 Mask,
11158 VL,
11159 DAG.getTargetConstant(
11160 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11161 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11162 // Insert the stride operand.
11163 if (IsStrided)
11164 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11165
11166 SDValue Result =
11167 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11168 Load->getMemoryVT(), Load->getMemOperand());
11169 SmallVector<SDValue> Results;
11170 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11171 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11172 Result.getValue(0),
11173 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11174 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11175 }
11176 Results.push_back(Result.getValue(1));
11177 return DAG.getMergeValues(Results, DL);
11178}
11179
11180SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11181 SelectionDAG &DAG) const {
11182 unsigned IntNo = Op.getConstantOperandVal(1);
11183 switch (IntNo) {
11184 default:
11185 break;
11186 case Intrinsic::riscv_seg2_load_mask:
11187 case Intrinsic::riscv_seg3_load_mask:
11188 case Intrinsic::riscv_seg4_load_mask:
11189 case Intrinsic::riscv_seg5_load_mask:
11190 case Intrinsic::riscv_seg6_load_mask:
11191 case Intrinsic::riscv_seg7_load_mask:
11192 case Intrinsic::riscv_seg8_load_mask:
11193 case Intrinsic::riscv_sseg2_load_mask:
11194 case Intrinsic::riscv_sseg3_load_mask:
11195 case Intrinsic::riscv_sseg4_load_mask:
11196 case Intrinsic::riscv_sseg5_load_mask:
11197 case Intrinsic::riscv_sseg6_load_mask:
11198 case Intrinsic::riscv_sseg7_load_mask:
11199 case Intrinsic::riscv_sseg8_load_mask:
11200 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11201
11202 case Intrinsic::riscv_sf_vc_v_x_se:
11203 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11204 case Intrinsic::riscv_sf_vc_v_i_se:
11205 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11206 case Intrinsic::riscv_sf_vc_v_xv_se:
11207 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11208 case Intrinsic::riscv_sf_vc_v_iv_se:
11209 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11210 case Intrinsic::riscv_sf_vc_v_vv_se:
11211 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11212 case Intrinsic::riscv_sf_vc_v_fv_se:
11213 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11214 case Intrinsic::riscv_sf_vc_v_xvv_se:
11215 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11216 case Intrinsic::riscv_sf_vc_v_ivv_se:
11217 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11218 case Intrinsic::riscv_sf_vc_v_vvv_se:
11219 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11220 case Intrinsic::riscv_sf_vc_v_fvv_se:
11221 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11222 case Intrinsic::riscv_sf_vc_v_xvw_se:
11223 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11224 case Intrinsic::riscv_sf_vc_v_ivw_se:
11225 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11226 case Intrinsic::riscv_sf_vc_v_vvw_se:
11227 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11228 case Intrinsic::riscv_sf_vc_v_fvw_se:
11229 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11230 }
11231
11232 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11233}
11234
11235static SDValue
11236 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11237 const RISCVSubtarget &Subtarget,
11238 SelectionDAG &DAG) {
11239 bool IsStrided;
11240 switch (IntNo) {
11241 case Intrinsic::riscv_seg2_store_mask:
11242 case Intrinsic::riscv_seg3_store_mask:
11243 case Intrinsic::riscv_seg4_store_mask:
11244 case Intrinsic::riscv_seg5_store_mask:
11245 case Intrinsic::riscv_seg6_store_mask:
11246 case Intrinsic::riscv_seg7_store_mask:
11247 case Intrinsic::riscv_seg8_store_mask:
11248 IsStrided = false;
11249 break;
11250 case Intrinsic::riscv_sseg2_store_mask:
11251 case Intrinsic::riscv_sseg3_store_mask:
11252 case Intrinsic::riscv_sseg4_store_mask:
11253 case Intrinsic::riscv_sseg5_store_mask:
11254 case Intrinsic::riscv_sseg6_store_mask:
11255 case Intrinsic::riscv_sseg7_store_mask:
11256 case Intrinsic::riscv_sseg8_store_mask:
11257 IsStrided = true;
11258 break;
11259 default:
11260 llvm_unreachable("unexpected intrinsic ID");
11261 }
11262
11263 SDLoc DL(Op);
11264 static const Intrinsic::ID VssegInts[] = {
11265 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11266 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11267 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11268 Intrinsic::riscv_vsseg8_mask};
11269 static const Intrinsic::ID VsssegInts[] = {
11270 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11271 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11272 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11273 Intrinsic::riscv_vssseg8_mask};
11274
11275 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11276 // (chain, int_id, vec*, ptr, stride, mask, vl)
11277 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11278 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11279 MVT XLenVT = Subtarget.getXLenVT();
11280 MVT VT = Op->getOperand(2).getSimpleValueType();
11281 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11282 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11283 ContainerVT.getScalarSizeInBits();
11284 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11285
11286 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11287 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11288 MVT MaskVT = Mask.getSimpleValueType();
11289 MVT MaskContainerVT =
11290 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11291 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11292
11293 SDValue IntID = DAG.getTargetConstant(
11294 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11295 SDValue Ptr = Op->getOperand(NF + 2);
11296
11297 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11298
11299 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11300 for (unsigned i = 0; i < NF; i++)
11301 StoredVal = DAG.getNode(
11302 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11303 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11304 DAG, Subtarget),
11305 DAG.getTargetConstant(i, DL, MVT::i32));
11306
11307 SmallVector<SDValue> Ops = {
11308 FixedIntrinsic->getChain(),
11309 IntID,
11310 StoredVal,
11311 Ptr,
11312 Mask,
11313 VL,
11314 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11315 // Insert the stride operand.
11316 if (IsStrided)
11317 Ops.insert(std::next(Ops.begin(), 4),
11318 Op.getOperand(Op.getNumOperands() - 3));
11319
11320 return DAG.getMemIntrinsicNode(
11321 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11322 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11323}
11324
11325SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11326 SelectionDAG &DAG) const {
11327 unsigned IntNo = Op.getConstantOperandVal(1);
11328 switch (IntNo) {
11329 default:
11330 break;
11331 case Intrinsic::riscv_seg2_store_mask:
11332 case Intrinsic::riscv_seg3_store_mask:
11333 case Intrinsic::riscv_seg4_store_mask:
11334 case Intrinsic::riscv_seg5_store_mask:
11335 case Intrinsic::riscv_seg6_store_mask:
11336 case Intrinsic::riscv_seg7_store_mask:
11337 case Intrinsic::riscv_seg8_store_mask:
11338 case Intrinsic::riscv_sseg2_store_mask:
11339 case Intrinsic::riscv_sseg3_store_mask:
11340 case Intrinsic::riscv_sseg4_store_mask:
11341 case Intrinsic::riscv_sseg5_store_mask:
11342 case Intrinsic::riscv_sseg6_store_mask:
11343 case Intrinsic::riscv_sseg7_store_mask:
11344 case Intrinsic::riscv_sseg8_store_mask:
11345 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11346
11347 case Intrinsic::riscv_sf_vc_xv_se:
11348 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11349 case Intrinsic::riscv_sf_vc_iv_se:
11350 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11351 case Intrinsic::riscv_sf_vc_vv_se:
11352 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11353 case Intrinsic::riscv_sf_vc_fv_se:
11354 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11355 case Intrinsic::riscv_sf_vc_xvv_se:
11356 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11357 case Intrinsic::riscv_sf_vc_ivv_se:
11358 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11359 case Intrinsic::riscv_sf_vc_vvv_se:
11360 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11361 case Intrinsic::riscv_sf_vc_fvv_se:
11362 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11363 case Intrinsic::riscv_sf_vc_xvw_se:
11364 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11365 case Intrinsic::riscv_sf_vc_ivw_se:
11366 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11367 case Intrinsic::riscv_sf_vc_vvw_se:
11368 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11369 case Intrinsic::riscv_sf_vc_fvw_se:
11370 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11371 }
11372
11373 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11374}
11375
11376static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11377 switch (ISDOpcode) {
11378 default:
11379 llvm_unreachable("Unhandled reduction");
11380 case ISD::VP_REDUCE_ADD:
11381 case ISD::VECREDUCE_ADD:
11382 return RISCVISD::VECREDUCE_ADD_VL;
11383 case ISD::VP_REDUCE_UMAX:
11384 case ISD::VECREDUCE_UMAX:
11385 return RISCVISD::VECREDUCE_UMAX_VL;
11386 case ISD::VP_REDUCE_SMAX:
11387 case ISD::VECREDUCE_SMAX:
11388 return RISCVISD::VECREDUCE_SMAX_VL;
11389 case ISD::VP_REDUCE_UMIN:
11390 case ISD::VECREDUCE_UMIN:
11391 return RISCVISD::VECREDUCE_UMIN_VL;
11392 case ISD::VP_REDUCE_SMIN:
11393 case ISD::VECREDUCE_SMIN:
11394 return RISCVISD::VECREDUCE_SMIN_VL;
11395 case ISD::VP_REDUCE_AND:
11396 case ISD::VECREDUCE_AND:
11397 return RISCVISD::VECREDUCE_AND_VL;
11398 case ISD::VP_REDUCE_OR:
11399 case ISD::VECREDUCE_OR:
11400 return RISCVISD::VECREDUCE_OR_VL;
11401 case ISD::VP_REDUCE_XOR:
11402 case ISD::VECREDUCE_XOR:
11403 return RISCVISD::VECREDUCE_XOR_VL;
11404 case ISD::VP_REDUCE_FADD:
11405 return RISCVISD::VECREDUCE_FADD_VL;
11406 case ISD::VP_REDUCE_SEQ_FADD:
11407 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11408 case ISD::VP_REDUCE_FMAX:
11409 case ISD::VP_REDUCE_FMAXIMUM:
11410 return RISCVISD::VECREDUCE_FMAX_VL;
11411 case ISD::VP_REDUCE_FMIN:
11412 case ISD::VP_REDUCE_FMINIMUM:
11413 return RISCVISD::VECREDUCE_FMIN_VL;
11414 }
11415
11416}
11417
11418SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11419 SelectionDAG &DAG,
11420 bool IsVP) const {
11421 SDLoc DL(Op);
11422 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11423 MVT VecVT = Vec.getSimpleValueType();
11424 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11425 Op.getOpcode() == ISD::VECREDUCE_OR ||
11426 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11427 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11428 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11429 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11430 "Unexpected reduction lowering");
11431
11432 MVT XLenVT = Subtarget.getXLenVT();
11433
11434 MVT ContainerVT = VecVT;
11435 if (VecVT.isFixedLengthVector()) {
11436 ContainerVT = getContainerForFixedLengthVector(VecVT);
11437 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11438 }
11439
11440 SDValue Mask, VL;
11441 if (IsVP) {
11442 Mask = Op.getOperand(2);
11443 VL = Op.getOperand(3);
11444 } else {
11445 std::tie(Mask, VL) =
11446 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11447 }
11448
11449 ISD::CondCode CC;
11450 switch (Op.getOpcode()) {
11451 default:
11452 llvm_unreachable("Unhandled reduction");
11453 case ISD::VECREDUCE_AND:
11454 case ISD::VP_REDUCE_AND: {
11455 // vcpop ~x == 0
11456 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11457 if (IsVP || VecVT.isFixedLengthVector())
11458 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11459 else
11460 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11461 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11462 CC = ISD::SETEQ;
11463 break;
11464 }
11465 case ISD::VECREDUCE_OR:
11466 case ISD::VP_REDUCE_OR:
11467 // vcpop x != 0
11468 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11469 CC = ISD::SETNE;
11470 break;
11471 case ISD::VECREDUCE_XOR:
11472 case ISD::VP_REDUCE_XOR: {
11473 // ((vcpop x) & 1) != 0
11474 SDValue One = DAG.getConstant(1, DL, XLenVT);
11475 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11476 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11477 CC = ISD::SETNE;
11478 break;
11479 }
11480 }
11481
11482 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11483 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11484 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11485
11486 if (!IsVP)
11487 return SetCC;
11488
11489 // Now include the start value in the operation.
11490 // Note that we must return the start value when no elements are operated
11491 // upon. The vcpop instructions we've emitted in each case above will return
11492 // 0 for an inactive vector, and so we've already received the neutral value:
11493 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11494 // can simply include the start value.
11495 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11496 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11497}
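// E.g. a vecreduce.or over a mask register ends up roughly as
//   vcpop.m a0, v0   ; population count of the (masked) mask bits
//   snez    a0, a0
// while vecreduce.and complements the mask first and compares the count
// against zero with seqz instead.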
11498
11499static bool isNonZeroAVL(SDValue AVL) {
11500 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11501 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11502 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11503 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11504}
11505
11506/// Helper to lower a reduction sequence of the form:
11507/// scalar = reduce_op vec, scalar_start
11508static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11509 SDValue StartValue, SDValue Vec, SDValue Mask,
11510 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11511 const RISCVSubtarget &Subtarget) {
11512 const MVT VecVT = Vec.getSimpleValueType();
11513 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11514 const MVT XLenVT = Subtarget.getXLenVT();
11515 const bool NonZeroAVL = isNonZeroAVL(VL);
11516
11517 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11518 // or the original VT if fractional.
11519 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11520 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11521 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11522 // be the result of the reduction operation.
11523 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11524 SDValue InitialValue =
11525 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11526 if (M1VT != InnerVT)
11527 InitialValue =
11528 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11529 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11530 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11531 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11532 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11533 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11534}
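// E.g. an integer vecreduce.add lowered through here becomes roughly
//   vmv.s.x    v8, a0       ; start value into element 0 of an LMUL=1 register
//   vredsum.vs v8, v16, v8  ; reduce the source vector with that start value
//   vmv.x.s    a0, v8       ; move the scalar result back to a GPR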
11535
11536SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11537 SelectionDAG &DAG) const {
11538 SDLoc DL(Op);
11539 SDValue Vec = Op.getOperand(0);
11540 EVT VecEVT = Vec.getValueType();
11541
11542 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11543
11544 // Due to ordering in legalize types we may have a vector type that needs to
11545 // be split. Do that manually so we can get down to a legal type.
11546 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11547 TargetLowering::TypeSplitVector) {
11548 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11549 VecEVT = Lo.getValueType();
11550 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11551 }
11552
11553 // TODO: The type may need to be widened rather than split. Or widened before
11554 // it can be split.
11555 if (!isTypeLegal(VecEVT))
11556 return SDValue();
11557
11558 MVT VecVT = VecEVT.getSimpleVT();
11559 MVT VecEltVT = VecVT.getVectorElementType();
11560 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11561
11562 MVT ContainerVT = VecVT;
11563 if (VecVT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VecVT);
11565 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11566 }
11567
11568 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11569
11570 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11571 switch (BaseOpc) {
11572 case ISD::AND:
11573 case ISD::OR:
11574 case ISD::UMAX:
11575 case ISD::UMIN:
11576 case ISD::SMAX:
11577 case ISD::SMIN:
11578 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11579 }
11580 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11581 Mask, VL, DL, DAG, Subtarget);
11582}
11583
11584// Given a reduction op, this function returns the matching reduction opcode,
11585// the vector SDValue and the scalar SDValue required to lower this to a
11586// RISCVISD node.
11587static std::tuple<unsigned, SDValue, SDValue>
11588 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11589 const RISCVSubtarget &Subtarget) {
11590 SDLoc DL(Op);
11591 auto Flags = Op->getFlags();
11592 unsigned Opcode = Op.getOpcode();
11593 switch (Opcode) {
11594 default:
11595 llvm_unreachable("Unhandled reduction");
11596 case ISD::VECREDUCE_FADD: {
11597 // Use positive zero if we can. It is cheaper to materialize.
11598 SDValue Zero =
11599 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11600 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11601 }
11602 case ISD::VECREDUCE_SEQ_FADD:
11603 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11604 Op.getOperand(0));
11605 case ISD::VECREDUCE_FMINIMUM:
11606 case ISD::VECREDUCE_FMAXIMUM:
11607 case ISD::VECREDUCE_FMIN:
11608 case ISD::VECREDUCE_FMAX: {
11609 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11610 unsigned RVVOpc =
11611 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11612 ? RISCVISD::VECREDUCE_FMIN_VL
11613 : RISCVISD::VECREDUCE_FMAX_VL;
11614 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11615 }
11616 }
11617}
11618
11619SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11620 SelectionDAG &DAG) const {
11621 SDLoc DL(Op);
11622 MVT VecEltVT = Op.getSimpleValueType();
11623
11624 unsigned RVVOpcode;
11625 SDValue VectorVal, ScalarVal;
11626 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11627 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11628 MVT VecVT = VectorVal.getSimpleValueType();
11629
11630 MVT ContainerVT = VecVT;
11631 if (VecVT.isFixedLengthVector()) {
11632 ContainerVT = getContainerForFixedLengthVector(VecVT);
11633 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11634 }
11635
11636 MVT ResVT = Op.getSimpleValueType();
11637 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11638 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11639 VL, DL, DAG, Subtarget);
11640 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11641 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11642 return Res;
11643
11644 if (Op->getFlags().hasNoNaNs())
11645 return Res;
11646
11647 // Force output to NaN if any element is Nan.
11648 SDValue IsNan =
11649 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11650 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11651 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11652 MVT XLenVT = Subtarget.getXLenVT();
11653 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11654 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11655 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11656 return DAG.getSelect(
11657 DL, ResVT, NoNaNs, Res,
11658 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11659}
11660
11661SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11662 SelectionDAG &DAG) const {
11663 SDLoc DL(Op);
11664 unsigned Opc = Op.getOpcode();
11665 SDValue Start = Op.getOperand(0);
11666 SDValue Vec = Op.getOperand(1);
11667 EVT VecEVT = Vec.getValueType();
11668 MVT XLenVT = Subtarget.getXLenVT();
11669
11670 // TODO: The type may need to be widened rather than split. Or widened before
11671 // it can be split.
11672 if (!isTypeLegal(VecEVT))
11673 return SDValue();
11674
11675 MVT VecVT = VecEVT.getSimpleVT();
11676 unsigned RVVOpcode = getRVVReductionOp(Opc);
11677
11678 if (VecVT.isFixedLengthVector()) {
11679 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11680 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11681 }
11682
11683 SDValue VL = Op.getOperand(3);
11684 SDValue Mask = Op.getOperand(2);
11685 SDValue Res =
11686 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11687 Vec, Mask, VL, DL, DAG, Subtarget);
11688 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11689 Op->getFlags().hasNoNaNs())
11690 return Res;
11691
11692 // Propagate NaNs.
11693 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11694 // Check if any of the elements in Vec is NaN.
11695 SDValue IsNaN = DAG.getNode(
11696 RISCVISD::SETCC_VL, DL, PredVT,
11697 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11698 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11699 // Check if the start value is NaN.
11700 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11701 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11702 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11703 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11704 MVT ResVT = Res.getSimpleValueType();
11705 return DAG.getSelect(
11706 DL, ResVT, NoNaNs, Res,
11707 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11708}
11709
11710SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11711 SelectionDAG &DAG) const {
11712 SDValue Vec = Op.getOperand(0);
11713 SDValue SubVec = Op.getOperand(1);
11714 MVT VecVT = Vec.getSimpleValueType();
11715 MVT SubVecVT = SubVec.getSimpleValueType();
11716
11717 SDLoc DL(Op);
11718 MVT XLenVT = Subtarget.getXLenVT();
11719 unsigned OrigIdx = Op.getConstantOperandVal(2);
11720 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11721
11722 if (OrigIdx == 0 && Vec.isUndef())
11723 return Op;
11724
11725 // We don't have the ability to slide mask vectors up indexed by their i1
11726 // elements; the smallest we can do is i8. Often we are able to bitcast to
11727 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11728 // into a scalable one, we might not necessarily have enough scalable
11729 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
11730 if (SubVecVT.getVectorElementType() == MVT::i1) {
11731 if (VecVT.getVectorMinNumElements() >= 8 &&
11732 SubVecVT.getVectorMinNumElements() >= 8) {
11733 assert(OrigIdx % 8 == 0 && "Invalid index");
11734 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11735 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11736 "Unexpected mask vector lowering");
11737 OrigIdx /= 8;
11738 SubVecVT =
11739 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11740 SubVecVT.isScalableVector());
11741 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11742 VecVT.isScalableVector());
11743 Vec = DAG.getBitcast(VecVT, Vec);
11744 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11745 } else {
11746 // We can't slide this mask vector up indexed by its i1 elements.
11747 // This poses a problem when we wish to insert a scalable vector which
11748 // can't be re-expressed as a larger type. Just choose the slow path and
11749 // extend to a larger type, then truncate back down.
11750 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11751 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11752 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11753 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11754 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11755 Op.getOperand(2));
11756 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11757 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11758 }
11759 }
11760
11761 // If the subvector vector is a fixed-length type and we don't know VLEN
11762 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11763 // don't know which register of a LMUL group contains the specific subvector
11764 // as we only know the minimum register size. Therefore we must slide the
11765 // vector group up the full amount.
11766 const auto VLen = Subtarget.getRealVLen();
11767 if (SubVecVT.isFixedLengthVector() && !VLen) {
11768 MVT ContainerVT = VecVT;
11769 if (VecVT.isFixedLengthVector()) {
11770 ContainerVT = getContainerForFixedLengthVector(VecVT);
11771 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11772 }
11773
11774 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11775
11776 SDValue Mask =
11777 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11778 // Set the vector length to only the number of elements we care about. Note
11779 // that for slideup this includes the offset.
11780 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11781 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11782
11783 // Use tail agnostic policy if we're inserting over Vec's tail.
11784 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11785 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11786 Policy = RISCVVType::TAIL_AGNOSTIC;
11787
11788 // If we're inserting into the lowest elements, use a tail undisturbed
11789 // vmv.v.v.
11790 if (OrigIdx == 0) {
11791 SubVec =
11792 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11793 } else {
11794 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11795 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11796 SlideupAmt, Mask, VL, Policy);
11797 }
11798
11799 if (VecVT.isFixedLengthVector())
11800 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11801 return DAG.getBitcast(Op.getValueType(), SubVec);
11802 }
11803
11804 MVT ContainerVecVT = VecVT;
11805 if (VecVT.isFixedLengthVector()) {
11806 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11807 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11808 }
11809
11810 MVT ContainerSubVecVT = SubVecVT;
11811 if (SubVecVT.isFixedLengthVector()) {
11812 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11813 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11814 }
11815
11816 unsigned SubRegIdx;
11817 ElementCount RemIdx;
11818 // insert_subvector scales the index by vscale if the subvector is scalable,
11819 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11820 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11821 if (SubVecVT.isFixedLengthVector()) {
11822 assert(VLen);
11823 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11824 auto Decompose =
11825 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11826 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11827 SubRegIdx = Decompose.first;
11828 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11829 (OrigIdx % Vscale));
11830 } else {
11831 auto Decompose =
11832 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11833 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11834 SubRegIdx = Decompose.first;
11835 RemIdx = ElementCount::getScalable(Decompose.second);
11836 }
11837
11838 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11839 assert(isPowerOf2_64(
11840 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11841 bool ExactlyVecRegSized =
11842 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11843 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11844
11845 // 1. If the Idx has been completely eliminated and this subvector's size is
11846 // a vector register or a multiple thereof, or the surrounding elements are
11847 // undef, then this is a subvector insert which naturally aligns to a vector
11848 // register. These can easily be handled using subregister manipulation.
11849 // 2. If the subvector isn't an exact multiple of a valid register group size,
11850 // then the insertion must preserve the undisturbed elements of the register.
11851 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11852 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11853 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11854 // of that LMUL=1 type back into the larger vector (resolving to another
11855 // subregister operation). See below for how our VSLIDEUP works. We go via a
11856 // LMUL=1 type to avoid allocating a large register group to hold our
11857 // subvector.
11858 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11859 if (SubVecVT.isFixedLengthVector()) {
11860 // We may get NoSubRegister if inserting at index 0 and the subvec
11861 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11862 if (SubRegIdx == RISCV::NoSubRegister) {
11863 assert(OrigIdx == 0);
11864 return Op;
11865 }
11866
11867 // Use a insert_subvector that will resolve to an insert subreg.
11868 assert(VLen);
11869 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11870 SDValue Insert =
11871 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11872 if (VecVT.isFixedLengthVector())
11873 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11874 return Insert;
11875 }
11876 return Op;
11877 }
11878
11879 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
11880 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11881 // (in our case undisturbed). This means we can set up a subvector insertion
11882 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11883 // size of the subvector.
11884 MVT InterSubVT = ContainerVecVT;
11885 SDValue AlignedExtract = Vec;
11886 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11887 if (SubVecVT.isFixedLengthVector()) {
11888 assert(VLen);
11889 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11890 }
11891 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11892 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11893 // Extract a subvector equal to the nearest full vector register type. This
11894 // should resolve to a EXTRACT_SUBREG instruction.
11895 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11896 }
11897
11898 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11899
11900 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11901
11902 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11903 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11904
11905 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11906 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11907 if (Subtarget.expandVScale(EndIndex) ==
11908 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11909 Policy = RISCVVType::TAIL_AGNOSTIC;
11910
11911 // If we're inserting into the lowest elements, use a tail undisturbed
11912 // vmv.v.v.
11913 if (RemIdx.isZero()) {
11914 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11915 SubVec, VL);
11916 } else {
11917 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11918
11919 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11920 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11921
11922 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11923 SlideupAmt, Mask, VL, Policy);
11924 }
11925
11926 // If required, insert this subvector back into the correct vector register.
11927 // This should resolve to an INSERT_SUBREG instruction.
11928 if (ContainerVecVT.bitsGT(InterSubVT))
11929 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11930
11931 if (VecVT.isFixedLengthVector())
11932 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11933
11934 // We might have bitcast from a mask type: cast back to the original type if
11935 // required.
11936 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11937}
11938
11939SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11940 SelectionDAG &DAG) const {
11941 SDValue Vec = Op.getOperand(0);
11942 MVT SubVecVT = Op.getSimpleValueType();
11943 MVT VecVT = Vec.getSimpleValueType();
11944
11945 SDLoc DL(Op);
11946 MVT XLenVT = Subtarget.getXLenVT();
11947 unsigned OrigIdx = Op.getConstantOperandVal(1);
11948 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11949
11950 // With an index of 0 this is a cast-like subvector, which can be performed
11951 // with subregister operations.
11952 if (OrigIdx == 0)
11953 return Op;
11954
11955 // We don't have the ability to slide mask vectors down indexed by their i1
11956 // elements; the smallest we can do is i8. Often we are able to bitcast to
11957 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11958 // from a scalable one, we might not necessarily have enough scalable
11959 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11960 if (SubVecVT.getVectorElementType() == MVT::i1) {
11961 if (VecVT.getVectorMinNumElements() >= 8 &&
11962 SubVecVT.getVectorMinNumElements() >= 8) {
11963 assert(OrigIdx % 8 == 0 && "Invalid index");
11964 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11965 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11966 "Unexpected mask vector lowering");
11967 OrigIdx /= 8;
11968 SubVecVT =
11969 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11970 SubVecVT.isScalableVector());
11971 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11972 VecVT.isScalableVector());
11973 Vec = DAG.getBitcast(VecVT, Vec);
11974 } else {
11975 // We can't slide this mask vector down, indexed by its i1 elements.
11976 // This poses a problem when we wish to extract a scalable vector which
11977 // can't be re-expressed as a larger type. Just choose the slow path and
11978 // extend to a larger type, then truncate back down.
11979 // TODO: We could probably improve this when extracting certain fixed
11980 // from fixed, where we can extract as i8 and shift the correct element
11981 // right to reach the desired subvector?
11982 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11983 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11984 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11985 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11986 Op.getOperand(1));
11987 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11988 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11989 }
11990 }
11991
11992 const auto VLen = Subtarget.getRealVLen();
11993
11994 // If the subvector vector is a fixed-length type and we don't know VLEN
11995 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11996 // don't know which register of a LMUL group contains the specific subvector
11997 // as we only know the minimum register size. Therefore we must slide the
11998 // vector group down the full amount.
11999 if (SubVecVT.isFixedLengthVector() && !VLen) {
12000 MVT ContainerVT = VecVT;
12001 if (VecVT.isFixedLengthVector()) {
12002 ContainerVT = getContainerForFixedLengthVector(VecVT);
12003 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12004 }
12005
12006 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12007 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12008 if (auto ShrunkVT =
12009 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12010 ContainerVT = *ShrunkVT;
12011 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12012 }
12013
12014 SDValue Mask =
12015 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12016 // Set the vector length to only the number of elements we care about. This
12017 // avoids sliding down elements we're going to discard straight away.
12018 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12019 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12020 SDValue Slidedown =
12021 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12022 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12023 // Now we can use a cast-like subvector extract to get the result.
12024 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12025 return DAG.getBitcast(Op.getValueType(), Slidedown);
12026 }
12027
12028 if (VecVT.isFixedLengthVector()) {
12029 VecVT = getContainerForFixedLengthVector(VecVT);
12030 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12031 }
12032
12033 MVT ContainerSubVecVT = SubVecVT;
12034 if (SubVecVT.isFixedLengthVector())
12035 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12036
12037 unsigned SubRegIdx;
12038 ElementCount RemIdx;
12039 // extract_subvector scales the index by vscale if the subvector is scalable,
12040 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12041 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12042 if (SubVecVT.isFixedLengthVector()) {
12043 assert(VLen);
12044 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12045 auto Decompose =
12046 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12047 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12048 SubRegIdx = Decompose.first;
12049 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12050 (OrigIdx % Vscale));
12051 } else {
12052 auto Decompose =
12053 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12054 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12055 SubRegIdx = Decompose.first;
12056 RemIdx = ElementCount::getScalable(Decompose.second);
12057 }
12058
12059 // If the Idx has been completely eliminated then this is a subvector extract
12060 // which naturally aligns to a vector register. These can easily be handled
12061 // using subregister manipulation. We use an extract_subvector that will
12062 // resolve to an extract subreg.
12063 if (RemIdx.isZero()) {
12064 if (SubVecVT.isFixedLengthVector()) {
12065 assert(VLen);
12066 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12067 Vec =
12068 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12069 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12070 }
12071 return Op;
12072 }
12073
12074 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12075 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12076 // divide exactly.
12077 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12078 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12079
12080 // If the vector type is an LMUL-group type, extract a subvector equal to the
12081 // nearest full vector register type.
12082 MVT InterSubVT = VecVT;
12083 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12084 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12085 // we should have successfully decomposed the extract into a subregister.
12086 // We use an extract_subvector that will resolve to a subreg extract.
12087 assert(SubRegIdx != RISCV::NoSubRegister);
12088 (void)SubRegIdx;
12089 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12090 if (SubVecVT.isFixedLengthVector()) {
12091 assert(VLen);
12092 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12093 }
12094 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12095 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12096 }
12097
12098 // Slide this vector register down by the desired number of elements in order
12099 // to place the desired subvector starting at element 0.
12100 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12101 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12102 if (SubVecVT.isFixedLengthVector())
12103 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12104 SDValue Slidedown =
12105 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12106 Vec, SlidedownAmt, Mask, VL);
12107
12108 // Now the vector is in the right position, extract our final subvector. This
12109 // should resolve to a COPY.
12110 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12111
12112 // We might have bitcast from a mask type: cast back to the original type if
12113 // required.
12114 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12115}
12116
12117// Widen a vector's operands to i8, then truncate its results back to the
12118// original type, typically i1. All operand and result types must be the same.
12119 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12120 SelectionDAG &DAG) {
12121 MVT VT = N.getSimpleValueType();
12122 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12123 SmallVector<SDValue, 4> WideOps;
12124 for (SDValue Op : N->ops()) {
12125 assert(Op.getSimpleValueType() == VT &&
12126 "Operands and result must be same type");
12127 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12128 }
12129
12130 unsigned NumVals = N->getNumValues();
12131
12132 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12133 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12134 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12135 SmallVector<SDValue, 4> TruncVals;
12136 for (unsigned I = 0; I < NumVals; I++) {
12137 TruncVals.push_back(
12138 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12139 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12140 }
12141
12142 if (TruncVals.size() > 1)
12143 return DAG.getMergeValues(TruncVals, DL);
12144 return TruncVals.front();
12145}
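// Example: a two-result VECTOR_DEINTERLEAVE of i1 mask vectors is performed on
// the corresponding i8 vectors (operands zero-extended above), and each i8
// result is turned back into a mask via "setcc != 0".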
12146
12147SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12148 SelectionDAG &DAG) const {
12149 SDLoc DL(Op);
12150 MVT VecVT = Op.getSimpleValueType();
12151
12152 const unsigned Factor = Op->getNumValues();
12153 assert(Factor <= 8);
12154
12155 // 1 bit element vectors need to be widened to e8
12156 if (VecVT.getVectorElementType() == MVT::i1)
12157 return widenVectorOpsToi8(Op, DL, DAG);
12158
12159 // Convert to scalable vectors first.
12160 if (VecVT.isFixedLengthVector()) {
12161 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12162 SmallVector<SDValue, 8> Ops(Factor);
12163 for (unsigned i = 0U; i < Factor; ++i)
12164 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12165 Subtarget);
12166
12167 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12168 SDValue NewDeinterleave =
12169 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12170
12171 SmallVector<SDValue, 8> Res(Factor);
12172 for (unsigned i = 0U; i < Factor; ++i)
12173 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12174 DAG, Subtarget);
12175 return DAG.getMergeValues(Res, DL);
12176 }
12177
12178 // If concatenating would exceed LMUL=8, we need to split.
12179 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12180 (8 * RISCV::RVVBitsPerBlock)) {
12181 SmallVector<SDValue, 8> Ops(Factor * 2);
12182 for (unsigned i = 0; i != Factor; ++i) {
12183 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12184 Ops[i * 2] = OpLo;
12185 Ops[i * 2 + 1] = OpHi;
12186 }
12187
12188 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12189
12190 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12191 ArrayRef(Ops).slice(0, Factor));
12192 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12193 ArrayRef(Ops).slice(Factor, Factor));
12194
12195 SmallVector<SDValue, 8> Res(Factor);
12196 for (unsigned i = 0; i != Factor; ++i)
12197 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12198 Hi.getValue(i));
12199
12200 return DAG.getMergeValues(Res, DL);
12201 }
12202
12203 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12204 MVT VT = Op->getSimpleValueType(0);
12205 SDValue V1 = Op->getOperand(0);
12206 SDValue V2 = Op->getOperand(1);
12207
12208 // For fractional LMUL, check if we can use a higher LMUL
12209 // instruction to avoid a vslidedown.
12210 if (SDValue Src = foldConcatVector(V1, V2);
12211 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12212 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12213 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12214 // Freeze the source so we can increase its use count.
12215 Src = DAG.getFreeze(Src);
12216 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12217 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12218 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12219 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12220 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12221 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12222 return DAG.getMergeValues({Even, Odd}, DL);
12223 }
12224
12225 // Freeze the sources so we can increase their use count.
12226 V1 = DAG.getFreeze(V1);
12227 V2 = DAG.getFreeze(V2);
12228 SDValue Even =
12229 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12230 SDValue Odd =
12231 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12232 return DAG.getMergeValues({Even, Odd}, DL);
12233 }
12234
12235 SmallVector<SDValue, 8> Ops(Op->op_values());
12236
12237 // Concatenate the vectors as one vector to deinterleave
12238 MVT ConcatVT =
12239 MVT::getVectorVT(VecVT.getVectorElementType(),
12240 VecVT.getVectorElementCount().multiplyCoefficientBy(
12241 PowerOf2Ceil(Factor)));
12242 if (Ops.size() < PowerOf2Ceil(Factor))
12243 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12244 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12245
12246 if (Factor == 2) {
12247 // We can deinterleave through vnsrl.wi if the element type is smaller than
12248 // ELEN
12249 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12250 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12251 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12252 return DAG.getMergeValues({Even, Odd}, DL);
12253 }
12254
12255 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12256 // possible mask vector, then extract the required subvector. Doing this
12257 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12258 // creation to be rematerialized during register allocation to reduce
12259 // register pressure if needed.
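 // For example, splatting 0b01010101 across nxv8i8 and bitcasting to nxv64i1
 // yields a mask selecting the even elements; the subvector of the required
 // width is then extracted from it.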
12260
12261 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12262
12263 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12264 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12265 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12266
12267 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12268 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12269 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12270
12271 // vcompress the even and odd elements into two separate vectors
12272 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12273 EvenMask, DAG.getUNDEF(ConcatVT));
12274 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12275 OddMask, DAG.getUNDEF(ConcatVT));
12276
12277 // Extract the result half of the vcompress for even and odd
12278 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12279 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12280
12281 return DAG.getMergeValues({Even, Odd}, DL);
12282 }
12283
12284 // Store with a unit-stride store and load it back with a segmented load.
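// Roughly, for Factor == 3 the concatenated data is written once with
// vse<sew> and read back with a masked vlseg3e<sew>, which hands back three
// deinterleaved fields.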
12285 MVT XLenVT = Subtarget.getXLenVT();
12286 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12287 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12288
12289 // Allocate a stack slot.
12290 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12291 SDValue StackPtr =
12292 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12293 auto &MF = DAG.getMachineFunction();
12294 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12295 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12296
12297 SDValue StoreOps[] = {DAG.getEntryNode(),
12298 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12299 Concat, StackPtr, VL};
12300
12301 SDValue Chain = DAG.getMemIntrinsicNode(
12302 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12303 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12304 MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer());
12305
12306 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12307 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12308 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12309 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12310 Intrinsic::riscv_vlseg8_mask};
12311
12312 SDValue LoadOps[] = {
12313 Chain,
12314 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12315 Passthru,
12316 StackPtr,
12317 Mask,
12318 VL,
12319 DAG.getTargetConstant(
12320 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12321 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12322
12323 unsigned Sz =
12324 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12325 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12326
12327 SDValue Load = DAG.getMemIntrinsicNode(
12328 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12329 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12330 MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer());
12331
12332 SmallVector<SDValue, 8> Res(Factor);
12333
12334 for (unsigned i = 0U; i < Factor; ++i)
12335 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12336 DAG.getTargetConstant(i, DL, MVT::i32));
12337
12338 return DAG.getMergeValues(Res, DL);
12339}
12340
12341SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12342 SelectionDAG &DAG) const {
12343 SDLoc DL(Op);
12344 MVT VecVT = Op.getSimpleValueType();
12345
12346 const unsigned Factor = Op.getNumOperands();
12347 assert(Factor <= 8);
12348
12349 // i1 vectors need to be widened to i8
12350 if (VecVT.getVectorElementType() == MVT::i1)
12351 return widenVectorOpsToi8(Op, DL, DAG);
12352
12353 // Convert to scalable vectors first.
12354 if (VecVT.isFixedLengthVector()) {
12355 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12356 SmallVector<SDValue, 8> Ops(Factor);
12357 for (unsigned i = 0U; i < Factor; ++i)
12358 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12359 Subtarget);
12360
12361 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12362 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12363
12364 SmallVector<SDValue, 8> Res(Factor);
12365 for (unsigned i = 0U; i < Factor; ++i)
12366 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12367 Subtarget);
12368 return DAG.getMergeValues(Res, DL);
12369 }
12370
12371 MVT XLenVT = Subtarget.getXLenVT();
12372 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12373
12374 // If the VT is larger than LMUL=8, we need to split and reassemble.
12375 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12376 (8 * RISCV::RVVBitsPerBlock)) {
12377 SmallVector<SDValue, 8> Ops(Factor * 2);
12378 for (unsigned i = 0; i != Factor; ++i) {
12379 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12380 Ops[i] = OpLo;
12381 Ops[i + Factor] = OpHi;
12382 }
12383
12384 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12385
12386 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12387 ArrayRef(Ops).take_front(Factor)),
12388 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12389 ArrayRef(Ops).drop_front(Factor))};
12390
12391 SmallVector<SDValue, 8> Concats(Factor);
12392 for (unsigned i = 0; i != Factor; ++i) {
12393 unsigned IdxLo = 2 * i;
12394 unsigned IdxHi = 2 * i + 1;
12395 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12396 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12397 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12398 }
12399
12400 return DAG.getMergeValues(Concats, DL);
12401 }
12402
12403 SDValue Interleaved;
12404
12405 // Spill to the stack using a segment store for simplicity.
12406 if (Factor != 2) {
12407 EVT MemVT =
12408 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12409 VecVT.getVectorElementCount() * Factor);
12410
12411 // Allocate a stack slot.
12412 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12413 SDValue StackPtr =
12414 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12415 EVT PtrVT = StackPtr.getValueType();
12416 auto &MF = DAG.getMachineFunction();
12417 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12418 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12419
12420 static const Intrinsic::ID IntrIds[] = {
12421 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12422 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12423 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12424 Intrinsic::riscv_vsseg8_mask,
12425 };
12426
12427 unsigned Sz =
12428 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12429 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12430
12431 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12432 for (unsigned i = 0; i < Factor; i++)
12433 StoredVal =
12434 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12435 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12436
12437 SDValue Ops[] = {DAG.getEntryNode(),
12438 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12439 StoredVal,
12440 StackPtr,
12441 Mask,
12442 VL,
12443 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12444 DL, XLenVT)};
12445
12446 SDValue Chain = DAG.getMemIntrinsicNode(
12447 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12448 VecVT.getVectorElementType(), PtrInfo, Alignment,
12449 MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer());
12450
12451 SmallVector<SDValue, 8> Loads(Factor);
12452
12453 SDValue Increment =
12454 DAG.getVScale(DL, PtrVT,
12455 APInt(PtrVT.getFixedSizeInBits(),
12456 VecVT.getStoreSize().getKnownMinValue()));
12457 for (unsigned i = 0; i != Factor; ++i) {
12458 if (i != 0)
12459 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12460
12461 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12462 }
12463
12464 return DAG.getMergeValues(Loads, DL);
12465 }
12466
12467 // Use ri.vzip2{a,b} if available
12468 // TODO: Figure out the best lowering for the spread variants
12469 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12470 !Op.getOperand(1).isUndef()) {
12471 // Freeze the sources so we can increase their use count.
12472 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12473 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12474 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12475 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12476 return DAG.getMergeValues({Lo, Hi}, DL);
12477 }
12478
12479 // If the element type is smaller than ELEN, then we can interleave with
12480 // vwaddu.vv and vwmaccu.vx
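 // Roughly, each interleaved pair is built as zext(a) + 2^SEW * zext(b):
 // vwaddu.vv produces zext(a) + zext(b) and vwmaccu.vx adds (2^SEW - 1) *
 // zext(b) on top (informal sketch of the widening interleave).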
12481 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12482 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12483 DAG, Subtarget);
12484 } else {
12485 // Otherwise, fallback to using vrgathere16.vv
12486 MVT ConcatVT =
12487 MVT::getVectorVT(VecVT.getVectorElementType(),
12488 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12489 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12490 Op.getOperand(0), Op.getOperand(1));
12491
12492 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12493
12494 // 0 1 2 3 4 5 6 7 ...
12495 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12496
12497 // 1 1 1 1 1 1 1 1 ...
12498 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12499
12500 // 1 0 1 0 1 0 1 0 ...
12501 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12502 OddMask = DAG.getSetCC(
12503 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12504 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12505 ISD::SETNE);
12506
12507 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12508
12509 // Build up the index vector for interleaving the concatenated vector
12510 // 0 0 1 1 2 2 3 3 ...
12511 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12512 // 0 n 1 n+1 2 n+2 3 n+3 ...
12513 Idx =
12514 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12515
12516 // Then perform the interleave
12517 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12518 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12519 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12520 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12521 }
12522
12523 // Extract the two halves from the interleaved result
12524 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12525 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12526 VecVT.getVectorMinNumElements());
12527
12528 return DAG.getMergeValues({Lo, Hi}, DL);
12529}
12530
12531// Lower step_vector to the vid instruction. Any non-identity step value must
12532// be accounted for by manual expansion.
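// For example, a step_vector with a step of 4 becomes vid.v followed by a
// shift left by 2, while a non-power-of-two step such as 3 uses a vector
// multiply instead (see the power-of-two check below).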
12533SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12534 SelectionDAG &DAG) const {
12535 SDLoc DL(Op);
12536 MVT VT = Op.getSimpleValueType();
12537 assert(VT.isScalableVector() && "Expected scalable vector");
12538 MVT XLenVT = Subtarget.getXLenVT();
12539 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12540 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12541 uint64_t StepValImm = Op.getConstantOperandVal(0);
12542 if (StepValImm != 1) {
12543 if (isPowerOf2_64(StepValImm)) {
12544 SDValue StepVal =
12545 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12546 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12547 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12548 } else {
12549 SDValue StepVal = lowerScalarSplat(
12550 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12551 VL, VT, DL, DAG, Subtarget);
12552 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12553 }
12554 }
12555 return StepVec;
12556}
12557
12558// Implement vector_reverse using vrgather.vv with indices determined by
12559// subtracting the id of each element from (VLMAX-1). This will convert
12560// the indices like so:
12561// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12562// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12563SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12564 SelectionDAG &DAG) const {
12565 SDLoc DL(Op);
12566 MVT VecVT = Op.getSimpleValueType();
12567 if (VecVT.getVectorElementType() == MVT::i1) {
12568 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12569 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12570 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12571 return DAG.getSetCC(DL, VecVT, Op2,
12572 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12573 }
12574
12575 MVT ContainerVT = VecVT;
12576 SDValue Vec = Op.getOperand(0);
12577 if (VecVT.isFixedLengthVector()) {
12578 ContainerVT = getContainerForFixedLengthVector(VecVT);
12579 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12580 }
12581
12582 MVT XLenVT = Subtarget.getXLenVT();
12583 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12584
12585 // On some uarchs vrgather.vv will read from every input register for each
12586 // output register, regardless of the indices. However, to reverse a vector,
12587 // each output register only needs to read from one register. So decompose it
12588 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12589 // O(LMUL^2).
12590 //
12591 // vsetvli a1, zero, e64, m4, ta, ma
12592 // vrgatherei16.vv v12, v8, v16
12593 // ->
12594 // vsetvli a1, zero, e64, m1, ta, ma
12595 // vrgather.vv v15, v8, v16
12596 // vrgather.vv v14, v9, v16
12597 // vrgather.vv v13, v10, v16
12598 // vrgather.vv v12, v11, v16
12599 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12600 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12601 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12602 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12603 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12604 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12605
12606 // Fixed length vectors might not fit exactly into their container, and so
12607 // leave a gap in the front of the vector after being reversed. Slide this
12608 // away.
12609 //
12610 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12611 // 0 1 2 3 x x x x <- reverse
12612 // x x x x 0 1 2 3 <- vslidedown.vx
12613 if (VecVT.isFixedLengthVector()) {
12614 SDValue Offset = DAG.getNode(
12615 ISD::SUB, DL, XLenVT,
12616 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12617 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12618 Concat =
12619 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12620 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12621 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12622 }
12623 return Concat;
12624 }
12625
12626 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12627 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12628 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12629 unsigned MaxVLMAX =
12630 VecVT.isFixedLengthVector()
12631 ? VecVT.getVectorNumElements()
12632 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12633
12634 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12635 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12636
12637 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12638 // to use vrgatherei16.vv.
12639 if (MaxVLMAX > 256 && EltSize == 8) {
12640 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12641 // Reverse each half, then reassemble them in reverse order.
12642 // NOTE: It's also possible that, after splitting, VLMAX no longer
12643 // requires vrgatherei16.vv.
12644 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12645 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12646 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12647 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12648 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12649 // Reassemble the low and high pieces reversed.
12650 // FIXME: This is a CONCAT_VECTORS.
12651 SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
12652 return DAG.getInsertSubvector(DL, Res, Lo,
12653 LoVT.getVectorMinNumElements());
12654 }
12655
12656 // Just promote the int type to i16 which will double the LMUL.
12657 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12658 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12659 }
12660
12661 // At LMUL > 1, do the index computation in 16 bits to reduce register
12662 // pressure.
12663 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12664 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12665 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12666 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12667 IntVT = IntVT.changeVectorElementType(MVT::i16);
12668 }
12669
12670 // Calculate VLMAX-1 for the desired SEW.
12671 SDValue VLMinus1 = DAG.getNode(
12672 ISD::SUB, DL, XLenVT,
12673 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12674 DAG.getConstant(1, DL, XLenVT));
12675
12676 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12677 bool IsRV32E64 =
12678 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12679 SDValue SplatVL;
12680 if (!IsRV32E64)
12681 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12682 else
12683 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12684 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12685
12686 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12687 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12688 DAG.getUNDEF(IntVT), Mask, VL);
12689
12690 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12691 DAG.getUNDEF(ContainerVT), Mask, VL);
12692 if (VecVT.isFixedLengthVector())
12693 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12694 return Gather;
12695}
12696
12697SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12698 SelectionDAG &DAG) const {
12699 SDLoc DL(Op);
12700 SDValue V1 = Op.getOperand(0);
12701 SDValue V2 = Op.getOperand(1);
12702 MVT XLenVT = Subtarget.getXLenVT();
12703 MVT VecVT = Op.getSimpleValueType();
12704
12705 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12706
12707 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12708 SDValue DownOffset, UpOffset;
12709 if (ImmValue >= 0) {
12710 // The operand is a TargetConstant; we need to rebuild it as a regular
12711 // constant.
12712 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12713 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12714 } else {
12715 // The operand is a TargetConstant; we need to rebuild it as a regular
12716 // constant rather than negating the original operand.
12717 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12718 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12719 }
12720
12721 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12722
12723 SDValue SlideDown = getVSlidedown(
12724 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12725 Subtarget.hasVLDependentLatency() ? UpOffset
12726 : DAG.getRegister(RISCV::X0, XLenVT));
12727 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12728 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12729 RISCVVType::TAIL_AGNOSTIC);
12730}
12731
12732SDValue
12733RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12734 SelectionDAG &DAG) const {
12735 SDLoc DL(Op);
12736 auto *Load = cast<LoadSDNode>(Op);
12737
12738 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12739 Load->getMemoryVT(),
12740 *Load->getMemOperand()) &&
12741 "Expecting a correctly-aligned load");
12742
12743 MVT VT = Op.getSimpleValueType();
12744 MVT XLenVT = Subtarget.getXLenVT();
12745 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12746
12747 // If we know the exact VLEN and our fixed length vector completely fills
12748 // the container, use a whole register load instead.
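 // For example (assuming VLEN is known to be exactly 128): a v4i32 value
 // fills its nxv2i32 container, so an ordinary load of the container type is
 // emitted instead of a VL-limited vle intrinsic.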
12749 const auto [MinVLMAX, MaxVLMAX] =
12750 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12751 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12752 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12753 MachineMemOperand *MMO = Load->getMemOperand();
12754 SDValue NewLoad =
12755 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12756 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12757 MMO->getAAInfo(), MMO->getRanges());
12758 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12759 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12760 }
12761
12762 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12763
12764 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12765 SDValue IntID = DAG.getTargetConstant(
12766 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12767 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12768 if (!IsMaskOp)
12769 Ops.push_back(DAG.getUNDEF(ContainerVT));
12770 Ops.push_back(Load->getBasePtr());
12771 Ops.push_back(VL);
12772 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12773 SDValue NewLoad =
12774 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12775 Load->getMemoryVT(), Load->getMemOperand());
12776
12777 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12778 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12779}
12780
12781SDValue
12782RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12783 SelectionDAG &DAG) const {
12784 SDLoc DL(Op);
12785 auto *Store = cast<StoreSDNode>(Op);
12786
12787 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12788 Store->getMemoryVT(),
12789 *Store->getMemOperand()) &&
12790 "Expecting a correctly-aligned store");
12791
12792 SDValue StoreVal = Store->getValue();
12793 MVT VT = StoreVal.getSimpleValueType();
12794 MVT XLenVT = Subtarget.getXLenVT();
12795
12796 // If the size is less than a byte, we need to pad with zeros to make a full byte.
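 // For example, a v4i1 store is widened to v8i1 by inserting the value into
 // an all-zero v8i1 so that a whole byte is written.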
12797 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12798 VT = MVT::v8i1;
12799 StoreVal =
12800 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12801 }
12802
12803 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12804
12805 SDValue NewValue =
12806 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12807
12808 // If we know the exact VLEN and our fixed length vector completely fills
12809 // the container, use a whole register store instead.
12810 const auto [MinVLMAX, MaxVLMAX] =
12811 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12812 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12813 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12814 MachineMemOperand *MMO = Store->getMemOperand();
12815 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12816 MMO->getPointerInfo(), MMO->getBaseAlign(),
12817 MMO->getFlags(), MMO->getAAInfo());
12818 }
12819
12820 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12821
12822 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12823 SDValue IntID = DAG.getTargetConstant(
12824 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12825 return DAG.getMemIntrinsicNode(
12826 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12827 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12828 Store->getMemoryVT(), Store->getMemOperand());
12829}
12830
12831SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12832 SelectionDAG &DAG) const {
12833 SDLoc DL(Op);
12834 MVT VT = Op.getSimpleValueType();
12835
12836 const auto *MemSD = cast<MemSDNode>(Op);
12837 EVT MemVT = MemSD->getMemoryVT();
12838 MachineMemOperand *MMO = MemSD->getMemOperand();
12839 SDValue Chain = MemSD->getChain();
12840 SDValue BasePtr = MemSD->getBasePtr();
12841
12842 SDValue Mask, PassThru, VL;
12843 bool IsExpandingLoad = false;
12844 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12845 Mask = VPLoad->getMask();
12846 PassThru = DAG.getUNDEF(VT);
12847 VL = VPLoad->getVectorLength();
12848 } else {
12849 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12850 Mask = MLoad->getMask();
12851 PassThru = MLoad->getPassThru();
12852 IsExpandingLoad = MLoad->isExpandingLoad();
12853 }
12854
12855 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12856
12857 MVT XLenVT = Subtarget.getXLenVT();
12858
12859 MVT ContainerVT = VT;
12860 if (VT.isFixedLengthVector()) {
12861 ContainerVT = getContainerForFixedLengthVector(VT);
12862 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12863 if (!IsUnmasked) {
12864 MVT MaskVT = getMaskTypeFor(ContainerVT);
12865 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12866 }
12867 }
12868
12869 if (!VL)
12870 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12871
12872 SDValue ExpandingVL;
12873 if (!IsUnmasked && IsExpandingLoad) {
12874 ExpandingVL = VL;
12875 VL =
12876 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12877 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12878 }
12879
12880 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12881 : Intrinsic::riscv_vle_mask;
12882 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12883 if (IntID == Intrinsic::riscv_vle)
12884 Ops.push_back(DAG.getUNDEF(ContainerVT));
12885 else
12886 Ops.push_back(PassThru);
12887 Ops.push_back(BasePtr);
12888 if (IntID == Intrinsic::riscv_vle_mask)
12889 Ops.push_back(Mask);
12890 Ops.push_back(VL);
12891 if (IntID == Intrinsic::riscv_vle_mask)
12892 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12893
12894 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12895
12896 SDValue Result =
12897 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12898 Chain = Result.getValue(1);
12899 if (ExpandingVL) {
12900 MVT IndexVT = ContainerVT;
12901 if (ContainerVT.isFloatingPoint())
12902 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12903
12904 MVT IndexEltVT = IndexVT.getVectorElementType();
12905 bool UseVRGATHEREI16 = false;
12906 // If the index vector is an i8 vector and the element count exceeds 256,
12907 // we should change the element type of the index vector to i16 to avoid
12908 // overflow.
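 // For example, an expanding load of 512 elements can produce viota values
 // up to 511, which do not fit in an i8 index element.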
12909 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12910 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12911 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12912 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12913 UseVRGATHEREI16 = true;
12914 }
12915
12916 SDValue Iota =
12917 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12918 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12919 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12920 Result =
12921 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12922 : RISCVISD::VRGATHER_VV_VL,
12923 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12924 }
12925
12926 if (VT.isFixedLengthVector())
12927 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12928
12929 return DAG.getMergeValues({Result, Chain}, DL);
12930}
12931
12932SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12933 SDLoc DL(Op);
12934 MVT VT = Op->getSimpleValueType(0);
12935
12936 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12937 EVT MemVT = VPLoadFF->getMemoryVT();
12938 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12939 SDValue Chain = VPLoadFF->getChain();
12940 SDValue BasePtr = VPLoadFF->getBasePtr();
12941
12942 SDValue Mask = VPLoadFF->getMask();
12943 SDValue VL = VPLoadFF->getVectorLength();
12944
12945 MVT XLenVT = Subtarget.getXLenVT();
12946
12947 MVT ContainerVT = VT;
12948 if (VT.isFixedLengthVector()) {
12949 ContainerVT = getContainerForFixedLengthVector(VT);
12950 MVT MaskVT = getMaskTypeFor(ContainerVT);
12951 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12952 }
12953
12954 unsigned IntID = Intrinsic::riscv_vleff_mask;
12955 SDValue Ops[] = {
12956 Chain,
12957 DAG.getTargetConstant(IntID, DL, XLenVT),
12958 DAG.getUNDEF(ContainerVT),
12959 BasePtr,
12960 Mask,
12961 VL,
12962 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12963
12964 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12965
12966 SDValue Result =
12967 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12968 SDValue OutVL = Result.getValue(1);
12969 Chain = Result.getValue(2);
12970
12971 if (VT.isFixedLengthVector())
12972 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12973
12974 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12975}
12976
12977SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12978 SelectionDAG &DAG) const {
12979 SDLoc DL(Op);
12980
12981 const auto *MemSD = cast<MemSDNode>(Op);
12982 EVT MemVT = MemSD->getMemoryVT();
12983 MachineMemOperand *MMO = MemSD->getMemOperand();
12984 SDValue Chain = MemSD->getChain();
12985 SDValue BasePtr = MemSD->getBasePtr();
12986 SDValue Val, Mask, VL;
12987
12988 bool IsCompressingStore = false;
12989 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12990 Val = VPStore->getValue();
12991 Mask = VPStore->getMask();
12992 VL = VPStore->getVectorLength();
12993 } else {
12994 const auto *MStore = cast<MaskedStoreSDNode>(Op);
12995 Val = MStore->getValue();
12996 Mask = MStore->getMask();
12997 IsCompressingStore = MStore->isCompressingStore();
12998 }
12999
13000 bool IsUnmasked =
13001 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13002
13003 MVT VT = Val.getSimpleValueType();
13004 MVT XLenVT = Subtarget.getXLenVT();
13005
13006 MVT ContainerVT = VT;
13007 if (VT.isFixedLengthVector()) {
13008 ContainerVT = getContainerForFixedLengthVector(VT);
13009
13010 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13011 if (!IsUnmasked || IsCompressingStore) {
13012 MVT MaskVT = getMaskTypeFor(ContainerVT);
13013 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13014 }
13015 }
13016
13017 if (!VL)
13018 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13019
13020 if (IsCompressingStore) {
13021 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13022 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13023 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13024 VL =
13025 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13026 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13027 }
13028
13029 unsigned IntID =
13030 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13031 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13032 Ops.push_back(Val);
13033 Ops.push_back(BasePtr);
13034 if (!IsUnmasked)
13035 Ops.push_back(Mask);
13036 Ops.push_back(VL);
13037
13038 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13039 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13040}
13041
13042SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13043 SelectionDAG &DAG) const {
13044 SDLoc DL(Op);
13045 SDValue Val = Op.getOperand(0);
13046 SDValue Mask = Op.getOperand(1);
13047 SDValue Passthru = Op.getOperand(2);
13048
13049 MVT VT = Val.getSimpleValueType();
13050 MVT XLenVT = Subtarget.getXLenVT();
13051 MVT ContainerVT = VT;
13052 if (VT.isFixedLengthVector()) {
13053 ContainerVT = getContainerForFixedLengthVector(VT);
13054 MVT MaskVT = getMaskTypeFor(ContainerVT);
13055 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13056 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13057 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13058 }
13059
13060 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13061 SDValue Res =
13062 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13063 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13064 Passthru, Val, Mask, VL);
13065
13066 if (VT.isFixedLengthVector())
13067 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13068
13069 return Res;
13070}
13071
13072SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13073 SelectionDAG &DAG) const {
13074 unsigned Opc = Op.getOpcode();
13075 SDLoc DL(Op);
13076 SDValue Chain = Op.getOperand(0);
13077 SDValue Op1 = Op.getOperand(1);
13078 SDValue Op2 = Op.getOperand(2);
13079 SDValue CC = Op.getOperand(3);
13080 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13081 MVT VT = Op.getSimpleValueType();
13082 MVT InVT = Op1.getSimpleValueType();
13083
13084 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
13085 // condition codes.
13086 if (Opc == ISD::STRICT_FSETCCS) {
13087 // Expand strict_fsetccs(x, y, oeq) to
13088 // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
13089 SDVTList VTList = Op->getVTList();
13090 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13091 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13092 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13093 Op2, OLECCVal);
13094 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13095 Op1, OLECCVal);
13096 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13097 Tmp1.getValue(1), Tmp2.getValue(1));
13098 // Tmp1 and Tmp2 might be the same node.
13099 if (Tmp1 != Tmp2)
13100 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13101 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13102 }
13103
13104 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13105 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13106 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13107 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13108 Op2, OEQCCVal);
13109 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13110 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13111 }
13112 }
13113
13114 MVT ContainerInVT = InVT;
13115 if (InVT.isFixedLengthVector()) {
13116 ContainerInVT = getContainerForFixedLengthVector(InVT);
13117 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13118 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13119 }
13120 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13121
13122 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13123
13124 SDValue Res;
13125 if (Opc == ISD::STRICT_FSETCC &&
13126 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13127 CCVal == ISD::SETOLE)) {
13128 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13129 // is only active when both input elements are ordered.
13130 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13131 SDValue OrderMask1 = DAG.getNode(
13132 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13133 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13134 True, VL});
13135 SDValue OrderMask2 = DAG.getNode(
13136 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13137 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13138 True, VL});
13139 Mask =
13140 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13141 // Use Mask as the passthru operand to let the result be 0 if either of the
13142 // inputs is unordered.
13143 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13144 DAG.getVTList(MaskVT, MVT::Other),
13145 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13146 } else {
13147 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13148 : RISCVISD::STRICT_FSETCCS_VL;
13149 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13150 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13151 }
13152
13153 if (VT.isFixedLengthVector()) {
13154 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13155 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13156 }
13157 return Res;
13158}
13159
13160// Lower vector ABS to smax(X, sub(0, X)).
13161SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13162 SDLoc DL(Op);
13163 MVT VT = Op.getSimpleValueType();
13164 SDValue X = Op.getOperand(0);
13165
13166 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13167 "Unexpected type for ISD::ABS");
13168
13169 MVT ContainerVT = VT;
13170 if (VT.isFixedLengthVector()) {
13171 ContainerVT = getContainerForFixedLengthVector(VT);
13172 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13173 }
13174
13175 SDValue Mask, VL;
13176 if (Op->getOpcode() == ISD::VP_ABS) {
13177 Mask = Op->getOperand(1);
13178 if (VT.isFixedLengthVector())
13179 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13180 Subtarget);
13181 VL = Op->getOperand(2);
13182 } else
13183 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13184
13185 SDValue SplatZero = DAG.getNode(
13186 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13187 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13188 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13189 DAG.getUNDEF(ContainerVT), Mask, VL);
13190 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13191 DAG.getUNDEF(ContainerVT), Mask, VL);
13192
13193 if (VT.isFixedLengthVector())
13194 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13195 return Max;
13196}
13197
13198SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13199 SelectionDAG &DAG) const {
13200 const auto &TSInfo =
13201 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13202
13203 unsigned NewOpc = getRISCVVLOp(Op);
13204 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13205 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13206
13207 MVT VT = Op.getSimpleValueType();
13208 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13209
13210 // Create list of operands by converting existing ones to scalable types.
13211 SmallVector<SDValue, 6> Ops;
13212 for (const SDValue &V : Op->op_values()) {
13213 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13214
13215 // Pass through non-vector operands.
13216 if (!V.getValueType().isVector()) {
13217 Ops.push_back(V);
13218 continue;
13219 }
13220
13221 // "cast" fixed length vector to a scalable vector.
13222 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13223 "Only fixed length vectors are supported!");
13224 MVT VContainerVT = ContainerVT.changeVectorElementType(
13225 V.getSimpleValueType().getVectorElementType());
13226 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13227 }
13228
13229 SDLoc DL(Op);
13230 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13231 if (HasPassthruOp)
13232 Ops.push_back(DAG.getUNDEF(ContainerVT));
13233 if (HasMask)
13234 Ops.push_back(Mask);
13235 Ops.push_back(VL);
13236
13237 // StrictFP operations have two result values. Their lowered result should
13238 // have the same result count.
13239 if (Op->isStrictFPOpcode()) {
13240 SDValue ScalableRes =
13241 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13242 Op->getFlags());
13243 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13244 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13245 }
13246
13247 SDValue ScalableRes =
13248 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13249 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13250}
13251
13252// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13253// * Operands of each node are assumed to be in the same order.
13254// * The EVL operand is promoted from i32 to i64 on RV64.
13255// * Fixed-length vectors are converted to their scalable-vector container
13256// types.
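// For example (a rough sketch, operand order assumed): VP_ADD(x, y, mask,
// evl) becomes ADD_VL(x, y, undef passthru, mask, evl), with fixed-length
// operands first converted to their scalable containers.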
13257SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13258 const auto &TSInfo =
13259 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13260
13261 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13262 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13263
13264 SDLoc DL(Op);
13265 MVT VT = Op.getSimpleValueType();
13266 SmallVector<SDValue, 16> Ops;
13267
13268 MVT ContainerVT = VT;
13269 if (VT.isFixedLengthVector())
13270 ContainerVT = getContainerForFixedLengthVector(VT);
13271
13272 for (const auto &OpIdx : enumerate(Op->ops())) {
13273 SDValue V = OpIdx.value();
13274 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13275 // Add a dummy passthru value before the mask, or, if there isn't a mask,
13276 // before the EVL.
13277 if (HasPassthruOp) {
13278 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13279 if (MaskIdx) {
13280 if (*MaskIdx == OpIdx.index())
13281 Ops.push_back(DAG.getUNDEF(ContainerVT));
13282 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13283 OpIdx.index()) {
13284 if (Op.getOpcode() == ISD::VP_MERGE) {
13285 // For VP_MERGE, copy the false operand instead of an undef value.
13286 Ops.push_back(Ops.back());
13287 } else {
13288 assert(Op.getOpcode() == ISD::VP_SELECT);
13289 // For VP_SELECT, add an undef value.
13290 Ops.push_back(DAG.getUNDEF(ContainerVT));
13291 }
13292 }
13293 }
13294 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13295 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13296 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13297 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13298 Subtarget.getXLenVT()));
13299 // Pass through operands which aren't fixed-length vectors.
13300 if (!V.getValueType().isFixedLengthVector()) {
13301 Ops.push_back(V);
13302 continue;
13303 }
13304 // "cast" fixed length vector to a scalable vector.
13305 MVT OpVT = V.getSimpleValueType();
13306 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13307 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13308 "Only fixed length vectors are supported!");
13309 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13310 }
13311
13312 if (!VT.isFixedLengthVector())
13313 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13314
13315 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13316
13317 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13318}
13319
13320SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13321 SelectionDAG &DAG) const {
13322 SDLoc DL(Op);
13323 MVT VT = Op.getSimpleValueType();
13324
13325 SDValue Src = Op.getOperand(0);
13326 // NOTE: Mask is dropped.
13327 SDValue VL = Op.getOperand(2);
13328
13329 MVT ContainerVT = VT;
13330 if (VT.isFixedLengthVector()) {
13331 ContainerVT = getContainerForFixedLengthVector(VT);
13332 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13333 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13334 }
13335
13336 MVT XLenVT = Subtarget.getXLenVT();
13337 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13338 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13339 DAG.getUNDEF(ContainerVT), Zero, VL);
13340
13341 SDValue SplatValue = DAG.getSignedConstant(
13342 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13343 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13344 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13345
13346 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13347 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13348 if (!VT.isFixedLengthVector())
13349 return Result;
13350 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13351}
13352
13353SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13354 SelectionDAG &DAG) const {
13355 SDLoc DL(Op);
13356 MVT VT = Op.getSimpleValueType();
13357
13358 SDValue Op1 = Op.getOperand(0);
13359 SDValue Op2 = Op.getOperand(1);
13360 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13361 // NOTE: Mask is dropped.
13362 SDValue VL = Op.getOperand(4);
13363
13364 MVT ContainerVT = VT;
13365 if (VT.isFixedLengthVector()) {
13366 ContainerVT = getContainerForFixedLengthVector(VT);
13367 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13368 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13369 }
13370
13371 SDValue Result;
13372 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13373
13374 switch (Condition) {
13375 default:
13376 break;
13377 // X != Y --> (X^Y)
13378 case ISD::SETNE:
13379 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13380 break;
13381 // X == Y --> ~(X^Y)
13382 case ISD::SETEQ: {
13383 SDValue Temp =
13384 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13385 Result =
13386 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13387 break;
13388 }
13389 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13390 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13391 case ISD::SETGT:
13392 case ISD::SETULT: {
13393 SDValue Temp =
13394 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13395 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13396 break;
13397 }
13398 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13399 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13400 case ISD::SETLT:
13401 case ISD::SETUGT: {
13402 SDValue Temp =
13403 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13404 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13405 break;
13406 }
13407 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13408 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13409 case ISD::SETGE:
13410 case ISD::SETULE: {
13411 SDValue Temp =
13412 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13413 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13414 break;
13415 }
13416 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13417 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13418 case ISD::SETLE:
13419 case ISD::SETUGE: {
13420 SDValue Temp =
13421 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13422 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13423 break;
13424 }
13425 }
13426
13427 if (!VT.isFixedLengthVector())
13428 return Result;
13429 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13430}
13431
13432// Lower Floating-Point/Integer Type-Convert VP SDNodes
13433SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13434 SelectionDAG &DAG) const {
13435 SDLoc DL(Op);
13436
13437 SDValue Src = Op.getOperand(0);
13438 SDValue Mask = Op.getOperand(1);
13439 SDValue VL = Op.getOperand(2);
13440 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13441
13442 MVT DstVT = Op.getSimpleValueType();
13443 MVT SrcVT = Src.getSimpleValueType();
13444 if (DstVT.isFixedLengthVector()) {
13445 DstVT = getContainerForFixedLengthVector(DstVT);
13446 SrcVT = getContainerForFixedLengthVector(SrcVT);
13447 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13448 MVT MaskVT = getMaskTypeFor(DstVT);
13449 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13450 }
13451
13452 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13453 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13454
13455 SDValue Result;
13456 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13457 if (SrcVT.isInteger()) {
13458 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13459
13460 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13461 ? RISCVISD::VSEXT_VL
13462 : RISCVISD::VZEXT_VL;
13463
13464 // Do we need to do any pre-widening before converting?
13465 if (SrcEltSize == 1) {
13466 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13467 MVT XLenVT = Subtarget.getXLenVT();
13468 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13469 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13470 DAG.getUNDEF(IntVT), Zero, VL);
13471 SDValue One = DAG.getSignedConstant(
13472 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13473 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13474 DAG.getUNDEF(IntVT), One, VL);
13475 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13476 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13477 } else if (DstEltSize > (2 * SrcEltSize)) {
13478 // Widen before converting.
13479 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13480 DstVT.getVectorElementCount());
13481 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13482 }
13483
13484 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13485 } else {
13486 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13487 "Wrong input/output vector types");
13488
13489 // Convert f16 to f32 then convert f32 to i64.
13490 if (DstEltSize > (2 * SrcEltSize)) {
13491 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13492 MVT InterimFVT =
13493 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13494 Src =
13495 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13496 }
13497
13498 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13499 }
13500 } else { // Narrowing + Conversion
13501 if (SrcVT.isInteger()) {
13502 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13503 // First do a narrowing conversion to an FP type half the size, then round
13504 // to a smaller FP type if needed.
13505
13506 MVT InterimFVT = DstVT;
13507 if (SrcEltSize > (2 * DstEltSize)) {
13508 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13509 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13510 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13511 }
13512
13513 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13514
13515 if (InterimFVT != DstVT) {
13516 Src = Result;
13517 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13518 }
13519 } else {
13520 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13521 "Wrong input/output vector types");
13522 // First do a narrowing conversion to an integer half the size, then
13523 // truncate if needed.
13524
13525 if (DstEltSize == 1) {
13526 // First convert to the same size integer, then convert to mask using
13527 // setcc.
13528 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13529 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13530 DstVT.getVectorElementCount());
13531 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13532
13533 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13534 // otherwise the conversion was undefined.
13535 MVT XLenVT = Subtarget.getXLenVT();
13536 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13537 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13538 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13539 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13540 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13541 DAG.getUNDEF(DstVT), Mask, VL});
13542 } else {
13543 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13544 DstVT.getVectorElementCount());
13545
13546 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13547
13548 while (InterimIVT != DstVT) {
13549 SrcEltSize /= 2;
13550 Src = Result;
13551 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13552 DstVT.getVectorElementCount());
13553 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13554 Src, Mask, VL);
13555 }
13556 }
13557 }
13558 }
13559
13560 MVT VT = Op.getSimpleValueType();
13561 if (!VT.isFixedLengthVector())
13562 return Result;
13563 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13564}
13565
13566SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13567 SelectionDAG &DAG) const {
13568 SDLoc DL(Op);
13569 MVT VT = Op.getSimpleValueType();
13570 MVT XLenVT = Subtarget.getXLenVT();
13571
13572 SDValue Mask = Op.getOperand(0);
13573 SDValue TrueVal = Op.getOperand(1);
13574 SDValue FalseVal = Op.getOperand(2);
13575 SDValue VL = Op.getOperand(3);
13576
13577 // Use default legalization if a vector of EVL type would be legal.
13578 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13579 VT.getVectorElementCount());
13580 if (isTypeLegal(EVLVecVT))
13581 return SDValue();
13582
13583 MVT ContainerVT = VT;
13584 if (VT.isFixedLengthVector()) {
13585 ContainerVT = getContainerForFixedLengthVector(VT);
13586 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13587 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13588 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13589 }
13590
13591 // Promote to a vector of i8.
13592 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13593
13594 // Promote TrueVal and FalseVal using VLMax.
13595 // FIXME: Is there a better way to do this?
13596 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13597 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13598 DAG.getUNDEF(PromotedVT),
13599 DAG.getConstant(1, DL, XLenVT), VLMax);
13600 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13601 DAG.getUNDEF(PromotedVT),
13602 DAG.getConstant(0, DL, XLenVT), VLMax);
13603 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13604 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13605 // Any element past VL uses FalseVal, so use VLMax
13606 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13607 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13608
13609 // VP_MERGE the two promoted values.
13610 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13611 TrueVal, FalseVal, FalseVal, VL);
13612
13613 // Convert back to mask.
13614 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13615 SDValue Result = DAG.getNode(
13616 RISCVISD::SETCC_VL, DL, ContainerVT,
13617 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13618 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13619
13620 if (VT.isFixedLengthVector())
13621 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13622 return Result;
13623}
13624
13625SDValue
13626RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13627 SelectionDAG &DAG) const {
13628 using namespace SDPatternMatch;
13629
13630 SDLoc DL(Op);
13631
13632 SDValue Op1 = Op.getOperand(0);
13633 SDValue Op2 = Op.getOperand(1);
13634 SDValue Offset = Op.getOperand(2);
13635 SDValue Mask = Op.getOperand(3);
13636 SDValue EVL1 = Op.getOperand(4);
13637 SDValue EVL2 = Op.getOperand(5);
13638
13639 const MVT XLenVT = Subtarget.getXLenVT();
13640 MVT VT = Op.getSimpleValueType();
13641 MVT ContainerVT = VT;
13642 if (VT.isFixedLengthVector()) {
13643 ContainerVT = getContainerForFixedLengthVector(VT);
13644 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13645 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13646 MVT MaskVT = getMaskTypeFor(ContainerVT);
13647 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13648 }
13649
13650 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13651 if (IsMaskVector) {
13652 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13653
13654 // Expand input operands
13655 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13656 DAG.getUNDEF(ContainerVT),
13657 DAG.getConstant(1, DL, XLenVT), EVL1);
13658 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13659 DAG.getUNDEF(ContainerVT),
13660 DAG.getConstant(0, DL, XLenVT), EVL1);
13661 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13662 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13663
13664 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13665 DAG.getUNDEF(ContainerVT),
13666 DAG.getConstant(1, DL, XLenVT), EVL2);
13667 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13668 DAG.getUNDEF(ContainerVT),
13669 DAG.getConstant(0, DL, XLenVT), EVL2);
13670 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13671 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13672 }
13673
13674 auto getVectorFirstEle = [](SDValue Vec) {
13675 SDValue FirstEle;
13676 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13677 return FirstEle;
13678
13679 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13680 Vec.getOpcode() == RISCVISD::VMV_V_X_VL)
13681 return Vec.getOperand(0);
13682
13683 return SDValue();
13684 };
13685
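 // Special case: with a zero offset and EVL1 == 1 the splice reduces to
 // prepending the first element of Op1 to Op2, which maps directly onto a
 // vslide1up (or vfslide1up for floating-point element types).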
13686 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13687 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13688 MVT EltVT = ContainerVT.getVectorElementType();
13689 SDValue Result;
13690 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13691 EltVT == MVT::bf16) {
13692 EltVT = EltVT.changeTypeToInteger();
13693 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13694 Op2 = DAG.getBitcast(ContainerVT, Op2);
13695 FirstEle =
13696 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13697 }
13698 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13699 : RISCVISD::VSLIDE1UP_VL,
13700 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13701 FirstEle, Mask, EVL2);
13702 Result = DAG.getBitcast(
13703 ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13704 Result);
13705 return VT.isFixedLengthVector()
13706 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13707 : Result;
13708 }
13709
13710 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13711 SDValue DownOffset, UpOffset;
13712 if (ImmValue >= 0) {
13713 // The operand is a TargetConstant, we need to rebuild it as a regular
13714 // constant.
13715 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13716 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13717 } else {
13718 // The operand is a TargetConstant, we need to rebuild it as a regular
13719 // constant rather than negating the original operand.
13720 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13721 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13722 }
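 // For illustration: with Offset == 2 and EVL1 == 5, DownOffset is 2 and
 // UpOffset is 3, so Op1 is slid down by 2 (keeping its elements 2..4) and Op2
 // is slid up over it starting at position 3.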
13723
13724 if (ImmValue != 0)
13725 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13726 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13727 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13728 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13729 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13730
13731 if (IsMaskVector) {
13732 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13733 Result = DAG.getNode(
13734 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13735 {Result, DAG.getConstant(0, DL, ContainerVT),
13736 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13737 Mask, EVL2});
13738 }
13739
13740 if (!VT.isFixedLengthVector())
13741 return Result;
13742 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13743}
13744
13745SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13746 SelectionDAG &DAG) const {
13747 SDLoc DL(Op);
13748 SDValue Val = Op.getOperand(0);
13749 SDValue Mask = Op.getOperand(1);
13750 SDValue VL = Op.getOperand(2);
13751 MVT VT = Op.getSimpleValueType();
13752
13753 MVT ContainerVT = VT;
13754 if (VT.isFixedLengthVector()) {
13755 ContainerVT = getContainerForFixedLengthVector(VT);
13756 MVT MaskVT = getMaskTypeFor(ContainerVT);
13757 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13758 }
13759
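 // An i1 splat cannot use vmv.v.x directly: a constant splat becomes
 // vmset/vmclr, while a variable scalar is splatted into an i8 vector and
 // compared against zero to form the mask.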
13760 SDValue Result;
13761 if (VT.getScalarType() == MVT::i1) {
13762 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13763 Result =
13764 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13765 ContainerVT, VL);
13766 } else {
13767 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13768 SDValue LHS =
13769 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13770 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13771 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13772 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13773 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13774 DAG.getUNDEF(ContainerVT), Mask, VL});
13775 }
13776 } else {
13777 Result =
13778 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13779 }
13780
13781 if (!VT.isFixedLengthVector())
13782 return Result;
13783 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13784}
13785
13786SDValue
13787RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13788 SelectionDAG &DAG) const {
13789 SDLoc DL(Op);
13790 MVT VT = Op.getSimpleValueType();
13791 MVT XLenVT = Subtarget.getXLenVT();
13792
13793 SDValue Op1 = Op.getOperand(0);
13794 SDValue Mask = Op.getOperand(1);
13795 SDValue EVL = Op.getOperand(2);
13796
13797 MVT ContainerVT = VT;
13798 if (VT.isFixedLengthVector()) {
13799 ContainerVT = getContainerForFixedLengthVector(VT);
13800 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13801 MVT MaskVT = getMaskTypeFor(ContainerVT);
13802 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13803 }
13804
13805 MVT GatherVT = ContainerVT;
13806 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13807 // Check if we are working with mask vectors
13808 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13809 if (IsMaskVector) {
13810 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13811
13812 // Expand input operand
13813 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13814 DAG.getUNDEF(IndicesVT),
13815 DAG.getConstant(1, DL, XLenVT), EVL);
13816 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13817 DAG.getUNDEF(IndicesVT),
13818 DAG.getConstant(0, DL, XLenVT), EVL);
13819 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13820 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13821 }
13822
13823 unsigned EltSize = GatherVT.getScalarSizeInBits();
13824 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13825 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13826 unsigned MaxVLMAX =
13827 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13828
13829 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13830 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13831 // to use vrgatherei16.vv.
13832 // TODO: It's also possible to use vrgatherei16.vv for other types to
13833 // decrease register width for the index calculation.
13834 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13835 if (MaxVLMAX > 256 && EltSize == 8) {
13836 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13837 // Split the vector in half and reverse each half using a full register
13838 // reverse.
13839 // Swap the halves and concatenate them.
13840 // Slide the concatenated result by (VLMax - VL).
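 // For illustration: at SEW=8 and LMUL=8, VLMAX equals VLEN, so any VLEN > 256
 // makes the gather indices unrepresentable in i8; an i16 index vector would
 // need an LMUL=16 register group, which does not exist, hence the split into
 // two halves below.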
13841 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13842 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13843 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13844
13845 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13846 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13847
13848 // Reassemble the low and high pieces reversed.
13849 // NOTE: this Result is unmasked (because we do not need masks for
13850 // shuffles). If in the future this has to change, we can use a SELECT_VL
13851 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13852 SDValue Result =
13853 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13854
13855 // Slide off any elements from past EVL that were reversed into the low
13856 // elements.
13857 unsigned MinElts = GatherVT.getVectorMinNumElements();
13858 SDValue VLMax =
13859 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13860 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13861
13862 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13863 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13864
13865 if (IsMaskVector) {
13866 // Truncate Result back to a mask vector
13867 Result =
13868 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13869 {Result, DAG.getConstant(0, DL, GatherVT),
13870 DAG.getCondCode(ISD::SETNE),
13871 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13872 }
13873
13874 if (!VT.isFixedLengthVector())
13875 return Result;
13876 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13877 }
13878
13879 // Just promote the int type to i16 which will double the LMUL.
13880 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13881 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13882 }
13883
13884 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13885 SDValue VecLen =
13886 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13887 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13888 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13889 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13890 DAG.getUNDEF(IndicesVT), Mask, EVL);
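 // Element i of the gather below reads Op1[(EVL-1) - i], reversing the first
 // EVL elements.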
13891 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13892 DAG.getUNDEF(GatherVT), Mask, EVL);
13893
13894 if (IsMaskVector) {
13895 // Truncate Result back to a mask vector
13896 Result = DAG.getNode(
13897 RISCVISD::SETCC_VL, DL, ContainerVT,
13898 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13899 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13900 }
13901
13902 if (!VT.isFixedLengthVector())
13903 return Result;
13904 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13905}
13906
13907SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13908 SelectionDAG &DAG) const {
13909 MVT VT = Op.getSimpleValueType();
13910 if (VT.getVectorElementType() != MVT::i1)
13911 return lowerVPOp(Op, DAG);
13912
13913 // It is safe to drop mask parameter as masked-off elements are undef.
13914 SDValue Op1 = Op->getOperand(0);
13915 SDValue Op2 = Op->getOperand(1);
13916 SDValue VL = Op->getOperand(3);
13917
13918 MVT ContainerVT = VT;
13919 const bool IsFixed = VT.isFixedLengthVector();
13920 if (IsFixed) {
13921 ContainerVT = getContainerForFixedLengthVector(VT);
13922 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13923 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13924 }
13925
13926 SDLoc DL(Op);
13927 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13928 if (!IsFixed)
13929 return Val;
13930 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13931}
13932
13933SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13934 SelectionDAG &DAG) const {
13935 SDLoc DL(Op);
13936 MVT XLenVT = Subtarget.getXLenVT();
13937 MVT VT = Op.getSimpleValueType();
13938 MVT ContainerVT = VT;
13939 if (VT.isFixedLengthVector())
13940 ContainerVT = getContainerForFixedLengthVector(VT);
13941
13942 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13943
13944 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13945 // Check if the mask is known to be all ones
13946 SDValue Mask = VPNode->getMask();
13947 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13948
13949 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13950 : Intrinsic::riscv_vlse_mask,
13951 DL, XLenVT);
13952 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13953 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13954 VPNode->getStride()};
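 // Operand order for the vlse/vlse_mask intrinsic call: chain, intrinsic id,
 // passthru (undef), base pointer and stride, then the mask (if any), the VL,
 // and finally a tail policy for the masked form.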
13955 if (!IsUnmasked) {
13956 if (VT.isFixedLengthVector()) {
13957 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13958 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13959 }
13960 Ops.push_back(Mask);
13961 }
13962 Ops.push_back(VPNode->getVectorLength());
13963 if (!IsUnmasked) {
13964 SDValue Policy =
13965 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13966 Ops.push_back(Policy);
13967 }
13968
13969 SDValue Result =
13970 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13971 VPNode->getMemoryVT(), VPNode->getMemOperand());
13972 SDValue Chain = Result.getValue(1);
13973
13974 if (VT.isFixedLengthVector())
13975 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13976
13977 return DAG.getMergeValues({Result, Chain}, DL);
13978}
13979
13980SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13981 SelectionDAG &DAG) const {
13982 SDLoc DL(Op);
13983 MVT XLenVT = Subtarget.getXLenVT();
13984
13985 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13986 SDValue StoreVal = VPNode->getValue();
13987 MVT VT = StoreVal.getSimpleValueType();
13988 MVT ContainerVT = VT;
13989 if (VT.isFixedLengthVector()) {
13990 ContainerVT = getContainerForFixedLengthVector(VT);
13991 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13992 }
13993
13994 // Check if the mask is known to be all ones
13995 SDValue Mask = VPNode->getMask();
13996 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13997
13998 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
13999 : Intrinsic::riscv_vsse_mask,
14000 DL, XLenVT);
14001 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14002 VPNode->getBasePtr(), VPNode->getStride()};
14003 if (!IsUnmasked) {
14004 if (VT.isFixedLengthVector()) {
14005 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14006 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14007 }
14008 Ops.push_back(Mask);
14009 }
14010 Ops.push_back(VPNode->getVectorLength());
14011
14012 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14013 Ops, VPNode->getMemoryVT(),
14014 VPNode->getMemOperand());
14015}
14016
14017// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14018 // matched to an RVV indexed load. The RVV indexed load instructions only
14019// support the "unsigned unscaled" addressing mode; indices are implicitly
14020// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14021// signed or scaled indexing is extended to the XLEN value type and scaled
14022// accordingly.
14023SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14024 SelectionDAG &DAG) const {
14025 SDLoc DL(Op);
14026 MVT VT = Op.getSimpleValueType();
14027
14028 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14029 EVT MemVT = MemSD->getMemoryVT();
14030 MachineMemOperand *MMO = MemSD->getMemOperand();
14031 SDValue Chain = MemSD->getChain();
14032 SDValue BasePtr = MemSD->getBasePtr();
14033
14034 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14035 SDValue Index, Mask, PassThru, VL;
14036
14037 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14038 Index = VPGN->getIndex();
14039 Mask = VPGN->getMask();
14040 PassThru = DAG.getUNDEF(VT);
14041 VL = VPGN->getVectorLength();
14042 // VP doesn't support extending loads.
14043 LoadExtType = ISD::NON_EXTLOAD;
14044 } else {
14045 // Else it must be a MGATHER.
14046 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14047 Index = MGN->getIndex();
14048 Mask = MGN->getMask();
14049 PassThru = MGN->getPassThru();
14050 LoadExtType = MGN->getExtensionType();
14051 }
14052
14053 MVT IndexVT = Index.getSimpleValueType();
14054 MVT XLenVT = Subtarget.getXLenVT();
14055
14056 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14057 "Unexpected VTs!");
14058 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14059 // Targets have to explicitly opt-in for extending vector loads.
14060 assert(LoadExtType == ISD::NON_EXTLOAD &&
14061 "Unexpected extending MGATHER/VP_GATHER");
14062
14063 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14064 // the selection of the masked intrinsics doesn't do this for us.
14065 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14066
14067 MVT ContainerVT = VT;
14068 if (VT.isFixedLengthVector()) {
14069 ContainerVT = getContainerForFixedLengthVector(VT);
14070 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14071 ContainerVT.getVectorElementCount());
14072
14073 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14074
14075 if (!IsUnmasked) {
14076 MVT MaskVT = getMaskTypeFor(ContainerVT);
14077 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14078 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14079 }
14080 }
14081
14082 if (!VL)
14083 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14084
14085 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14086 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14087 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14088 }
14089
14090 unsigned IntID =
14091 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14092 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14093 if (IsUnmasked)
14094 Ops.push_back(DAG.getUNDEF(ContainerVT));
14095 else
14096 Ops.push_back(PassThru);
14097 Ops.push_back(BasePtr);
14098 Ops.push_back(Index);
14099 if (!IsUnmasked)
14100 Ops.push_back(Mask);
14101 Ops.push_back(VL);
14102 if (!IsUnmasked)
14103 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14104
14105 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14106 SDValue Result =
14107 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14108 Chain = Result.getValue(1);
14109
14110 if (VT.isFixedLengthVector())
14111 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14112
14113 return DAG.getMergeValues({Result, Chain}, DL);
14114}
14115
14116// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14117 // matched to an RVV indexed store. The RVV indexed store instructions only
14118// support the "unsigned unscaled" addressing mode; indices are implicitly
14119// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14120// signed or scaled indexing is extended to the XLEN value type and scaled
14121// accordingly.
14122SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14123 SelectionDAG &DAG) const {
14124 SDLoc DL(Op);
14125 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14126 EVT MemVT = MemSD->getMemoryVT();
14127 MachineMemOperand *MMO = MemSD->getMemOperand();
14128 SDValue Chain = MemSD->getChain();
14129 SDValue BasePtr = MemSD->getBasePtr();
14130
14131 [[maybe_unused]] bool IsTruncatingStore = false;
14132 SDValue Index, Mask, Val, VL;
14133
14134 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14135 Index = VPSN->getIndex();
14136 Mask = VPSN->getMask();
14137 Val = VPSN->getValue();
14138 VL = VPSN->getVectorLength();
14139 // VP doesn't support truncating stores.
14140 IsTruncatingStore = false;
14141 } else {
14142 // Else it must be a MSCATTER.
14143 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14144 Index = MSN->getIndex();
14145 Mask = MSN->getMask();
14146 Val = MSN->getValue();
14147 IsTruncatingStore = MSN->isTruncatingStore();
14148 }
14149
14150 MVT VT = Val.getSimpleValueType();
14151 MVT IndexVT = Index.getSimpleValueType();
14152 MVT XLenVT = Subtarget.getXLenVT();
14153
14154 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
14155 "Unexpected VTs!");
14156 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14157 // Targets have to explicitly opt-in for extending vector loads and
14158 // truncating vector stores.
14159 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14160
14161 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14162 // the selection of the masked intrinsics doesn't do this for us.
14163 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14164
14165 MVT ContainerVT = VT;
14166 if (VT.isFixedLengthVector()) {
14167 ContainerVT = getContainerForFixedLengthVector(VT);
14168 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14169 ContainerVT.getVectorElementCount());
14170
14171 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14172 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14173
14174 if (!IsUnmasked) {
14175 MVT MaskVT = getMaskTypeFor(ContainerVT);
14176 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14177 }
14178 }
14179
14180 if (!VL)
14181 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14182
14183 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14184 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14185 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14186 }
14187
14188 unsigned IntID =
14189 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14190 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14191 Ops.push_back(Val);
14192 Ops.push_back(BasePtr);
14193 Ops.push_back(Index);
14194 if (!IsUnmasked)
14195 Ops.push_back(Mask);
14196 Ops.push_back(VL);
14197
14198 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14199 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14200}
14201
14202SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14203 SelectionDAG &DAG) const {
14204 const MVT XLenVT = Subtarget.getXLenVT();
14205 SDLoc DL(Op);
14206 SDValue Chain = Op->getOperand(0);
14207 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14208 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14209 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14210
14211 // Encoding used for rounding mode in RISC-V differs from that used in
14212 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
14213 // table, which consists of a sequence of 4-bit fields, each representing
14214 // corresponding FLT_ROUNDS mode.
14215 static const int Table =
14216 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14217 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14218 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14219 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14220 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14221
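 // For illustration: with frm == RISCVFPRndMode::RTZ (1), Shift below is 4 and
 // (Table >> 4) & 7 yields int(RoundingMode::TowardZero), i.e. 0, which matches
 // the C FLT_ROUNDS encoding for round-toward-zero.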
14222 SDValue Shift =
14223 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14224 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14225 DAG.getConstant(Table, DL, XLenVT), Shift);
14226 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14227 DAG.getConstant(7, DL, XLenVT));
14228
14229 return DAG.getMergeValues({Masked, Chain}, DL);
14230}
14231
14232SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14233 SelectionDAG &DAG) const {
14234 const MVT XLenVT = Subtarget.getXLenVT();
14235 SDLoc DL(Op);
14236 SDValue Chain = Op->getOperand(0);
14237 SDValue RMValue = Op->getOperand(1);
14238 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14239
14240 // Encoding used for rounding mode in RISC-V differs from that used in
14241 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
14242 // a table, which consists of a sequence of 4-bit fields, each representing
14243 // corresponding RISC-V mode.
14244 static const unsigned Table =
14245 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14246 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14247 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14248 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14249 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14250
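 // For illustration: a requested mode of RoundingMode::TowardPositive
 // (FLT_ROUNDS value 2) gives Shift = 8, and (Table >> 8) & 7 yields
 // RISCVFPRndMode::RUP (3), the frm encoding for round-up.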
14251 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14252
14253 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14254 DAG.getConstant(2, DL, XLenVT));
14255 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14256 DAG.getConstant(Table, DL, XLenVT), Shift);
14257 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14258 DAG.getConstant(0x7, DL, XLenVT));
14259 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14260 RMValue);
14261}
14262
14263SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14264 SelectionDAG &DAG) const {
14265 const MVT XLenVT = Subtarget.getXLenVT();
14266 SDLoc DL(Op);
14267 SDValue Chain = Op->getOperand(0);
14268 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14269 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14270 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14271}
14272
14273SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14274 SelectionDAG &DAG) const {
14275 const MVT XLenVT = Subtarget.getXLenVT();
14276 SDLoc DL(Op);
14277 SDValue Chain = Op->getOperand(0);
14278 SDValue EnvValue = Op->getOperand(1);
14279 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14280
14281 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14282 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14283 EnvValue);
14284}
14285
14286SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14287 SelectionDAG &DAG) const {
14288 const MVT XLenVT = Subtarget.getXLenVT();
14289 SDLoc DL(Op);
14290 SDValue Chain = Op->getOperand(0);
14291 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14292 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14293
14294 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14295 EnvValue);
14296}
14297
14300
14301SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14302 SelectionDAG &DAG) const {
14303 const MVT XLenVT = Subtarget.getXLenVT();
14304 SDLoc DL(Op);
14305 SDValue Chain = Op->getOperand(0);
14306 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14307 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14308 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14309 Chain = Result.getValue(1);
14310 return DAG.getMergeValues({Result, Chain}, DL);
14311}
14312
14313SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14314 SelectionDAG &DAG) const {
14315 const MVT XLenVT = Subtarget.getXLenVT();
14316 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14317 SDLoc DL(Op);
14318 SDValue Chain = Op->getOperand(0);
14319 SDValue EnvValue = Op->getOperand(1);
14320 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14321 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14322
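 // Update only the FP mode bits of fcsr: first clear them (CLEAR_CSR, expected
 // to select to a csrrc/csrc of ModeMask), then set the new, already masked
 // value (SET_CSR, csrrs/csrs). The exception flag bits outside ModeMask are
 // left untouched.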
14323 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14324 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14325 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14326 ModeMask);
14327 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14328 EnvValue);
14329}
14330
14331SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14332 SelectionDAG &DAG) const {
14333 const MVT XLenVT = Subtarget.getXLenVT();
14334 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14335 SDLoc DL(Op);
14336 SDValue Chain = Op->getOperand(0);
14337 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14338 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14339
14340 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14341 ModeMask);
14342}
14343
14344SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14345 SelectionDAG &DAG) const {
14346 MachineFunction &MF = DAG.getMachineFunction();
14347
14348 bool isRISCV64 = Subtarget.is64Bit();
14349 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14350
14351 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14352 return DAG.getFrameIndex(FI, PtrVT);
14353}
14354
14355// Returns the opcode of the target-specific SDNode that implements the 32-bit
14356// form of the given Opcode.
14357static unsigned getRISCVWOpcode(unsigned Opcode) {
14358 switch (Opcode) {
14359 default:
14360 llvm_unreachable("Unexpected opcode");
14361 case ISD::SHL:
14362 return RISCVISD::SLLW;
14363 case ISD::SRA:
14364 return RISCVISD::SRAW;
14365 case ISD::SRL:
14366 return RISCVISD::SRLW;
14367 case ISD::SDIV:
14368 return RISCVISD::DIVW;
14369 case ISD::UDIV:
14370 return RISCVISD::DIVUW;
14371 case ISD::UREM:
14372 return RISCVISD::REMUW;
14373 case ISD::ROTL:
14374 return RISCVISD::ROLW;
14375 case ISD::ROTR:
14376 return RISCVISD::RORW;
14377 }
14378}
14379
14380// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14381// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14382 // otherwise be promoted to i64, making it difficult to select the
14383 // SLLW/DIVUW/.../*W instructions later on, because the fact that the operation
14384 // was originally of type i8/i16/i32 is lost.
14385 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14386 unsigned ExtOpc = ISD::ANY_EXTEND) {
14387 SDLoc DL(N);
14388 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14389 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14390 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14391 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14392 // ReplaceNodeResults requires we maintain the same type for the return value.
14393 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14394}
14395
14396 // Converts the given 32-bit operation to an i64 operation with signed-extension
14397 // semantics to reduce the number of sign-extension instructions.
14398 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14399 SDLoc DL(N);
14400 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14401 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14402 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14403 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14404 DAG.getValueType(MVT::i32));
14405 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14406}
14407
14408 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14409 SmallVectorImpl<SDValue> &Results,
14410 SelectionDAG &DAG) const {
14411 SDLoc DL(N);
14412 switch (N->getOpcode()) {
14413 default:
14414 llvm_unreachable("Don't know how to custom type legalize this operation!");
14415 case ISD::STRICT_FP_TO_SINT:
14416 case ISD::STRICT_FP_TO_UINT:
14417 case ISD::FP_TO_SINT:
14418 case ISD::FP_TO_UINT: {
14419 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14420 "Unexpected custom legalisation");
14421 bool IsStrict = N->isStrictFPOpcode();
14422 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14423 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14424 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14425 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14426 TargetLowering::TypeSoftenFloat) {
14427 if (!isTypeLegal(Op0.getValueType()))
14428 return;
14429 if (IsStrict) {
14430 SDValue Chain = N->getOperand(0);
14431 // In absence of Zfh, promote f16 to f32, then convert.
14432 if (Op0.getValueType() == MVT::f16 &&
14433 !Subtarget.hasStdExtZfhOrZhinx()) {
14434 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14435 {Chain, Op0});
14436 Chain = Op0.getValue(1);
14437 }
14438 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14439 : RISCVISD::STRICT_FCVT_WU_RV64;
14440 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14441 SDValue Res = DAG.getNode(
14442 Opc, DL, VTs, Chain, Op0,
14443 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14444 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14445 Results.push_back(Res.getValue(1));
14446 return;
14447 }
14448 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14449 // convert.
14450 if ((Op0.getValueType() == MVT::f16 &&
14451 !Subtarget.hasStdExtZfhOrZhinx()) ||
14452 Op0.getValueType() == MVT::bf16)
14453 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14454
14455 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14456 SDValue Res =
14457 DAG.getNode(Opc, DL, MVT::i64, Op0,
14458 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14459 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14460 return;
14461 }
14462 // If the FP type needs to be softened, emit a library call using the 'si'
14463 // version. If we left it to default legalization we'd end up with 'di'. If
14464 // the FP type doesn't need to be softened just let generic type
14465 // legalization promote the result type.
14466 RTLIB::Libcall LC;
14467 if (IsSigned)
14468 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14469 else
14470 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14471 MakeLibCallOptions CallOptions;
14472 EVT OpVT = Op0.getValueType();
14473 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14474 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14475 SDValue Result;
14476 std::tie(Result, Chain) =
14477 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14478 Results.push_back(Result);
14479 if (IsStrict)
14480 Results.push_back(Chain);
14481 break;
14482 }
14483 case ISD::LROUND: {
14484 SDValue Op0 = N->getOperand(0);
14485 EVT Op0VT = Op0.getValueType();
14486 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14487 TargetLowering::TypeSoftenFloat) {
14488 if (!isTypeLegal(Op0VT))
14489 return;
14490
14491 // In absence of Zfh, promote f16 to f32, then convert.
14492 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14493 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14494
14495 SDValue Res =
14496 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14497 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14498 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14499 return;
14500 }
14501 // If the FP type needs to be softened, emit a library call to lround. We'll
14502 // need to truncate the result. We assume any value that doesn't fit in i32
14503 // is allowed to return an unspecified value.
14504 RTLIB::Libcall LC =
14505 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14506 MakeLibCallOptions CallOptions;
14507 EVT OpVT = Op0.getValueType();
14508 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14509 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14510 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14511 Results.push_back(Result);
14512 break;
14513 }
14514 case ISD::READCYCLECOUNTER:
14515 case ISD::READSTEADYCOUNTER: {
14516 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14517 "has custom type legalization on riscv32");
14518
14519 SDValue LoCounter, HiCounter;
14520 MVT XLenVT = Subtarget.getXLenVT();
14521 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14522 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14523 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14524 } else {
14525 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14526 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14527 }
14528 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14529 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14530 N->getOperand(0), LoCounter, HiCounter);
14531
14532 Results.push_back(
14533 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14534 Results.push_back(RCW.getValue(2));
14535 break;
14536 }
14537 case ISD::LOAD: {
14538 if (!ISD::isNON_EXTLoad(N))
14539 return;
14540
14541 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14542 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14543 LoadSDNode *Ld = cast<LoadSDNode>(N);
14544
14545 if (N->getValueType(0) == MVT::i64) {
14546 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14547 "Unexpected custom legalisation");
14548
14549 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14550 return;
14551
14552 SDLoc DL(N);
14553 SDValue Result = DAG.getMemIntrinsicNode(
14554 RISCVISD::LD_RV32, DL,
14555 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14556 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14557 SDValue Lo = Result.getValue(0);
14558 SDValue Hi = Result.getValue(1);
14559 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14560 Results.append({Pair, Result.getValue(2)});
14561 return;
14562 }
14563
14564 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14565 "Unexpected custom legalisation");
14566
14567 SDLoc dl(N);
14568 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14569 Ld->getBasePtr(), Ld->getMemoryVT(),
14570 Ld->getMemOperand());
14571 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14572 Results.push_back(Res.getValue(1));
14573 return;
14574 }
14575 case ISD::MUL: {
14576 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14577 unsigned XLen = Subtarget.getXLen();
14578 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14579 if (Size > XLen) {
14580 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14581 SDValue LHS = N->getOperand(0);
14582 SDValue RHS = N->getOperand(1);
14583 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14584
14585 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14586 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14587 // We need exactly one side to be unsigned.
14588 if (LHSIsU == RHSIsU)
14589 return;
14590
14591 auto MakeMULPair = [&](SDValue S, SDValue U) {
14592 MVT XLenVT = Subtarget.getXLenVT();
14593 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14594 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14595 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14596 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14597 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14598 };
14599
14600 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14601 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14602
14603 // The other operand should be signed, but still prefer MULH when
14604 // possible.
14605 if (RHSIsU && LHSIsS && !RHSIsS)
14606 Results.push_back(MakeMULPair(LHS, RHS));
14607 else if (LHSIsU && RHSIsS && !LHSIsS)
14608 Results.push_back(MakeMULPair(RHS, LHS));
14609
14610 return;
14611 }
14612 [[fallthrough]];
14613 }
14614 case ISD::ADD:
14615 case ISD::SUB:
14616 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14617 "Unexpected custom legalisation");
14618 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14619 break;
14620 case ISD::SHL:
14621 case ISD::SRA:
14622 case ISD::SRL:
14623 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14624 "Unexpected custom legalisation");
14625 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14626 // If we can use a BSET instruction, allow default promotion to apply.
14627 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14628 isOneConstant(N->getOperand(0)))
14629 break;
14630 Results.push_back(customLegalizeToWOp(N, DAG));
14631 break;
14632 }
14633
14634 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14635 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14636 // shift amount.
14637 if (N->getOpcode() == ISD::SHL) {
14638 SDLoc DL(N);
14639 SDValue NewOp0 =
14640 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14641 SDValue NewOp1 =
14642 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14643 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14644 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14645 DAG.getValueType(MVT::i32));
14646 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14647 }
14648
14649 break;
14650 case ISD::ROTL:
14651 case ISD::ROTR:
14652 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14653 "Unexpected custom legalisation");
14654 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14655 Subtarget.hasVendorXTHeadBb()) &&
14656 "Unexpected custom legalization");
14657 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14658 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14659 return;
14660 Results.push_back(customLegalizeToWOp(N, DAG));
14661 break;
14662 case ISD::CTTZ:
14663 case ISD::CTTZ_ZERO_UNDEF:
14664 case ISD::CTLZ:
14665 case ISD::CTLZ_ZERO_UNDEF: {
14666 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14667 "Unexpected custom legalisation");
14668
14669 SDValue NewOp0 =
14670 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14671 bool IsCTZ =
14672 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14673 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14674 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14675 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14676 return;
14677 }
14678 case ISD::SDIV:
14679 case ISD::UDIV:
14680 case ISD::UREM: {
14681 MVT VT = N->getSimpleValueType(0);
14682 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14683 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14684 "Unexpected custom legalisation");
14685 // Don't promote division/remainder by a constant, since we should expand those
14686 // to a multiply by a magic constant.
14687 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14688 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14689 !isIntDivCheap(N->getValueType(0), Attr))
14690 return;
14691
14692 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14693 // the upper 32 bits. For other types we need to sign or zero extend
14694 // based on the opcode.
14695 unsigned ExtOpc = ISD::ANY_EXTEND;
14696 if (VT != MVT::i32)
14697 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14698 : ISD::ZERO_EXTEND;
14699
14700 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14701 break;
14702 }
14703 case ISD::SADDO: {
14704 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14705 "Unexpected custom legalisation");
14706
14707 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14708 // use the default legalization.
14709 if (!isa<ConstantSDNode>(N->getOperand(1)))
14710 return;
14711
14712 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14713 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14714 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14715 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14716 DAG.getValueType(MVT::i32));
14717
14718 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14719
14720 // For an addition, the result should be less than one of the operands (LHS)
14721 // if and only if the other operand (RHS) is negative, otherwise there will
14722 // be overflow.
14723 // For a subtraction, the result should be less than one of the operands
14724 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14725 // otherwise there will be overflow.
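 // For illustration: LHS = INT32_MAX, RHS = 1 gives Res = INT32_MIN after the
 // sign extension, so Res < LHS is true while RHS < 0 is false; the XOR below
 // therefore reports overflow.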
14726 EVT OType = N->getValueType(1);
14727 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14728 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14729
14730 SDValue Overflow =
14731 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14732 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14733 Results.push_back(Overflow);
14734 return;
14735 }
14736 case ISD::UADDO:
14737 case ISD::USUBO: {
14738 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14739 "Unexpected custom legalisation");
14740 bool IsAdd = N->getOpcode() == ISD::UADDO;
14741 // Create an ADDW or SUBW.
14742 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14743 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14744 SDValue Res =
14745 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14746 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14747 DAG.getValueType(MVT::i32));
14748
14749 SDValue Overflow;
14750 if (IsAdd && isOneConstant(RHS)) {
14751 // Special case: uaddo X, 1 overflows iff the addition result is 0.
14752 // The general case (X + C) < C is not necessarily beneficial. Although we
14753 // reduce the live range of X, we may introduce the materialization of
14754 // constant C, especially when the setcc result is used by a branch, and
14755 // RISC-V has no compare-with-constant-and-branch instructions.
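 // For illustration: with X:i32 == 0xffffffff, uaddo(X, 1) wraps to 0, which
 // the SETEQ-against-0 check below detects; uaddo(X, -1) wraps for every
 // X != 0, which the SETNE-against-0 check in the next branch captures.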
14756 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14757 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14758 } else if (IsAdd && isAllOnesConstant(RHS)) {
14759 // Special case: uaddo X, -1 overflows iff X != 0.
14760 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14761 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14762 } else {
14763 // Sign extend the LHS and perform an unsigned compare with the ADDW
14764 // result. Since the inputs are sign extended from i32, this is equivalent
14765 // to comparing the lower 32 bits.
14766 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14767 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14768 IsAdd ? ISD::SETULT : ISD::SETUGT);
14769 }
14770
14771 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14772 Results.push_back(Overflow);
14773 return;
14774 }
14775 case ISD::UADDSAT:
14776 case ISD::USUBSAT: {
14777 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14778 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14779 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14780 // promotion for UADDO/USUBO.
14781 Results.push_back(expandAddSubSat(N, DAG));
14782 return;
14783 }
14784 case ISD::SADDSAT:
14785 case ISD::SSUBSAT: {
14786 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14787 "Unexpected custom legalisation");
14788 Results.push_back(expandAddSubSat(N, DAG));
14789 return;
14790 }
14791 case ISD::ABS: {
14792 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14793 "Unexpected custom legalisation");
14794
14795 if (Subtarget.hasStdExtZbb()) {
14796 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
14797 // This allows us to remember that the result is sign extended. Expanding
14798 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14799 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14800 N->getOperand(0));
14801 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14802 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14803 return;
14804 }
14805
14806 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
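 // For illustration: X = -5 gives Y = -1 (all ones), xor(X, Y) = 4, and
 // 4 - (-1) = 5 = |X|; for non-negative X, Y = 0 and the value is unchanged.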
14807 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14808
14809 // Freeze the source so we can increase its use count.
14810 Src = DAG.getFreeze(Src);
14811
14812 // Copy sign bit to all bits using the sraiw pattern.
14813 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14814 DAG.getValueType(MVT::i32));
14815 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14816 DAG.getConstant(31, DL, MVT::i64));
14817
14818 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14819 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14820
14821 // NOTE: The result is only required to be anyextended, but sext is
14822 // consistent with type legalization of sub.
14823 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14824 DAG.getValueType(MVT::i32));
14825 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14826 return;
14827 }
14828 case ISD::BITCAST: {
14829 EVT VT = N->getValueType(0);
14830 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14831 SDValue Op0 = N->getOperand(0);
14832 EVT Op0VT = Op0.getValueType();
14833 MVT XLenVT = Subtarget.getXLenVT();
14834 if (VT == MVT::i16 &&
14835 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14836 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14837 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14838 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14839 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14840 Subtarget.hasStdExtFOrZfinx()) {
14841 SDValue FPConv =
14842 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14843 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14844 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14845 Subtarget.hasStdExtDOrZdinx()) {
14846 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14847 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14848 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14849 NewReg.getValue(0), NewReg.getValue(1));
14850 Results.push_back(RetReg);
14851 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14852 isTypeLegal(Op0VT)) {
14853 // Custom-legalize bitcasts from fixed-length vector types to illegal
14854 // scalar types in order to improve codegen. Bitcast the vector to a
14855 // one-element vector type whose element type is the same as the result
14856 // type, and extract the first element.
14857 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14858 if (isTypeLegal(BVT)) {
14859 SDValue BVec = DAG.getBitcast(BVT, Op0);
14860 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14861 }
14862 }
14863 break;
14864 }
14865 case ISD::BITREVERSE: {
14866 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14867 "Unexpected custom legalisation");
14868 MVT XLenVT = Subtarget.getXLenVT();
14869 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14870 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14871 // ReplaceNodeResults requires we maintain the same type for the return
14872 // value.
14873 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14874 break;
14875 }
14876 case RISCVISD::BREV8:
14877 case RISCVISD::ORC_B: {
14878 MVT VT = N->getSimpleValueType(0);
14879 MVT XLenVT = Subtarget.getXLenVT();
14880 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14881 "Unexpected custom legalisation");
14882 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14883 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14884 "Unexpected extension");
14885 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14886 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14887 // ReplaceNodeResults requires we maintain the same type for the return
14888 // value.
14889 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14890 break;
14891 }
14892 case ISD::EXTRACT_VECTOR_ELT: {
14893 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN < SEW, as the SEW element
14894 // type is illegal (currently only vXi64 RV32).
14895 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14896 // transferred to the destination register. We issue two of these from the
14897 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14898 // first element.
14899 SDValue Vec = N->getOperand(0);
14900 SDValue Idx = N->getOperand(1);
14901
14902 // The vector type hasn't been legalized yet so we can't issue target
14903 // specific nodes if it needs legalization.
14904 // FIXME: We would manually legalize if it's important.
14905 if (!isTypeLegal(Vec.getValueType()))
14906 return;
14907
14908 MVT VecVT = Vec.getSimpleValueType();
14909
14910 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14911 VecVT.getVectorElementType() == MVT::i64 &&
14912 "Unexpected EXTRACT_VECTOR_ELT legalization");
14913
14914 // If this is a fixed vector, we need to convert it to a scalable vector.
14915 MVT ContainerVT = VecVT;
14916 if (VecVT.isFixedLengthVector()) {
14917 ContainerVT = getContainerForFixedLengthVector(VecVT);
14918 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14919 }
14920
14921 MVT XLenVT = Subtarget.getXLenVT();
14922
14923 // Use a VL of 1 to avoid processing more elements than we need.
14924 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14925
14926 // Unless the index is known to be 0, we must slide the vector down to get
14927 // the desired element into index 0.
14928 if (!isNullConstant(Idx)) {
14929 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14930 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14931 }
14932
14933 // Extract the lower XLEN bits of the correct vector element.
14934 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14935
14936 // To extract the upper XLEN bits of the vector element, shift the first
14937 // element right by 32 bits and re-extract the lower XLEN bits.
14938 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14939 DAG.getUNDEF(ContainerVT),
14940 DAG.getConstant(32, DL, XLenVT), VL);
14941 SDValue LShr32 =
14942 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14943 DAG.getUNDEF(ContainerVT), Mask, VL);
14944
14945 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14946
14947 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14948 break;
14949 }
14950 case ISD::INTRINSIC_WO_CHAIN: {
14951 unsigned IntNo = N->getConstantOperandVal(0);
14952 switch (IntNo) {
14953 default:
14954 llvm_unreachable(
14955 "Don't know how to custom type legalize this intrinsic!");
14956 case Intrinsic::experimental_get_vector_length: {
14957 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14958 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14959 return;
14960 }
14961 case Intrinsic::experimental_cttz_elts: {
14962 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14963 Results.push_back(
14964 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14965 return;
14966 }
14967 case Intrinsic::riscv_orc_b:
14968 case Intrinsic::riscv_brev8:
14969 case Intrinsic::riscv_sha256sig0:
14970 case Intrinsic::riscv_sha256sig1:
14971 case Intrinsic::riscv_sha256sum0:
14972 case Intrinsic::riscv_sha256sum1:
14973 case Intrinsic::riscv_sm3p0:
14974 case Intrinsic::riscv_sm3p1: {
14975 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
14976 return;
14977 unsigned Opc;
14978 switch (IntNo) {
14979 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
14980 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
14981 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
14982 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
14983 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
14984 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
14985 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
14986 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
14987 }
14988
14989 SDValue NewOp =
14990 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14991 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
14992 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14993 return;
14994 }
14995 case Intrinsic::riscv_sm4ks:
14996 case Intrinsic::riscv_sm4ed: {
14997 unsigned Opc =
14998 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
14999 SDValue NewOp0 =
15000 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15001 SDValue NewOp1 =
15002 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15003 SDValue Res =
15004 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15005 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15006 return;
15007 }
15008 case Intrinsic::riscv_mopr: {
15009 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15010 return;
15011 SDValue NewOp =
15012 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15013 SDValue Res = DAG.getNode(
15014 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15015 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15016 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15017 return;
15018 }
15019 case Intrinsic::riscv_moprr: {
15020 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15021 return;
15022 SDValue NewOp0 =
15023 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15024 SDValue NewOp1 =
15025 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15026 SDValue Res = DAG.getNode(
15027 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15028 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15029 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15030 return;
15031 }
15032 case Intrinsic::riscv_clmul: {
15033 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15034 return;
15035
15036 SDValue NewOp0 =
15037 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15038 SDValue NewOp1 =
15039 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15040 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15041 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15042 return;
15043 }
15044 case Intrinsic::riscv_clmulh:
15045 case Intrinsic::riscv_clmulr: {
15046 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15047 return;
15048
15049 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15050 // to the full 128-bit clmul result of multiplying two xlen values.
15051 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15052 // upper 32 bits.
15053 //
15054 // The alternative is to mask the inputs to 32 bits and use clmul, but
15055 // that requires two shifts to mask each input without zext.w.
15056 // FIXME: If the inputs are known zero extended or could be freely
15057 // zero extended, the mask form would be better.
15058 SDValue NewOp0 =
15059 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15060 SDValue NewOp1 =
15061 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15062 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15063 DAG.getConstant(32, DL, MVT::i64));
15064 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15065 DAG.getConstant(32, DL, MVT::i64));
15066 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15067 : RISCVISD::CLMULR;
15068 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15069 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15070 DAG.getConstant(32, DL, MVT::i64));
15071 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15072 return;
15073 }
15074 case Intrinsic::riscv_vmv_x_s: {
15075 EVT VT = N->getValueType(0);
15076 MVT XLenVT = Subtarget.getXLenVT();
15077 if (VT.bitsLT(XLenVT)) {
15078 // Simple case just extract using vmv.x.s and truncate.
15079 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15080 Subtarget.getXLenVT(), N->getOperand(1));
15081 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15082 return;
15083 }
15084
15085 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15086 "Unexpected custom legalization");
15087
15088 // We need to do the move in two steps.
15089 SDValue Vec = N->getOperand(1);
15090 MVT VecVT = Vec.getSimpleValueType();
15091
15092 // First extract the lower XLEN bits of the element.
15093 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15094
15095 // To extract the upper XLEN bits of the vector element, shift the first
15096 // element right by 32 bits and re-extract the lower XLEN bits.
15097 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15098
15099 SDValue ThirtyTwoV =
15100 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15101 DAG.getConstant(32, DL, XLenVT), VL);
15102 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15103 DAG.getUNDEF(VecVT), Mask, VL);
15104 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15105
15106 Results.push_back(
15107 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15108 break;
15109 }
15110 }
15111 break;
15112 }
15113 case ISD::VECREDUCE_ADD:
15114 case ISD::VECREDUCE_AND:
15115 case ISD::VECREDUCE_OR:
15116 case ISD::VECREDUCE_XOR:
15117 case ISD::VECREDUCE_SMAX:
15118 case ISD::VECREDUCE_UMAX:
15119 case ISD::VECREDUCE_SMIN:
15120 case ISD::VECREDUCE_UMIN:
15121 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15122 Results.push_back(V);
15123 break;
15124 case ISD::VP_REDUCE_ADD:
15125 case ISD::VP_REDUCE_AND:
15126 case ISD::VP_REDUCE_OR:
15127 case ISD::VP_REDUCE_XOR:
15128 case ISD::VP_REDUCE_SMAX:
15129 case ISD::VP_REDUCE_UMAX:
15130 case ISD::VP_REDUCE_SMIN:
15131 case ISD::VP_REDUCE_UMIN:
15132 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15133 Results.push_back(V);
15134 break;
15135 case ISD::GET_ROUNDING: {
15136 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15137 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15138 Results.push_back(Res.getValue(0));
15139 Results.push_back(Res.getValue(1));
15140 break;
15141 }
15142 }
15143}
15144
15145/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15146/// which corresponds to it.
15147static unsigned getVecReduceOpcode(unsigned Opc) {
15148 switch (Opc) {
15149 default:
15150 llvm_unreachable("Unhandled binary to transform reduction");
15151 case ISD::ADD:
15152 return ISD::VECREDUCE_ADD;
15153 case ISD::UMAX:
15154 return ISD::VECREDUCE_UMAX;
15155 case ISD::SMAX:
15156 return ISD::VECREDUCE_SMAX;
15157 case ISD::UMIN:
15158 return ISD::VECREDUCE_UMIN;
15159 case ISD::SMIN:
15160 return ISD::VECREDUCE_SMIN;
15161 case ISD::AND:
15162 return ISD::VECREDUCE_AND;
15163 case ISD::OR:
15164 return ISD::VECREDUCE_OR;
15165 case ISD::XOR:
15166 return ISD::VECREDUCE_XOR;
15167 case ISD::FADD:
15168 // Note: This is the associative form of the generic reduction opcode.
15169 return ISD::VECREDUCE_FADD;
15170 case ISD::FMAXNUM:
15171 return ISD::VECREDUCE_FMAX;
15172 case ISD::FMINNUM:
15173 return ISD::VECREDUCE_FMIN;
15174 }
15175}
15176
15177/// Perform two related transforms whose purpose is to incrementally recognize
15178/// an explode_vector followed by scalar reduction as a vector reduction node.
15179/// This exists to recover from a deficiency in SLP which can't handle
15180/// forests with multiple roots sharing common nodes. In some cases, one
15181/// of the trees will be vectorized, and the other will remain (unprofitably)
15182/// scalarized.
15183static SDValue
15184combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15185 const RISCVSubtarget &Subtarget) {
15186
15187  // This transform needs to run before all integer types have been legalized
15188 // to i64 (so that the vector element type matches the add type), and while
15189 // it's safe to introduce odd sized vector types.
15190  if (DAG.NewNodesMustHaveLegalTypes)
15191 return SDValue();
15192
15193 // Without V, this transform isn't useful. We could form the (illegal)
15194 // operations and let them be scalarized again, but there's really no point.
15195 if (!Subtarget.hasVInstructions())
15196 return SDValue();
15197
15198 const SDLoc DL(N);
15199 const EVT VT = N->getValueType(0);
15200 const unsigned Opc = N->getOpcode();
15201
15202 if (!VT.isInteger()) {
15203 switch (Opc) {
15204 default:
15205 return SDValue();
15206 case ISD::FADD:
15207 // For FADD, we only handle the case with reassociation allowed. We
15208 // could handle strict reduction order, but at the moment, there's no
15209 // known reason to, and the complexity isn't worth it.
15210 if (!N->getFlags().hasAllowReassociation())
15211 return SDValue();
15212 break;
15213 case ISD::FMAXNUM:
15214 case ISD::FMINNUM:
15215 break;
15216 }
15217 }
15218
15219 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15220 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15221 "Inconsistent mappings");
15222 SDValue LHS = N->getOperand(0);
15223 SDValue RHS = N->getOperand(1);
15224
15225 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15226 return SDValue();
15227
15228 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15229 std::swap(LHS, RHS);
15230
15231 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15232 !isa<ConstantSDNode>(RHS.getOperand(1)))
15233 return SDValue();
15234
15235 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15236 SDValue SrcVec = RHS.getOperand(0);
15237 EVT SrcVecVT = SrcVec.getValueType();
15238 assert(SrcVecVT.getVectorElementType() == VT);
15239 if (SrcVecVT.isScalableVector())
15240 return SDValue();
15241
15242 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15243 return SDValue();
15244
15245 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15246 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15247 // root of our reduction tree. TODO: We could extend this to any two
15248 // adjacent aligned constant indices if desired.
15249 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15250 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15251 uint64_t LHSIdx =
15252 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15253 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15254 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15255 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15256 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15257 }
15258 }
15259
15260 // Match (binop (reduce (extract_subvector V, 0),
15261 // (extract_vector_elt V, sizeof(SubVec))))
15262 // into a reduction of one more element from the original vector V.
15263 if (LHS.getOpcode() != ReduceOpc)
15264 return SDValue();
15265
15266 SDValue ReduceVec = LHS.getOperand(0);
15267 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15268 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15269 isNullConstant(ReduceVec.getOperand(1)) &&
15270 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15271 // For illegal types (e.g. 3xi32), most will be combined again into a
15272 // wider (hopefully legal) type. If this is a terminal state, we are
15273 // relying on type legalization here to produce something reasonable
15274 // and this lowering quality could probably be improved. (TODO)
15275 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15276 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15277 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15278 ReduceVec->getFlags() & N->getFlags());
15279 }
15280
15281 return SDValue();
15282}
15283
15284
15285// Try to fold (<bop> x, (reduction.<bop> vec, start))
15286static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15287 const RISCVSubtarget &Subtarget) {
15288 auto BinOpToRVVReduce = [](unsigned Opc) {
15289 switch (Opc) {
15290 default:
15291 llvm_unreachable("Unhandled binary to transform reduction");
15292 case ISD::ADD:
15293 return RISCVISD::VECREDUCE_ADD_VL;
15294 case ISD::UMAX:
15295 return RISCVISD::VECREDUCE_UMAX_VL;
15296 case ISD::SMAX:
15297 return RISCVISD::VECREDUCE_SMAX_VL;
15298 case ISD::UMIN:
15299 return RISCVISD::VECREDUCE_UMIN_VL;
15300 case ISD::SMIN:
15301 return RISCVISD::VECREDUCE_SMIN_VL;
15302 case ISD::AND:
15303 return RISCVISD::VECREDUCE_AND_VL;
15304 case ISD::OR:
15305 return RISCVISD::VECREDUCE_OR_VL;
15306 case ISD::XOR:
15307 return RISCVISD::VECREDUCE_XOR_VL;
15308 case ISD::FADD:
15309 return RISCVISD::VECREDUCE_FADD_VL;
15310 case ISD::FMAXNUM:
15311 return RISCVISD::VECREDUCE_FMAX_VL;
15312 case ISD::FMINNUM:
15313 return RISCVISD::VECREDUCE_FMIN_VL;
15314 }
15315 };
15316
15317 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15318 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15319 isNullConstant(V.getOperand(1)) &&
15320 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15321 };
15322
15323 unsigned Opc = N->getOpcode();
15324 unsigned ReduceIdx;
15325 if (IsReduction(N->getOperand(0), Opc))
15326 ReduceIdx = 0;
15327 else if (IsReduction(N->getOperand(1), Opc))
15328 ReduceIdx = 1;
15329 else
15330 return SDValue();
15331
15332  // Skip if FADD disallows reassociation but the combiner needs it.
15333 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15334 return SDValue();
15335
15336 SDValue Extract = N->getOperand(ReduceIdx);
15337 SDValue Reduce = Extract.getOperand(0);
15338 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15339 return SDValue();
15340
15341 SDValue ScalarV = Reduce.getOperand(2);
15342 EVT ScalarVT = ScalarV.getValueType();
15343 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15344 ScalarV.getOperand(0)->isUndef() &&
15345 isNullConstant(ScalarV.getOperand(2)))
15346 ScalarV = ScalarV.getOperand(1);
15347
15348 // Make sure that ScalarV is a splat with VL=1.
15349 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15350 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15351 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15352 return SDValue();
15353
15354 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15355 return SDValue();
15356
15357  // Check that the scalar of ScalarV is the neutral element.
15358  // TODO: Deal with values other than the neutral element.
15359 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15360 0))
15361 return SDValue();
15362
15363 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15364 // FIXME: We might be able to improve this if operand 0 is undef.
15365 if (!isNonZeroAVL(Reduce.getOperand(5)))
15366 return SDValue();
15367
15368 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15369
15370 SDLoc DL(N);
15371 SDValue NewScalarV =
15372 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15373 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15374
15375 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15376 if (ScalarVT != ScalarV.getValueType())
15377 NewScalarV =
15378 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15379
15380 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15381 NewScalarV, Reduce.getOperand(3),
15382 Reduce.getOperand(4), Reduce.getOperand(5)};
15383 SDValue NewReduce =
15384 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15385 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15386 Extract.getOperand(1));
15387}
15388
15389// Optimize (add (shl x, c0), (shl y, c1)) ->
15390//          (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
15391// or
15392//          (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <= 31.
15393static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15394 const RISCVSubtarget &Subtarget) {
15395  // Perform this optimization only when the zba/xandesperf/xqciac/xtheadba
15396  // extension is enabled.
15397 if (!Subtarget.hasShlAdd(3))
15398 return SDValue();
15399
15400 // Skip for vector types and larger types.
15401 EVT VT = N->getValueType(0);
15402 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15403 return SDValue();
15404
15405 // The two operand nodes must be SHL and have no other use.
15406 SDValue N0 = N->getOperand(0);
15407 SDValue N1 = N->getOperand(1);
15408 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15409 !N0->hasOneUse() || !N1->hasOneUse())
15410 return SDValue();
15411
15412 // Check c0 and c1.
15413 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15414 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15415 if (!N0C || !N1C)
15416 return SDValue();
15417 int64_t C0 = N0C->getSExtValue();
15418 int64_t C1 = N1C->getSExtValue();
15419 if (C0 <= 0 || C1 <= 0)
15420 return SDValue();
15421
15422 int64_t Diff = std::abs(C0 - C1);
15423 if (!Subtarget.hasShlAdd(Diff))
15424 return SDValue();
15425
15426 // Build nodes.
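  // Illustrative example (assuming Zba's sh2add is selected for SHL_ADD with
  // a shift amount of 2): with c0 = 1 and c1 = 3, Diff = 2 and Bits = 1, so
  //   (add (shl x, 1), (shl y, 3)) -> (shl (sh2add y, x), 1)
  // i.e. 2*x + 8*y is rebuilt as 2*(x + 4*y).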
15427 SDLoc DL(N);
15428 int64_t Bits = std::min(C0, C1);
15429 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15430 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15431 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15432 DAG.getConstant(Diff, DL, VT), NS);
15433 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15434}
15435
15436// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15437// or 3.
15438static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15439 SelectionDAG &DAG) {
15440 using namespace llvm::SDPatternMatch;
15441
15442 // Looking for a reg-reg add and not an addi.
15443 if (isa<ConstantSDNode>(N->getOperand(1)))
15444 return SDValue();
15445
15446 // Based on testing it seems that performance degrades if the ADDI has
15447 // more than 2 uses.
15448 if (AddI->use_size() > 2)
15449 return SDValue();
15450
15451 APInt AddVal;
15452 SDValue SHLVal;
15453 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15454 return SDValue();
15455
15456 APInt VShift;
15457 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15458 return SDValue();
15459
15460 if (VShift.slt(1) || VShift.sgt(3))
15461 return SDValue();
15462
15463 SDLoc DL(N);
15464 EVT VT = N->getValueType(0);
15465 // The shift must be positive but the add can be signed.
15466 uint64_t ShlConst = VShift.getZExtValue();
15467 int64_t AddConst = AddVal.getSExtValue();
15468
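  // Illustrative example: for N = (add (add (shl x, 2), 100), y) this builds
  //   (add (sh2add x, y), 100)
  // i.e. the reg-reg add is folded into the shift-add and the immediate add
  // is re-applied last.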
15469 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15470 DAG.getConstant(ShlConst, DL, VT), Other);
15471 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15472 DAG.getSignedConstant(AddConst, DL, VT));
15473}
15474
15475// Optimize (add (add (shl x, c0), c1), y) ->
15476// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
15477static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15478 const RISCVSubtarget &Subtarget) {
15479  // Perform this optimization only when the zba extension is enabled.
15480 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15481 return SDValue();
15482
15483 // Skip for vector types and larger types.
15484 EVT VT = N->getValueType(0);
15485 if (VT != Subtarget.getXLenVT())
15486 return SDValue();
15487
15488 SDValue AddI = N->getOperand(0);
15489 SDValue Other = N->getOperand(1);
15490 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15491 return V;
15492 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15493 return V;
15494 return SDValue();
15495}
15496
15497// Combine a constant select operand into its use:
15498//
15499// (and (select cond, -1, c), x)
15500// -> (select cond, x, (and x, c)) [AllOnes=1]
15501// (or (select cond, 0, c), x)
15502// -> (select cond, x, (or x, c)) [AllOnes=0]
15503// (xor (select cond, 0, c), x)
15504// -> (select cond, x, (xor x, c)) [AllOnes=0]
15505// (add (select cond, 0, c), x)
15506// -> (select cond, x, (add x, c)) [AllOnes=0]
15507// (sub x, (select cond, 0, c))
15508// -> (select cond, x, (sub x, c)) [AllOnes=0]
15509static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15510 SelectionDAG &DAG, bool AllOnes,
15511 const RISCVSubtarget &Subtarget) {
15512 EVT VT = N->getValueType(0);
15513
15514 // Skip vectors.
15515 if (VT.isVector())
15516 return SDValue();
15517
15518 if (!Subtarget.hasConditionalMoveFusion()) {
15519 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15520 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15521 return SDValue();
15522
15523 // Maybe harmful when condition code has multiple use.
15524 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15525 return SDValue();
15526
15527 // Maybe harmful when VT is wider than XLen.
15528 if (VT.getSizeInBits() > Subtarget.getXLen())
15529 return SDValue();
15530 }
15531
15532 if ((Slct.getOpcode() != ISD::SELECT &&
15533 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15534 !Slct.hasOneUse())
15535 return SDValue();
15536
15537 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15538    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15539 };
15540
15541 bool SwapSelectOps;
15542 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15543 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15544 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15545 SDValue NonConstantVal;
15546 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15547 SwapSelectOps = false;
15548 NonConstantVal = FalseVal;
15549 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15550 SwapSelectOps = true;
15551 NonConstantVal = TrueVal;
15552 } else
15553 return SDValue();
15554
15555  // Slct is now known to be the desired identity constant when CC is true.
15556 TrueVal = OtherOp;
15557 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15558 // Unless SwapSelectOps says the condition should be false.
15559 if (SwapSelectOps)
15560 std::swap(TrueVal, FalseVal);
15561
15562 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15563 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15564 {Slct.getOperand(0), Slct.getOperand(1),
15565 Slct.getOperand(2), TrueVal, FalseVal});
15566
15567 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15568 {Slct.getOperand(0), TrueVal, FalseVal});
15569}
15570
15571// Attempt combineSelectAndUse on each operand of a commutative operator N.
15572static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15573 bool AllOnes,
15574 const RISCVSubtarget &Subtarget) {
15575 SDValue N0 = N->getOperand(0);
15576 SDValue N1 = N->getOperand(1);
15577 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15578 return Result;
15579 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15580 return Result;
15581 return SDValue();
15582}
15583
15584// Transform (add (mul x, c0), c1) ->
15585// (add (mul (add x, c1/c0), c0), c1%c0).
15586// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15587// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15588// to an infinite loop in DAGCombine if transformed.
15589// Or transform (add (mul x, c0), c1) ->
15590// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15591// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15592// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15593// lead to an infinite loop in DAGCombine if transformed.
15594// Or transform (add (mul x, c0), c1) ->
15595// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15596// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15597// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15598// lead to an infinite loop in DAGCombine if transformed.
15599// Or transform (add (mul x, c0), c1) ->
15600// (mul (add x, c1/c0), c0).
15601// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
15602static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15603 const RISCVSubtarget &Subtarget) {
15604 // Skip for vector types and larger types.
15605 EVT VT = N->getValueType(0);
15606 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15607 return SDValue();
15608 // The first operand node must be a MUL and has no other use.
15609 SDValue N0 = N->getOperand(0);
15610 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15611 return SDValue();
15612 // Check if c0 and c1 match above conditions.
15613 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15614 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15615 if (!N0C || !N1C)
15616 return SDValue();
15617 // If N0C has multiple uses it's possible one of the cases in
15618 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15619 // in an infinite loop.
15620 if (!N0C->hasOneUse())
15621 return SDValue();
15622 int64_t C0 = N0C->getSExtValue();
15623 int64_t C1 = N1C->getSExtValue();
15624 int64_t CA, CB;
15625 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15626 return SDValue();
15627 // Search for proper CA (non-zero) and CB that both are simm12.
15628 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15629 !isInt<12>(C0 * (C1 / C0))) {
15630 CA = C1 / C0;
15631 CB = C1 % C0;
15632 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15633 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15634 CA = C1 / C0 + 1;
15635 CB = C1 % C0 - C0;
15636 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15637 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15638 CA = C1 / C0 - 1;
15639 CB = C1 % C0 + C0;
15640 } else
15641 return SDValue();
15642 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
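  // For example, (add (mul x, 100), 4098) becomes
  //   (add (mul (add x, 40), 100), 98):
  // 4098 is not a simm12, but CA = 40 and CB = 98 both are, and 100 * 40 = 4000
  // is not a simm12 either, so the corner case above does not apply.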
15643 SDLoc DL(N);
15644 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15645 DAG.getSignedConstant(CA, DL, VT));
15646 SDValue New1 =
15647 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15648 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15649}
15650
15651// add (zext, zext) -> zext (add (zext, zext))
15652// sub (zext, zext) -> sext (sub (zext, zext))
15653// mul (zext, zext) -> zext (mul (zext, zext))
15654// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15655// udiv (zext, zext) -> zext (udiv (zext, zext))
15656// srem (zext, zext) -> zext (srem (zext, zext))
15657// urem (zext, zext) -> zext (urem (zext, zext))
15658//
15659// where the sum of the extend widths matches, and the range of the bin op
15660// fits inside the width of the narrower bin op. (For profitability on rvv, we
15661// use a power of two for both inner and outer extend.)
15662static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15663
15664 EVT VT = N->getValueType(0);
15665 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15666 return SDValue();
15667
15668 SDValue N0 = N->getOperand(0);
15669 SDValue N1 = N->getOperand(1);
15670  if (N0.getOpcode() != ISD::ZERO_EXTEND || N0.getOpcode() != N1.getOpcode())
15671 return SDValue();
15672 if (!N0.hasOneUse() || !N1.hasOneUse())
15673 return SDValue();
15674
15675 SDValue Src0 = N0.getOperand(0);
15676 SDValue Src1 = N1.getOperand(0);
15677 EVT SrcVT = Src0.getValueType();
15678 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15679 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15680 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15681 return SDValue();
15682
15683 LLVMContext &C = *DAG.getContext();
15684  EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15685 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15686
15687 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15688 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15689
15690 // Src0 and Src1 are zero extended, so they're always positive if signed.
15691 //
15692 // sub can produce a negative from two positive operands, so it needs sign
15693 // extended. Other nodes produce a positive from two positive operands, so
15694 // zero extend instead.
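  // Illustrative example (assuming the fixed-length types involved are legal):
  //   (add (zext v8i8 X to v8i32), (zext v8i8 Y to v8i32))
  //     -> (zext (add (zext X to v8i16), (zext Y to v8i16)) to v8i32)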
15695 unsigned OuterExtend =
15696 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15697
15698 return DAG.getNode(
15699 OuterExtend, SDLoc(N), VT,
15700 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15701}
15702
15703// Try to turn (add (xor bool, 1) -1) into (neg bool).
15705 SDValue N0 = N->getOperand(0);
15706 SDValue N1 = N->getOperand(1);
15707 EVT VT = N->getValueType(0);
15708 SDLoc DL(N);
15709
15710 // RHS should be -1.
15711 if (!isAllOnesConstant(N1))
15712 return SDValue();
15713
15714 // Look for (xor X, 1).
15715 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15716 return SDValue();
15717
15718 // First xor input should be 0 or 1.
15719  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15720 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15721 return SDValue();
15722
15723 // Emit a negate of the setcc.
15724 return DAG.getNegative(N0.getOperand(0), DL, VT);
15725}
15726
15727static SDValue performADDCombine(SDNode *N,
15728                                 TargetLowering::DAGCombinerInfo &DCI,
15729 const RISCVSubtarget &Subtarget) {
15730 SelectionDAG &DAG = DCI.DAG;
15731 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15732 return V;
15733 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15734 return V;
15735 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15736 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15737 return V;
15738 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15739 return V;
15740 }
15741 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15742 return V;
15743 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15744 return V;
15745 if (SDValue V = combineBinOpOfZExt(N, DAG))
15746 return V;
15747
15748 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15749 // (select lhs, rhs, cc, x, (add x, y))
15750 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15751}
15752
15753// Try to turn a sub with a boolean RHS and a constant LHS into an addi.
15754static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15755 SDValue N0 = N->getOperand(0);
15756 SDValue N1 = N->getOperand(1);
15757 EVT VT = N->getValueType(0);
15758 SDLoc DL(N);
15759
15760 // Require a constant LHS.
15761 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15762 if (!N0C)
15763 return SDValue();
15764
15765 // All our optimizations involve subtracting 1 from the immediate and forming
15766 // an ADDI. Make sure the new immediate is valid for an ADDI.
15767 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15768 if (!ImmValMinus1.isSignedIntN(12))
15769 return SDValue();
15770
15771 SDValue NewLHS;
15772 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15773 // (sub constant, (setcc x, y, eq/neq)) ->
15774 // (add (setcc x, y, neq/eq), constant - 1)
15775 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15776 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15777 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15778 return SDValue();
15779 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15780 NewLHS =
15781 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15782 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15783 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15784 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15785 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15786 NewLHS = N1.getOperand(0);
15787 } else
15788 return SDValue();
15789
15790 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15791 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15792}
15793
15794// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15795// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15796// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15797// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15798static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15799 const RISCVSubtarget &Subtarget) {
15800 if (!Subtarget.hasStdExtZbb())
15801 return SDValue();
15802
15803 EVT VT = N->getValueType(0);
15804
15805 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15806 return SDValue();
15807
15808 SDValue N0 = N->getOperand(0);
15809 SDValue N1 = N->getOperand(1);
15810
15811 if (N0->getOpcode() != ISD::SHL)
15812 return SDValue();
15813
15814 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15815 if (!ShAmtCLeft)
15816 return SDValue();
15817 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15818
15819 if (ShiftedAmount >= 8)
15820 return SDValue();
15821
15822 SDValue LeftShiftOperand = N0->getOperand(0);
15823 SDValue RightShiftOperand = N1;
15824
15825 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15826 if (N1->getOpcode() != ISD::SRL)
15827 return SDValue();
15828 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15829 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15830 return SDValue();
15831 RightShiftOperand = N1.getOperand(0);
15832 }
15833
15834 // At least one shift should have a single use.
15835 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15836 return SDValue();
15837
15838 if (LeftShiftOperand != RightShiftOperand)
15839 return SDValue();
15840
15841 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15842 Mask <<= ShiftedAmount;
15843 // Check that X has indeed the right shape (only the Y-th bit can be set in
15844 // every byte).
15845 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15846 return SDValue();
15847
15848 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15849}
15850
15851static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15852 const RISCVSubtarget &Subtarget) {
15853 if (SDValue V = combineSubOfBoolean(N, DAG))
15854 return V;
15855
15856 EVT VT = N->getValueType(0);
15857 SDValue N0 = N->getOperand(0);
15858 SDValue N1 = N->getOperand(1);
15859 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15860 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15861 isNullConstant(N1.getOperand(1)) &&
15862 N1.getValueType() == N1.getOperand(0).getValueType()) {
15863 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15864 if (CCVal == ISD::SETLT) {
15865 SDLoc DL(N);
15866 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15867 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15868 DAG.getConstant(ShAmt, DL, VT));
15869 }
15870 }
15871
15872 if (SDValue V = combineBinOpOfZExt(N, DAG))
15873 return V;
15874 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15875 return V;
15876
15877 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15878 // (select lhs, rhs, cc, x, (sub x, y))
15879 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15880}
15881
15882// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15883// Legalizing setcc can introduce xors like this. Doing this transform reduces
15884// the number of xors and may allow the xor to fold into a branch condition.
15885static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15886 SDValue N0 = N->getOperand(0);
15887 SDValue N1 = N->getOperand(1);
15888 bool IsAnd = N->getOpcode() == ISD::AND;
15889
15890 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15891 return SDValue();
15892
15893 if (!N0.hasOneUse() || !N1.hasOneUse())
15894 return SDValue();
15895
15896 SDValue N01 = N0.getOperand(1);
15897 SDValue N11 = N1.getOperand(1);
15898
15899 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15900 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15901 // operation is And, allow one of the Xors to use -1.
15902 if (isOneConstant(N01)) {
15903 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15904 return SDValue();
15905 } else if (isOneConstant(N11)) {
15906 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15907 if (!(IsAnd && isAllOnesConstant(N01)))
15908 return SDValue();
15909 } else
15910 return SDValue();
15911
15912 EVT VT = N->getValueType(0);
15913
15914 SDValue N00 = N0.getOperand(0);
15915 SDValue N10 = N1.getOperand(0);
15916
15917 // The LHS of the xors needs to be 0/1.
15918  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15919 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15920 return SDValue();
15921
15922 // Invert the opcode and insert a new xor.
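  // e.g. (and (xor X, 1), (xor Y, 1)) becomes (xor (or X, Y), 1).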
15923 SDLoc DL(N);
15924 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15925 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15926 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15927}
15928
15929// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15930// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15931// value to an unsigned value. This will be lowered to vmax and a series of
15932// vnclipu instructions later. This can be extended to truncated types other
15933// than i8 by replacing 256 and 255 with the equivalent constants for the
15934// type.
15935static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15936 EVT VT = N->getValueType(0);
15937 SDValue N0 = N->getOperand(0);
15938 EVT SrcVT = N0.getValueType();
15939
15940 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15941 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15942 return SDValue();
15943
15944 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15945 return SDValue();
15946
15947 SDValue Cond = N0.getOperand(0);
15948 SDValue True = N0.getOperand(1);
15949 SDValue False = N0.getOperand(2);
15950
15951 if (Cond.getOpcode() != ISD::SETCC)
15952 return SDValue();
15953
15954 // FIXME: Support the version of this pattern with the select operands
15955 // swapped.
15956 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15957 if (CCVal != ISD::SETULT)
15958 return SDValue();
15959
15960 SDValue CondLHS = Cond.getOperand(0);
15961 SDValue CondRHS = Cond.getOperand(1);
15962
15963 if (CondLHS != True)
15964 return SDValue();
15965
15966 unsigned ScalarBits = VT.getScalarSizeInBits();
15967
15968 // FIXME: Support other constants.
15969 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
15970 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
15971 return SDValue();
15972
15973 if (False.getOpcode() != ISD::SIGN_EXTEND)
15974 return SDValue();
15975
15976 False = False.getOperand(0);
15977
15978 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
15979 return SDValue();
15980
15981 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
15982 if (!FalseRHSC || !FalseRHSC->isZero())
15983 return SDValue();
15984
15985 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
15986 if (CCVal2 != ISD::SETGT)
15987 return SDValue();
15988
15989 // Emit the signed to unsigned saturation pattern.
15990 SDLoc DL(N);
15991 SDValue Max =
15992 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
15993 SDValue Min =
15994 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
15995 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
15996 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
15997}
15998
15999static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
16000 const RISCVSubtarget &Subtarget) {
16001 SDValue N0 = N->getOperand(0);
16002 EVT VT = N->getValueType(0);
16003
16004 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
16005 // extending X. This is safe since we only need the LSB after the shift and
16006 // shift amounts larger than 31 would produce poison. If we wait until
16007 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16008 // to use a BEXT instruction.
16009 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16010 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16011 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16012 SDLoc DL(N0);
16013 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16014 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16015 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16016 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16017 }
16018
16019 return combineTruncSelectToSMaxUSat(N, DAG);
16020}
16021
16022// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16023// truncation. But RVV doesn't have truncation instructions for more than twice
16024// the bitwidth.
16025//
16026// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16027//
16028// vsetvli a0, zero, e32, m2, ta, ma
16029// vnsrl.wi v12, v8, 0
16030// vsetvli zero, zero, e16, m1, ta, ma
16031// vnsrl.wi v8, v12, 0
16032// vsetvli zero, zero, e8, mf2, ta, ma
16033// vnsrl.wi v8, v8, 0
16034//
16035// So reverse the combine so we generate an vmseq/vmsne again:
16036//
16037// and (lshr (trunc X), ShAmt), 1
16038// -->
16039// zext (icmp ne (and X, (1 << ShAmt)), 0)
16040//
16041// and (lshr (not (trunc X)), ShAmt), 1
16042// -->
16043// zext (icmp eq (and X, (1 << ShAmt)), 0)
16044static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16045 const RISCVSubtarget &Subtarget) {
16046 using namespace SDPatternMatch;
16047 SDLoc DL(N);
16048
16049 if (!Subtarget.hasVInstructions())
16050 return SDValue();
16051
16052 EVT VT = N->getValueType(0);
16053 if (!VT.isVector())
16054 return SDValue();
16055
16056 APInt ShAmt;
16057 SDValue Inner;
16058 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16059 m_One())))
16060 return SDValue();
16061
16062 SDValue X;
16063 bool IsNot;
16064 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16065 IsNot = true;
16066 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16067 IsNot = false;
16068 else
16069 return SDValue();
16070
16071 EVT WideVT = X.getValueType();
16072 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16073 return SDValue();
16074
16075 SDValue Res =
16076 DAG.getNode(ISD::AND, DL, WideVT, X,
16077 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16078 Res = DAG.getSetCC(DL,
16079 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16080 WideVT.getVectorElementCount()),
16081 Res, DAG.getConstant(0, DL, WideVT),
16082 IsNot ? ISD::SETEQ : ISD::SETNE);
16083 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16084}
16085
16086static SDValue reduceANDOfAtomicLoad(SDNode *N,
16087                                     TargetLowering::DAGCombinerInfo &DCI) {
16088 SelectionDAG &DAG = DCI.DAG;
16089 if (N->getOpcode() != ISD::AND)
16090 return SDValue();
16091
16092 SDValue N0 = N->getOperand(0);
16093 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16094 return SDValue();
16095 if (!N0.hasOneUse())
16096 return SDValue();
16098  AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16099  if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
16100 return SDValue();
16101
16102 EVT LoadedVT = ALoad->getMemoryVT();
16103 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16104 if (!MaskConst)
16105 return SDValue();
16106 uint64_t Mask = MaskConst->getZExtValue();
16107 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16108 if (Mask != ExpectedMask)
16109 return SDValue();
16110
16111 SDValue ZextLoad = DAG.getAtomicLoad(
16112 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16113 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16114 DCI.CombineTo(N, ZextLoad);
16115 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16116  DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16117 return SDValue(N, 0);
16118}
16119
16120// Combines two comparison operations and a logic operation into one selection
16121// operation (min, max) and a logic operation. Returns the newly constructed
16122// node if the conditions for the optimization are satisfied.
16123static SDValue performANDCombine(SDNode *N,
16124                                 TargetLowering::DAGCombinerInfo &DCI,
16125 const RISCVSubtarget &Subtarget) {
16126 SelectionDAG &DAG = DCI.DAG;
16127
16128 SDValue N0 = N->getOperand(0);
16129 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16130 // extending X. This is safe since we only need the LSB after the shift and
16131 // shift amounts larger than 31 would produce poison. If we wait until
16132 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16133 // to use a BEXT instruction.
16134 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16135 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16136 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16137 N0.hasOneUse()) {
16138 SDLoc DL(N);
16139 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16140 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16141 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16142 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16143 DAG.getConstant(1, DL, MVT::i64));
16144 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16145 }
16146
16147 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16148 return V;
16149
16150 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16151 return V;
16152 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16153 return V;
16154 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16155 return V;
16156
16157 if (DCI.isAfterLegalizeDAG())
16158 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16159 return V;
16160
16161 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16162 // (select lhs, rhs, cc, x, (and x, y))
16163 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16164}
16165
16166// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16167// FIXME: Generalize to other binary operators with same operand.
16168static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16169 SelectionDAG &DAG) {
16170 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16171
16172 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16173 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16174 !N0.hasOneUse() || !N1.hasOneUse())
16175 return SDValue();
16176
16177 // Should have the same condition.
16178 SDValue Cond = N0.getOperand(1);
16179 if (Cond != N1.getOperand(1))
16180 return SDValue();
16181
16182 SDValue TrueV = N0.getOperand(0);
16183 SDValue FalseV = N1.getOperand(0);
16184
16185 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16186 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16187 !isOneConstant(TrueV.getOperand(1)) ||
16188 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16189 return SDValue();
16190
16191 EVT VT = N->getValueType(0);
16192 SDLoc DL(N);
16193
16194 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16195 Cond);
16196 SDValue NewN1 =
16197 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16198 SDValue NewOr =
16199 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16200 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16201}
16202
16203// (xor X, (xor (and X, C2), Y))
16204// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16205// where C2 is a shifted mask with width = Width and shift = ShAmt
16206// qc_insb might become qc.insb or qc.insbi depending on the operands.
16207static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16208 const RISCVSubtarget &Subtarget) {
16209 if (!Subtarget.hasVendorXqcibm())
16210 return SDValue();
16211
16212 using namespace SDPatternMatch;
16213 SDValue Base, Inserted;
16214 APInt CMask;
16215 if (!sd_match(N, m_Xor(m_Value(Base),
16216                         m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16217 m_ConstInt(CMask))),
16218 m_Value(Inserted))))))
16219 return SDValue();
16220
16221 if (N->getValueType(0) != MVT::i32)
16222 return SDValue();
16223 unsigned Width, ShAmt;
16224 if (!CMask.isShiftedMask(ShAmt, Width))
16225 return SDValue();
16226
16227 // Check if all zero bits in CMask are also zero in Inserted
16228 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16229 return SDValue();
16230
16231 SDLoc DL(N);
16232
16233 // `Inserted` needs to be right shifted before it is put into the
16234 // instruction.
16235 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16236 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16237
16238 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16239 DAG.getConstant(ShAmt, DL, MVT::i32)};
16240 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16241}
16242
16243static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16244 const RISCVSubtarget &Subtarget) {
16245 if (!Subtarget.hasVendorXqcibm())
16246 return SDValue();
16247
16248 using namespace SDPatternMatch;
16249
16250 SDValue X;
16251 APInt MaskImm;
16252 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
16253 return SDValue();
16254
16255 unsigned ShAmt, Width;
16256 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
16257 return SDValue();
16258
16259 if (N->getValueType(0) != MVT::i32)
16260 return SDValue();
16261
16262  // If Zbs is enabled and only a single bit is set, we can use BSETI, which
16263  // can be compressed to C_BSETI when Xqcibm is enabled.
16264 if (Width == 1 && Subtarget.hasStdExtZbs())
16265 return SDValue();
16266
16267 // If C1 is a shifted mask (but can't be formed as an ORI),
16268 // use a bitfield insert of -1.
16269 // Transform (or x, C1)
16270 // -> (qc.insbi x, -1, width, shift)
16271 SDLoc DL(N);
16272
16273 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
16274 DAG.getConstant(Width, DL, MVT::i32),
16275 DAG.getConstant(ShAmt, DL, MVT::i32)};
16276 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16277}
16278
16279// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
16280// being inserted only sets known zero bits.
16281static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16282 const RISCVSubtarget &Subtarget) {
16283 // Supported only in Xqcibm for now.
16284 if (!Subtarget.hasVendorXqcibm())
16285 return SDValue();
16286
16287 using namespace SDPatternMatch;
16288
16289 SDValue Inserted;
16290 APInt MaskImm, OrImm;
16291 if (!sd_match(
16292 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
16293 m_ConstInt(MaskImm))),
16294 m_ConstInt(OrImm)))))
16295 return SDValue();
16296
16297 // Compute the Known Zero for the AND as this allows us to catch more general
16298 // cases than just looking for AND with imm.
16299 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
16300
16301 // The bits being inserted must only set those bits that are known to be
16302 // zero.
16303 if (!OrImm.isSubsetOf(Known.Zero)) {
16304 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
16305 // currently handle this case.
16306 return SDValue();
16307 }
16308
16309 unsigned ShAmt, Width;
16310 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
16311 if (!Known.Zero.isShiftedMask(ShAmt, Width))
16312 return SDValue();
16313
16314 // QC_INSB(I) dst, src, #width, #shamt.
16315 SDLoc DL(N);
16316
16317 SDValue ImmNode =
16318 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
16319
16320 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
16321 DAG.getConstant(ShAmt, DL, MVT::i32)};
16322 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16323}
16324
16325static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16326 const RISCVSubtarget &Subtarget) {
16327 SelectionDAG &DAG = DCI.DAG;
16328
16329 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
16330 return V;
16331 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
16332 return V;
16333 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16334 return V;
16335 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16336 return V;
16337
16338 if (DCI.isAfterLegalizeDAG())
16339 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16340 return V;
16341
16342 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16343 // We may be able to pull a common operation out of the true and false value.
16344 SDValue N0 = N->getOperand(0);
16345 SDValue N1 = N->getOperand(1);
16346 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16347 return V;
16348 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16349 return V;
16350
16351 // fold (or (select cond, 0, y), x) ->
16352 // (select cond, x, (or x, y))
16353 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16354}
16355
16356static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16357 const RISCVSubtarget &Subtarget) {
16358 SDValue N0 = N->getOperand(0);
16359 SDValue N1 = N->getOperand(1);
16360
16361 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16362 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16363  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16364 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16365 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16366 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16367 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16368 SDLoc DL(N);
16369 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16370 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16371 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16372 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16373 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16374 }
16375
16376 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16377 // NOTE: Assumes ROL being legal means ROLW is legal.
16378 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16379 if (N0.getOpcode() == RISCVISD::SLLW &&
16380      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16381 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16382 SDLoc DL(N);
16383 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16384 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16385 }
16386
16387 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16388 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16389 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16390    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16391 if (ConstN00 && CC == ISD::SETLT) {
16392 EVT VT = N0.getValueType();
16393 SDLoc DL(N0);
16394 const APInt &Imm = ConstN00->getAPIntValue();
16395 if ((Imm + 1).isSignedIntN(12))
16396 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16397 DAG.getConstant(Imm + 1, DL, VT), CC);
16398 }
16399 }
16400
16401 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16402 return V;
16403
16404 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16405 return V;
16406 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16407 return V;
16408
16409 // fold (xor (select cond, 0, y), x) ->
16410 // (select cond, x, (xor x, y))
16411 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16412}
16413
16414// Try to expand a multiply to a sequence of shifts and add/subs,
16415// for a machine without a native mul instruction.
16416static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16417 uint64_t MulAmt) {
16418 SDLoc DL(N);
16419 EVT VT = N->getValueType(0);
16420  const uint64_t BitWidth = VT.getFixedSizeInBits();
16421
16422 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16423 SDValue N0 = N->getOperand(0);
16424
16425  // Find the non-adjacent form (NAF) of the multiplier.
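  // Illustrative trace: MulAmt = 7 has the NAF 8 - 1, so the loop emits
  //   Result = (0 - (X << 0)) + (X << 3), i.e. 8*X - X = 7*X.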
16426 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16427 if (E & 1) {
16428 bool IsAdd = (E & 3) == 1;
16429 E -= IsAdd ? 1 : -1;
16430 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16431 DAG.getShiftAmountConstant(I, VT, DL));
16432 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16433 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16434 }
16435 }
16436
16437 return Result;
16438}
16439
16440// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
16441static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16442 uint64_t MulAmt) {
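  // Illustrative examples: MulAmt = 12 = 16 - 4 -> (sub (shl X, 4), (shl X, 2)),
  // and MulAmt = 18 = 16 + 2 -> (add (shl X, 4), (shl X, 1)).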
16443 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16444  ISD::NodeType Op;
16445 uint64_t ShiftAmt1;
16446 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16447 Op = ISD::SUB;
16448 ShiftAmt1 = MulAmt + MulAmtLowBit;
16449 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16450 Op = ISD::ADD;
16451 ShiftAmt1 = MulAmt - MulAmtLowBit;
16452 } else {
16453 return SDValue();
16454 }
16455 EVT VT = N->getValueType(0);
16456 SDLoc DL(N);
16457 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16458 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16459 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16460 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16461 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16462}
16463
16464// Try to expand a scalar multiply to a faster sequence.
16465static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16466                         TargetLowering::DAGCombinerInfo &DCI,
16467 const RISCVSubtarget &Subtarget) {
16468
16469 EVT VT = N->getValueType(0);
16470
16471 // LI + MUL is usually smaller than the alternative sequence.
16472  if (DAG.getMachineFunction().getFunction().hasMinSize())
16473 return SDValue();
16474
16475 if (VT != Subtarget.getXLenVT())
16476 return SDValue();
16477
16478 bool ShouldExpandMul =
16479 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16480 !Subtarget.hasStdExtZmmul();
16481 if (!ShouldExpandMul)
16482 return SDValue();
16483
16484 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16485 if (!CNode)
16486 return SDValue();
16487 uint64_t MulAmt = CNode->getZExtValue();
16488
16489  // Don't do this if the Xqciac extension is enabled and MulAmt is a simm12.
16490 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16491 return SDValue();
16492
16493  // WARNING: The code below is knowingly incorrect with regard to undef semantics.
16494 // We're adding additional uses of X here, and in principle, we should be freezing
16495 // X before doing so. However, adding freeze here causes real regressions, and no
16496 // other target properly freezes X in these cases either.
16497 SDValue X = N->getOperand(0);
16498
16499 if (Subtarget.hasShlAdd(3)) {
16500 int Shift;
16501 if (int ShXAmount = isShifted359(MulAmt, Shift)) {
16502 // 3/5/9 * 2^N -> shl (shXadd X, X), N
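      // e.g. MulAmt = 20 = 5 * 4 -> (shl (sh2add X, X), 2).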
16503 SDLoc DL(N);
16504 SDValue X = N->getOperand(0);
16505 // Put the shift first if we can fold a zext into the shift forming
16506 // a slli.uw.
16507 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
16508 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
16509 SDValue Shl =
16510 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
16511 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
16512 DAG.getConstant(ShXAmount, DL, VT), Shl);
16513 }
16514 // Otherwise, put the shl second so that it can fold with following
16515 // instructions (e.g. sext or add).
16516 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16517 DAG.getConstant(ShXAmount, DL, VT), X);
16518 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
16519 DAG.getConstant(Shift, DL, VT));
16520 }
16521
16522 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
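    // e.g. MulAmt = 45 = 5 * 9 -> (sh3add (sh2add X, X), (sh2add X, X)).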
16523 int ShX;
16524 int ShY;
16525 switch (MulAmt) {
16526 case 3 * 5:
16527 ShY = 1;
16528 ShX = 2;
16529 break;
16530 case 3 * 9:
16531 ShY = 1;
16532 ShX = 3;
16533 break;
16534 case 5 * 5:
16535 ShX = ShY = 2;
16536 break;
16537 case 5 * 9:
16538 ShY = 2;
16539 ShX = 3;
16540 break;
16541 case 9 * 9:
16542 ShX = ShY = 3;
16543 break;
16544 default:
16545 ShX = ShY = 0;
16546 break;
16547 }
16548 if (ShX) {
16549 SDLoc DL(N);
16550 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16551 DAG.getConstant(ShY, DL, VT), X);
16552 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16553 DAG.getConstant(ShX, DL, VT), Mul359);
16554 }
16555
16556    // If this is a power of 2 + 2/4/8, we can use a shift followed by a
16557    // single shXadd. First check if this is a sum of two powers of 2 because
16558    // that's easy. Then count how many zeros are up to the first bit.
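    // e.g. MulAmt = 68 = 64 + 4 -> (sh2add X, (shl X, 6)).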
16559 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16560 unsigned ScaleShift = llvm::countr_zero(MulAmt);
16561 if (ScaleShift >= 1 && ScaleShift < 4) {
16562 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16563 SDLoc DL(N);
16564 SDValue Shift1 =
16565 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16566 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16567 DAG.getConstant(ScaleShift, DL, VT), Shift1);
16568 }
16569 }
16570
16571 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16572    // This is the two-instruction form; there are also three-instruction
16573    // variants we could implement, e.g.
16574 // (2^(1,2,3) * 3,5,9 + 1) << C2
16575 // 2^(C1>3) * 3,5,9 +/- 1
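    // e.g. MulAmt = 11 = 2 * 5 + 1 -> (sh1add (sh2add X, X), X).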
16576 if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
16577 assert(Shift != 0 && "MulAmt=4,6,10 handled before");
16578 if (Shift <= 3) {
16579 SDLoc DL(N);
16580 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16581 DAG.getConstant(ShXAmount, DL, VT), X);
16582 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16583 DAG.getConstant(Shift, DL, VT), X);
16584 }
16585 }
16586
16587 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
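    // e.g. MulAmt = 133 = 128 + 4 + 1 -> (add (shl X, 7), (sh2add X, X)).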
16588 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16589 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16590 if (ScaleShift >= 1 && ScaleShift < 4) {
16591 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
16592 SDLoc DL(N);
16593 SDValue Shift1 =
16594 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16595 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
16596 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16597 DAG.getConstant(ScaleShift, DL, VT), X));
16598 }
16599 }
16600
16601 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
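// For example, MulAmt = 61 = 64 - 3 becomes (sub (shl X, 6), (sh1add X, X)).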
16602 for (uint64_t Offset : {3, 5, 9}) {
16603 if (isPowerOf2_64(MulAmt + Offset)) {
16604 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
16605 if (ShAmt >= VT.getSizeInBits())
16606 continue;
16607 SDLoc DL(N);
16608 SDValue Shift1 =
16609 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16610 SDValue Mul359 =
16611 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16612 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
16613 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16614 }
16615 }
16616
16617 for (uint64_t Divisor : {3, 5, 9}) {
16618 if (MulAmt % Divisor != 0)
16619 continue;
16620 uint64_t MulAmt2 = MulAmt / Divisor;
16621 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16622 // of 25 which happen to be quite common.
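// For example, MulAmt = 100 = 5 * 5 * 4 becomes (shl (sh2add M, M), 2)
// where M = (sh2add X, X).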
16623 if (int ShBAmount = isShifted359(MulAmt2, Shift)) {
16624 SDLoc DL(N);
16625 SDValue Mul359A =
16626 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16627 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
16628 SDValue Mul359B =
16629 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359A,
16630 DAG.getConstant(ShBAmount, DL, VT), Mul359A);
16631 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
16632 DAG.getConstant(Shift, DL, VT));
16633 }
16634 }
16635 }
16636
16637 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16638 return V;
16639
16640 if (!Subtarget.hasStdExtZmmul())
16641 return expandMulToNAFSequence(N, DAG, MulAmt);
16642
16643 return SDValue();
16644}
16645
16646// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16647// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16648// Same for other equivalent types with other equivalent constants.
16649 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16650 EVT VT = N->getValueType(0);
16651 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16652
16653 // Do this for legal vectors unless they are i1 or i8 vectors.
16654 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16655 return SDValue();
16656
16657 if (N->getOperand(0).getOpcode() != ISD::AND ||
16658 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16659 return SDValue();
16660
16661 SDValue And = N->getOperand(0);
16662 SDValue Srl = And.getOperand(0);
16663
16664 APInt V1, V2, V3;
16665 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16666 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16667 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16668 return SDValue();
16669
16670 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16671 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16672 V3 != (HalfSize - 1))
16673 return SDValue();
16674
16675 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16676 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16677 VT.getVectorElementCount() * 2);
16678 SDLoc DL(N);
16679 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16680 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16681 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16682 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16683}
16684
16685 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16686 TargetLowering::DAGCombinerInfo &DCI,
16687 const RISCVSubtarget &Subtarget) {
16688 EVT VT = N->getValueType(0);
16689 if (!VT.isVector())
16690 return expandMul(N, DAG, DCI, Subtarget);
16691
16692 SDLoc DL(N);
16693 SDValue N0 = N->getOperand(0);
16694 SDValue N1 = N->getOperand(1);
16695 SDValue MulOper;
16696 unsigned AddSubOpc;
16697
16698 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16699 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16700 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16701 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16702 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16703 AddSubOpc = V->getOpcode();
16704 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16705 SDValue Opnd = V->getOperand(1);
16706 MulOper = V->getOperand(0);
16707 if (AddSubOpc == ISD::SUB)
16708 std::swap(Opnd, MulOper);
16709 if (isOneOrOneSplat(Opnd))
16710 return true;
16711 }
16712 return false;
16713 };
16714
16715 if (IsAddSubWith1(N0)) {
16716 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16717 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16718 }
16719
16720 if (IsAddSubWith1(N1)) {
16721 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16722 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16723 }
16724
16725 if (SDValue V = combineBinOpOfZExt(N, DAG))
16726 return V;
16727
16728 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16729 return V;
16730
16731 return SDValue();
16732}
16733
16734/// According to the property that indexed load/store instructions zero-extend
16735/// their indices, try to narrow the type of the index operand.
16736static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16737 if (isIndexTypeSigned(IndexType))
16738 return false;
16739
16740 if (!N->hasOneUse())
16741 return false;
16742
16743 EVT VT = N.getValueType();
16744 SDLoc DL(N);
16745
16746 // In general, what we're doing here is seeing if we can sink a truncate to
16747 // a smaller element type into the expression tree building our index.
16748 // TODO: We can generalize this and handle a bunch more cases if useful.
16749
16750 // Narrow a buildvector to the narrowest element type. This requires less
16751 // work and less register pressure at high LMUL, and creates smaller constants
16752 // which may be cheaper to materialize.
16753 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16754 KnownBits Known = DAG.computeKnownBits(N);
16755 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16756 LLVMContext &C = *DAG.getContext();
16757 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16758 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16759 N = DAG.getNode(ISD::TRUNCATE, DL,
16760 VT.changeVectorElementType(ResultVT), N);
16761 return true;
16762 }
16763 }
16764
16765 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
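// For example, an index (shl (zext v4i8 X to v4i64), splat 2) only needs
// 8 + 2 bits, so it can be narrowed to (shl (zext v4i8 X to v4i16), splat 2).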
16766 if (N.getOpcode() != ISD::SHL)
16767 return false;
16768
16769 SDValue N0 = N.getOperand(0);
16770 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16771 N0.getOpcode() != RISCVISD::VZEXT_VL)
16772 return false;
16773 if (!N0->hasOneUse())
16774 return false;
16775
16776 APInt ShAmt;
16777 SDValue N1 = N.getOperand(1);
16778 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16779 return false;
16780
16781 SDValue Src = N0.getOperand(0);
16782 EVT SrcVT = Src.getValueType();
16783 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16784 unsigned ShAmtV = ShAmt.getZExtValue();
16785 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16786 NewElen = std::max(NewElen, 8U);
16787
16788 // Skip if NewElen is not narrower than the original extended type.
16789 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16790 return false;
16791
16792 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16793 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16794
16795 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16796 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16797 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16798 return true;
16799}
16800
16801/// Try to map an integer comparison with size > XLEN to vector instructions
16802/// before type legalization splits it up into chunks.
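/// E.g., an i256 equality compare becomes a v32i8 VP_SETCC (SETNE) feeding a
/// VP_REDUCE_OR whose result is then compared against zero.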
16803static SDValue
16804 combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16805 const SDLoc &DL, SelectionDAG &DAG,
16806 const RISCVSubtarget &Subtarget) {
16807 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16808
16809 if (!Subtarget.hasVInstructions())
16810 return SDValue();
16811
16812 MVT XLenVT = Subtarget.getXLenVT();
16813 EVT OpVT = X.getValueType();
16814 // We're looking for an oversized integer equality comparison.
16815 if (!OpVT.isScalarInteger())
16816 return SDValue();
16817
16818 unsigned OpSize = OpVT.getSizeInBits();
16819 // The size should be larger than XLen and smaller than the maximum vector
16820 // size.
16821 if (OpSize <= Subtarget.getXLen() ||
16822 OpSize > Subtarget.getRealMinVLen() *
16823 Subtarget.getMaxLMULForFixedLengthVectors())
16824 return SDValue();
16825
16826 // Don't perform this combine if constructing the vector will be expensive.
16827 auto IsVectorBitCastCheap = [](SDValue X) {
16828 X = peekThroughBitcasts(X);
16829 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16830 X.getOpcode() == ISD::LOAD;
16831 };
16832 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16833 return SDValue();
16834
16835 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16836 Attribute::NoImplicitFloat))
16837 return SDValue();
16838
16839 // Bail out for non-byte-sized types.
16840 if (!OpVT.isByteSized())
16841 return SDValue();
16842
16843 unsigned VecSize = OpSize / 8;
16844 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16845 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16846
16847 SDValue VecX = DAG.getBitcast(VecVT, X);
16848 SDValue VecY = DAG.getBitcast(VecVT, Y);
16849 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16850 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16851
16852 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16853 DAG.getCondCode(ISD::SETNE), Mask, VL);
16854 return DAG.getSetCC(DL, VT,
16855 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16856 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16857 VL),
16858 DAG.getConstant(0, DL, XLenVT), CC);
16859}
16860
16861 static SDValue performSETCCCombine(SDNode *N,
16862 TargetLowering::DAGCombinerInfo &DCI,
16863 const RISCVSubtarget &Subtarget) {
16864 SelectionDAG &DAG = DCI.DAG;
16865 SDLoc dl(N);
16866 SDValue N0 = N->getOperand(0);
16867 SDValue N1 = N->getOperand(1);
16868 EVT VT = N->getValueType(0);
16869 EVT OpVT = N0.getValueType();
16870
16871 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16872 // Looking for an equality compare.
16873 if (!isIntEqualitySetCC(Cond))
16874 return SDValue();
16875
16876 if (SDValue V =
16877 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16878 return V;
16879
16880 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16881 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16882 isa<ConstantSDNode>(N0.getOperand(1))) {
16883 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16884 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
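// For example, (seteq (and X, -4096), 0) becomes (seteq (srl X, 12), 0).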
16885 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16886 AndRHSC.isNegatedPowerOf2()) {
16887 unsigned ShiftBits = AndRHSC.countr_zero();
16888 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16889 DAG.getConstant(ShiftBits, dl, OpVT));
16890 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16891 }
16892
16893 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16894 // comparing with constants other than 0 if the constant can be folded into
16895 // addi or xori after shifting.
16896 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16897 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16898 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
16899 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16900 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16901 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16902 if (NewC >= -2048 && NewC <= 2048) {
16903 SDValue SExt =
16904 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16905 DAG.getValueType(MVT::i32));
16906 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16907 DAG.getConstant(ShiftBits, dl, OpVT));
16908 return DAG.getSetCC(dl, VT, Shift,
16909 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16910 }
16911 }
16912 }
16913
16914 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16915 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16916 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16917 // sext_inreg can become a sext.w instead of a shift pair.
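// For example, (seteq (i64 (and X, 0xffffffff)), 0x80000000) becomes
// (seteq (i64 (sext_inreg X, i32)), 0xffffffff80000000).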
16918 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16919 return SDValue();
16920
16921 // RHS needs to be a constant.
16922 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16923 if (!N1C)
16924 return SDValue();
16925
16926 // LHS needs to be (and X, 0xffffffff).
16927 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16928 !isa<ConstantSDNode>(N0.getOperand(1)) ||
16929 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16930 return SDValue();
16931
16932 // Don't do this if the sign bit is provably zero; it will be turned back
16933 // into an AND.
16934 APInt SignMask = APInt::getOneBitSet(64, 31);
16935 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16936 return SDValue();
16937
16938 const APInt &C1 = N1C->getAPIntValue();
16939
16940 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16941 // to be equal.
16942 if (C1.getActiveBits() > 32)
16943 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16944
16945 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16946 N0.getOperand(0), DAG.getValueType(MVT::i32));
16947 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
16948 dl, OpVT), Cond);
16949}
16950
16951static SDValue
16952 performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16953 const RISCVSubtarget &Subtarget) {
16954 SelectionDAG &DAG = DCI.DAG;
16955 SDValue Src = N->getOperand(0);
16956 EVT VT = N->getValueType(0);
16957 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16958 unsigned Opc = Src.getOpcode();
16959 SDLoc DL(N);
16960
16961 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
16962 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
16963 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
16964 Subtarget.hasStdExtZfhmin())
16965 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
16966
16967 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
16968 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
16969 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
16970 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
16971 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
16972 Src.getOperand(1));
16973
16974 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
16975 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
16976 return DAG.getNegative(Src, DL, VT);
16977
16978 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
16979 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
16980 isAllOnesConstant(Src.getOperand(1)) &&
16981 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
16982 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
16983 DAG.getAllOnesConstant(DL, VT));
16984
16985 return SDValue();
16986}
16987
16988namespace {
16989// Forward declaration of the structure holding the necessary information to
16990// apply a combine.
16991struct CombineResult;
16992
16993enum ExtKind : uint8_t {
16994 ZExt = 1 << 0,
16995 SExt = 1 << 1,
16996 FPExt = 1 << 2,
16997 BF16Ext = 1 << 3
16998};
16999/// Helper class for folding sign/zero extensions.
17000/// In particular, this class is used for the following combines:
17001/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
17002/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17003/// mul | mul_vl -> vwmul(u) | vwmul_su
17004/// shl | shl_vl -> vwsll
17005/// fadd -> vfwadd | vfwadd_w
17006/// fsub -> vfwsub | vfwsub_w
17007/// fmul -> vfwmul
17008/// An object of this class represents an operand of the operation we want to
17009/// combine.
17010/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
17011/// NodeExtensionHelper for `a` and one for `b`.
17012///
17013/// This class abstracts away how the extension is materialized and
17014/// how its number of users affect the combines.
17015///
17016/// In particular:
17017/// - VWADD_W is conceptually == add(op0, sext(op1))
17018/// - VWADDU_W == add(op0, zext(op1))
17019/// - VWSUB_W == sub(op0, sext(op1))
17020/// - VWSUBU_W == sub(op0, zext(op1))
17021/// - VFWADD_W == fadd(op0, fpext(op1))
17022/// - VFWSUB_W == fsub(op0, fpext(op1))
17023/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
17024/// zext|sext(smaller_value).
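/// E.g., for `mul_vl (vsext_vl a), (vsext_vl b)` both operands report
/// SupportsSExt, so the whole expression can become `vwmul_vl a, b` on the
/// narrower type.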
17025struct NodeExtensionHelper {
17026 /// Records if this operand is like being zero extended.
17027 bool SupportsZExt;
17028 /// Records if this operand is like being sign extended.
17029 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
17030 /// instance, a splat constant (e.g., 3), would support being both sign and
17031 /// zero extended.
17032 bool SupportsSExt;
17033 /// Records if this operand is like being floating point extended.
17034 bool SupportsFPExt;
17035 /// Records if this operand is extended from bf16.
17036 bool SupportsBF16Ext;
17037 /// This boolean captures whether we care if this operand would still be
17038 /// around after the folding happens.
17039 bool EnforceOneUse;
17040 /// Original value that this NodeExtensionHelper represents.
17041 SDValue OrigOperand;
17042
17043 /// Get the value feeding the extension or the value itself.
17044 /// E.g., for zext(a), this would return a.
17045 SDValue getSource() const {
17046 switch (OrigOperand.getOpcode()) {
17047 case ISD::ZERO_EXTEND:
17048 case ISD::SIGN_EXTEND:
17049 case RISCVISD::VSEXT_VL:
17050 case RISCVISD::VZEXT_VL:
17051 case RISCVISD::FP_EXTEND_VL:
17052 return OrigOperand.getOperand(0);
17053 default:
17054 return OrigOperand;
17055 }
17056 }
17057
17058 /// Check if this instance represents a splat.
17059 bool isSplat() const {
17060 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
17061 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
17062 }
17063
17064 /// Get the extended opcode.
17065 unsigned getExtOpc(ExtKind SupportsExt) const {
17066 switch (SupportsExt) {
17067 case ExtKind::SExt:
17068 return RISCVISD::VSEXT_VL;
17069 case ExtKind::ZExt:
17070 return RISCVISD::VZEXT_VL;
17071 case ExtKind::FPExt:
17072 case ExtKind::BF16Ext:
17073 return RISCVISD::FP_EXTEND_VL;
17074 }
17075 llvm_unreachable("Unknown ExtKind enum");
17076 }
17077
17078 /// Get or create a value that can feed \p Root with the given extension \p
17079 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
17080 /// operand. \see ::getSource().
17081 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
17082 const RISCVSubtarget &Subtarget,
17083 std::optional<ExtKind> SupportsExt) const {
17084 if (!SupportsExt.has_value())
17085 return OrigOperand;
17086
17087 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
17088
17089 SDValue Source = getSource();
17090 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
17091 if (Source.getValueType() == NarrowVT)
17092 return Source;
17093
17094 unsigned ExtOpc = getExtOpc(*SupportsExt);
17095
17096 // If we need an extension, we should be changing the type.
17097 SDLoc DL(OrigOperand);
17098 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
17099 switch (OrigOperand.getOpcode()) {
17100 case ISD::ZERO_EXTEND:
17101 case ISD::SIGN_EXTEND:
17102 case RISCVISD::VSEXT_VL:
17103 case RISCVISD::VZEXT_VL:
17104 case RISCVISD::FP_EXTEND_VL:
17105 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
17106 case ISD::SPLAT_VECTOR:
17107 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
17108 case RISCVISD::VMV_V_X_VL:
17109 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
17110 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
17111 case RISCVISD::VFMV_V_F_VL:
17112 Source = Source.getOperand(1);
17113 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17114 Source = Source.getOperand(0);
17115 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17116 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17117 DAG.getUNDEF(NarrowVT), Source, VL);
17118 default:
17119 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17120 // and that operand should already have the right NarrowVT so no
17121 // extension should be required at this point.
17122 llvm_unreachable("Unsupported opcode");
17123 }
17124 }
17125
17126 /// Helper function to get the narrow type for \p Root.
17127 /// The narrow type is the type of \p Root where we divided the size of each
17128 /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
17129 /// \pre Both the narrow type and the original type should be legal.
17130 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17131 MVT VT = Root->getSimpleValueType(0);
17132
17133 // Determine the narrow size.
17134 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17135
17136 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17137 : SupportsExt == ExtKind::FPExt
17138 ? MVT::getFloatingPointVT(NarrowSize)
17139 : MVT::getIntegerVT(NarrowSize);
17140
17141 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17142 "Trying to extend something we can't represent");
17143 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17144 return NarrowVT;
17145 }
17146
17147 /// Get the opcode to materialize:
17148 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17149 static unsigned getSExtOpcode(unsigned Opcode) {
17150 switch (Opcode) {
17151 case ISD::ADD:
17152 case RISCVISD::ADD_VL:
17153 case RISCVISD::VWADD_W_VL:
17154 case RISCVISD::VWADDU_W_VL:
17155 case ISD::OR:
17156 case RISCVISD::OR_VL:
17157 return RISCVISD::VWADD_VL;
17158 case ISD::SUB:
17159 case RISCVISD::SUB_VL:
17160 case RISCVISD::VWSUB_W_VL:
17161 case RISCVISD::VWSUBU_W_VL:
17162 return RISCVISD::VWSUB_VL;
17163 case ISD::MUL:
17164 case RISCVISD::MUL_VL:
17165 return RISCVISD::VWMUL_VL;
17166 default:
17167 llvm_unreachable("Unexpected opcode");
17168 }
17169 }
17170
17171 /// Get the opcode to materialize:
17172 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17173 static unsigned getZExtOpcode(unsigned Opcode) {
17174 switch (Opcode) {
17175 case ISD::ADD:
17176 case RISCVISD::ADD_VL:
17177 case RISCVISD::VWADD_W_VL:
17178 case RISCVISD::VWADDU_W_VL:
17179 case ISD::OR:
17180 case RISCVISD::OR_VL:
17181 return RISCVISD::VWADDU_VL;
17182 case ISD::SUB:
17183 case RISCVISD::SUB_VL:
17184 case RISCVISD::VWSUB_W_VL:
17185 case RISCVISD::VWSUBU_W_VL:
17186 return RISCVISD::VWSUBU_VL;
17187 case ISD::MUL:
17188 case RISCVISD::MUL_VL:
17189 return RISCVISD::VWMULU_VL;
17190 case ISD::SHL:
17191 case RISCVISD::SHL_VL:
17192 return RISCVISD::VWSLL_VL;
17193 default:
17194 llvm_unreachable("Unexpected opcode");
17195 }
17196 }
17197
17198 /// Get the opcode to materialize:
17199 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17200 static unsigned getFPExtOpcode(unsigned Opcode) {
17201 switch (Opcode) {
17202 case RISCVISD::FADD_VL:
17203 case RISCVISD::VFWADD_W_VL:
17204 return RISCVISD::VFWADD_VL;
17205 case RISCVISD::FSUB_VL:
17206 case RISCVISD::VFWSUB_W_VL:
17207 return RISCVISD::VFWSUB_VL;
17208 case RISCVISD::FMUL_VL:
17209 return RISCVISD::VFWMUL_VL;
17210 case RISCVISD::VFMADD_VL:
17211 return RISCVISD::VFWMADD_VL;
17212 case RISCVISD::VFMSUB_VL:
17213 return RISCVISD::VFWMSUB_VL;
17214 case RISCVISD::VFNMADD_VL:
17215 return RISCVISD::VFWNMADD_VL;
17216 case RISCVISD::VFNMSUB_VL:
17217 return RISCVISD::VFWNMSUB_VL;
17218 default:
17219 llvm_unreachable("Unexpected opcode");
17220 }
17221 }
17222
17223 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17224 /// newOpcode(a, b).
17225 static unsigned getSUOpcode(unsigned Opcode) {
17226 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17227 "SU is only supported for MUL");
17228 return RISCVISD::VWMULSU_VL;
17229 }
17230
17231 /// Get the opcode to materialize
17232 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17233 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17234 switch (Opcode) {
17235 case ISD::ADD:
17236 case RISCVISD::ADD_VL:
17237 case ISD::OR:
17238 case RISCVISD::OR_VL:
17239 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17240 : RISCVISD::VWADDU_W_VL;
17241 case ISD::SUB:
17242 case RISCVISD::SUB_VL:
17243 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17244 : RISCVISD::VWSUBU_W_VL;
17245 case RISCVISD::FADD_VL:
17246 return RISCVISD::VFWADD_W_VL;
17247 case RISCVISD::FSUB_VL:
17248 return RISCVISD::VFWSUB_W_VL;
17249 default:
17250 llvm_unreachable("Unexpected opcode");
17251 }
17252 }
17253
17254 using CombineToTry = std::function<std::optional<CombineResult>(
17255 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17256 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17257 const RISCVSubtarget &)>;
17258
17259 /// Check if this node needs to be fully folded or extended for all users.
17260 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17261
17262 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17263 const RISCVSubtarget &Subtarget) {
17264 unsigned Opc = OrigOperand.getOpcode();
17265 MVT VT = OrigOperand.getSimpleValueType();
17266
17267 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17268 "Unexpected Opcode");
17269
17270 // The passthru must be undef for tail agnostic.
17271 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17272 return;
17273
17274 // Get the scalar value.
17275 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17276 : OrigOperand.getOperand(1);
17277
17278 // See if we have enough sign bits or zero bits in the scalar to use a
17279 // widening opcode by splatting to smaller element size.
17280 unsigned EltBits = VT.getScalarSizeInBits();
17281 unsigned ScalarBits = Op.getValueSizeInBits();
17282 // If we're not getting all bits from the element, we need special handling.
17283 if (ScalarBits < EltBits) {
17284 // This should only occur on RV32.
17285 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17286 !Subtarget.is64Bit() && "Unexpected splat");
17287 // vmv.v.x sign extends narrow inputs.
17288 SupportsSExt = true;
17289
17290 // If the input is positive, then sign extend is also zero extend.
17291 if (DAG.SignBitIsZero(Op))
17292 SupportsZExt = true;
17293
17294 EnforceOneUse = false;
17295 return;
17296 }
17297
17298 unsigned NarrowSize = EltBits / 2;
17299 // If the narrow type cannot be expressed with a legal VMV,
17300 // this is not a valid candidate.
17301 if (NarrowSize < 8)
17302 return;
17303
17304 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17305 SupportsSExt = true;
17306
17307 if (DAG.MaskedValueIsZero(Op,
17308 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17309 SupportsZExt = true;
17310
17311 EnforceOneUse = false;
17312 }
17313
17314 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17315 return (NarrowEltVT == MVT::f32 ||
17316 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17317 }
17318
17319 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17320 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17321 }
17322
17323 /// Helper method to set the various fields of this struct based on the
17324 /// type of \p Root.
17325 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17326 const RISCVSubtarget &Subtarget) {
17327 SupportsZExt = false;
17328 SupportsSExt = false;
17329 SupportsFPExt = false;
17330 SupportsBF16Ext = false;
17331 EnforceOneUse = true;
17332 unsigned Opc = OrigOperand.getOpcode();
17333 // For the nodes we handle below, we end up using their inputs directly: see
17334 // getSource(). However, since they either don't have a passthru or we check
17335 // that their passthru is undef, we can safely ignore their mask and VL.
17336 switch (Opc) {
17337 case ISD::ZERO_EXTEND:
17338 case ISD::SIGN_EXTEND: {
17339 MVT VT = OrigOperand.getSimpleValueType();
17340 if (!VT.isVector())
17341 break;
17342
17343 SDValue NarrowElt = OrigOperand.getOperand(0);
17344 MVT NarrowVT = NarrowElt.getSimpleValueType();
17345 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17346 if (NarrowVT.getVectorElementType() == MVT::i1)
17347 break;
17348
17349 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17350 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17351 break;
17352 }
17353 case RISCVISD::VZEXT_VL:
17354 SupportsZExt = true;
17355 break;
17356 case RISCVISD::VSEXT_VL:
17357 SupportsSExt = true;
17358 break;
17359 case RISCVISD::FP_EXTEND_VL: {
17360 MVT NarrowEltVT =
17361 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17362 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17363 SupportsFPExt = true;
17364 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17365 SupportsBF16Ext = true;
17366
17367 break;
17368 }
17369 case ISD::SPLAT_VECTOR:
17370 case RISCVISD::VMV_V_X_VL:
17371 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17372 break;
17373 case RISCVISD::VFMV_V_F_VL: {
17374 MVT VT = OrigOperand.getSimpleValueType();
17375
17376 if (!OrigOperand.getOperand(0).isUndef())
17377 break;
17378
17379 SDValue Op = OrigOperand.getOperand(1);
17380 if (Op.getOpcode() != ISD::FP_EXTEND)
17381 break;
17382
17383 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17384 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17385 if (NarrowSize != ScalarBits)
17386 break;
17387
17388 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17389 SupportsFPExt = true;
17390 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17391 Subtarget))
17392 SupportsBF16Ext = true;
17393 break;
17394 }
17395 default:
17396 break;
17397 }
17398 }
17399
17400 /// Check if \p Root supports any extension folding combines.
17401 static bool isSupportedRoot(const SDNode *Root,
17402 const RISCVSubtarget &Subtarget) {
17403 switch (Root->getOpcode()) {
17404 case ISD::ADD:
17405 case ISD::SUB:
17406 case ISD::MUL: {
17407 return Root->getValueType(0).isScalableVector();
17408 }
17409 case ISD::OR: {
17410 return Root->getValueType(0).isScalableVector() &&
17411 Root->getFlags().hasDisjoint();
17412 }
17413 // Vector Widening Integer Add/Sub/Mul Instructions
17414 case RISCVISD::ADD_VL:
17415 case RISCVISD::MUL_VL:
17416 case RISCVISD::VWADD_W_VL:
17417 case RISCVISD::VWADDU_W_VL:
17418 case RISCVISD::SUB_VL:
17419 case RISCVISD::VWSUB_W_VL:
17420 case RISCVISD::VWSUBU_W_VL:
17421 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17422 case RISCVISD::FADD_VL:
17423 case RISCVISD::FSUB_VL:
17424 case RISCVISD::FMUL_VL:
17425 case RISCVISD::VFWADD_W_VL:
17426 case RISCVISD::VFWSUB_W_VL:
17427 return true;
17428 case RISCVISD::OR_VL:
17429 return Root->getFlags().hasDisjoint();
17430 case ISD::SHL:
17431 return Root->getValueType(0).isScalableVector() &&
17432 Subtarget.hasStdExtZvbb();
17433 case RISCVISD::SHL_VL:
17434 return Subtarget.hasStdExtZvbb();
17435 case RISCVISD::VFMADD_VL:
17436 case RISCVISD::VFNMSUB_VL:
17437 case RISCVISD::VFNMADD_VL:
17438 case RISCVISD::VFMSUB_VL:
17439 return true;
17440 default:
17441 return false;
17442 }
17443 }
17444
17445 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17446 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17447 const RISCVSubtarget &Subtarget) {
17448 assert(isSupportedRoot(Root, Subtarget) &&
17449 "Trying to build a helper with an "
17450 "unsupported root");
17451 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17453 OrigOperand = Root->getOperand(OperandIdx);
17454
17455 unsigned Opc = Root->getOpcode();
17456 switch (Opc) {
17457 // We consider
17458 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17459 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17460 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17461 case RISCVISD::VWADD_W_VL:
17462 case RISCVISD::VWADDU_W_VL:
17463 case RISCVISD::VWSUB_W_VL:
17464 case RISCVISD::VWSUBU_W_VL:
17465 case RISCVISD::VFWADD_W_VL:
17466 case RISCVISD::VFWSUB_W_VL:
17467 // Operand 1 can't be changed.
17468 if (OperandIdx == 1)
17469 break;
17470 [[fallthrough]];
17471 default:
17472 fillUpExtensionSupport(Root, DAG, Subtarget);
17473 break;
17474 }
17475 }
17476
17477 /// Helper function to get the Mask and VL from \p Root.
17478 static std::pair<SDValue, SDValue>
17479 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17480 const RISCVSubtarget &Subtarget) {
17481 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17482 switch (Root->getOpcode()) {
17483 case ISD::ADD:
17484 case ISD::SUB:
17485 case ISD::MUL:
17486 case ISD::OR:
17487 case ISD::SHL: {
17488 SDLoc DL(Root);
17489 MVT VT = Root->getSimpleValueType(0);
17490 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17491 }
17492 default:
17493 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17494 }
17495 }
17496
17497 /// Helper function to check if \p N is commutative with respect to the
17498 /// foldings that are supported by this class.
17499 static bool isCommutative(const SDNode *N) {
17500 switch (N->getOpcode()) {
17501 case ISD::ADD:
17502 case ISD::MUL:
17503 case ISD::OR:
17504 case RISCVISD::ADD_VL:
17505 case RISCVISD::MUL_VL:
17506 case RISCVISD::OR_VL:
17507 case RISCVISD::FADD_VL:
17508 case RISCVISD::FMUL_VL:
17509 case RISCVISD::VFMADD_VL:
17510 case RISCVISD::VFNMSUB_VL:
17511 case RISCVISD::VFNMADD_VL:
17512 case RISCVISD::VFMSUB_VL:
17513 return true;
17514 case RISCVISD::VWADD_W_VL:
17515 case RISCVISD::VWADDU_W_VL:
17516 case ISD::SUB:
17517 case RISCVISD::SUB_VL:
17518 case RISCVISD::VWSUB_W_VL:
17519 case RISCVISD::VWSUBU_W_VL:
17520 case RISCVISD::VFWADD_W_VL:
17521 case RISCVISD::FSUB_VL:
17522 case RISCVISD::VFWSUB_W_VL:
17523 case ISD::SHL:
17524 case RISCVISD::SHL_VL:
17525 return false;
17526 default:
17527 llvm_unreachable("Unexpected opcode");
17528 }
17529 }
17530
17531 /// Get a list of combine to try for folding extensions in \p Root.
17532 /// Note that each returned CombineToTry function doesn't actually modify
17533 /// anything. Instead they produce an optional CombineResult that, if not None,
17534 /// needs to be materialized for the combine to be applied.
17535 /// \see CombineResult::materialize.
17536 /// If the related CombineToTry function returns std::nullopt, that means the
17537 /// combine didn't match.
17538 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17539};
17540
17541/// Helper structure that holds all the necessary information to materialize a
17542/// combine that does some extension folding.
17543struct CombineResult {
17544 /// Opcode to be generated when materializing the combine.
17545 unsigned TargetOpcode;
17546 // No value means no extension is needed.
17547 std::optional<ExtKind> LHSExt;
17548 std::optional<ExtKind> RHSExt;
17549 /// Root of the combine.
17550 SDNode *Root;
17551 /// LHS of the TargetOpcode.
17552 NodeExtensionHelper LHS;
17553 /// RHS of the TargetOpcode.
17554 NodeExtensionHelper RHS;
17555
17556 CombineResult(unsigned TargetOpcode, SDNode *Root,
17557 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17558 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17559 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17560 LHS(LHS), RHS(RHS) {}
17561
17562 /// Return a value that uses TargetOpcode and that can be used to replace
17563 /// Root.
17564 /// The actual replacement is *not* done in that method.
17565 SDValue materialize(SelectionDAG &DAG,
17566 const RISCVSubtarget &Subtarget) const {
17567 SDValue Mask, VL, Passthru;
17568 std::tie(Mask, VL) =
17569 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17570 switch (Root->getOpcode()) {
17571 default:
17572 Passthru = Root->getOperand(2);
17573 break;
17574 case ISD::ADD:
17575 case ISD::SUB:
17576 case ISD::MUL:
17577 case ISD::OR:
17578 case ISD::SHL:
17579 Passthru = DAG.getUNDEF(Root->getValueType(0));
17580 break;
17581 }
17582 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17583 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17584 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17585 Passthru, Mask, VL);
17586 }
17587};
17588
17589/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17590/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17591/// are zext) and LHS and RHS can be folded into Root.
17592/// AllowExtMask defines which form `ext` can take in this pattern.
17593///
17594/// \note If the pattern can match with both zext and sext, the returned
17595/// CombineResult will feature the zext result.
17596///
17597/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17598/// can be used to apply the pattern.
17599static std::optional<CombineResult>
17600canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17601 const NodeExtensionHelper &RHS,
17602 uint8_t AllowExtMask, SelectionDAG &DAG,
17603 const RISCVSubtarget &Subtarget) {
17604 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17605 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17606 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17607 /*RHSExt=*/{ExtKind::ZExt});
17608 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17609 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17610 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17611 /*RHSExt=*/{ExtKind::SExt});
17612 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17613 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17614 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17615 /*RHSExt=*/{ExtKind::FPExt});
17616 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17617 RHS.SupportsBF16Ext)
17618 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17619 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17620 /*RHSExt=*/{ExtKind::BF16Ext});
17621 return std::nullopt;
17622}
17623
17624/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17625/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17626/// are zext) and LHS and RHS can be folded into Root.
17627///
17628/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17629/// can be used to apply the pattern.
17630static std::optional<CombineResult>
17631canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17632 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17633 const RISCVSubtarget &Subtarget) {
17634 return canFoldToVWWithSameExtensionImpl(
17635 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17636 Subtarget);
17637}
17638
17639/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17640///
17641/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17642/// can be used to apply the pattern.
17643static std::optional<CombineResult>
17644canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17645 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17646 const RISCVSubtarget &Subtarget) {
17647 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17648 Subtarget);
17649}
17650
17651/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17652///
17653/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17654/// can be used to apply the pattern.
17655static std::optional<CombineResult>
17656canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
17657 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17658 const RISCVSubtarget &Subtarget) {
17659 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17660 Subtarget);
17661}
17662
17663/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17664///
17665/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17666/// can be used to apply the pattern.
17667static std::optional<CombineResult>
17668canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17669 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17670 const RISCVSubtarget &Subtarget) {
17671 if (RHS.SupportsFPExt)
17672 return CombineResult(
17673 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17674 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17675
17676 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17677 // sext/zext?
17678 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17679 // purposes.
17680 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17681 return CombineResult(
17682 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17683 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17684 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17685 return CombineResult(
17686 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17687 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17688 return std::nullopt;
17689}
17690
17691/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
17692///
17693/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17694/// can be used to apply the pattern.
17695static std::optional<CombineResult>
17696canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17697 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17698 const RISCVSubtarget &Subtarget) {
17699 if (LHS.SupportsSExt)
17700 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17701 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17702 /*RHSExt=*/std::nullopt);
17703 return std::nullopt;
17704}
17705
17706/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
17707///
17708/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17709/// can be used to apply the pattern.
17710static std::optional<CombineResult>
17711canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17712 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17713 const RISCVSubtarget &Subtarget) {
17714 if (LHS.SupportsZExt)
17715 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17716 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17717 /*RHSExt=*/std::nullopt);
17718 return std::nullopt;
17719}
17720
17721/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
17722///
17723/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17724/// can be used to apply the pattern.
17725static std::optional<CombineResult>
17726canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17727 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17728 const RISCVSubtarget &Subtarget) {
17729 if (LHS.SupportsFPExt)
17730 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17731 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17732 /*RHSExt=*/std::nullopt);
17733 return std::nullopt;
17734}
17735
17736/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17737///
17738/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17739/// can be used to apply the pattern.
17740static std::optional<CombineResult>
17741canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17742 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17743 const RISCVSubtarget &Subtarget) {
17744
17745 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17746 return std::nullopt;
17747 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17748 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17749 /*RHSExt=*/{ExtKind::ZExt});
17750}
17751
17752 SmallVector<NodeExtensionHelper::CombineToTry>
17753 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17754 SmallVector<CombineToTry> Strategies;
17755 switch (Root->getOpcode()) {
17756 case ISD::ADD:
17757 case ISD::SUB:
17758 case ISD::OR:
17759 case RISCVISD::ADD_VL:
17760 case RISCVISD::SUB_VL:
17761 case RISCVISD::OR_VL:
17762 case RISCVISD::FADD_VL:
17763 case RISCVISD::FSUB_VL:
17764 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17765 Strategies.push_back(canFoldToVWWithSameExtension);
17766 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17767 Strategies.push_back(canFoldToVW_W);
17768 break;
17769 case RISCVISD::FMUL_VL:
17770 case RISCVISD::VFMADD_VL:
17771 case RISCVISD::VFMSUB_VL:
17772 case RISCVISD::VFNMADD_VL:
17773 case RISCVISD::VFNMSUB_VL:
17774 Strategies.push_back(canFoldToVWWithSameExtension);
17775 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17776 Strategies.push_back(canFoldToVWWithSameExtBF16);
17777 break;
17778 case ISD::MUL:
17779 case RISCVISD::MUL_VL:
17780 // mul -> vwmul(u)
17781 Strategies.push_back(canFoldToVWWithSameExtension);
17782 // mul -> vwmulsu
17783 Strategies.push_back(canFoldToVW_SU);
17784 break;
17785 case ISD::SHL:
17786 case RISCVISD::SHL_VL:
17787 // shl -> vwsll
17788 Strategies.push_back(canFoldToVWWithSameExtZEXT);
17789 break;
17790 case RISCVISD::VWADD_W_VL:
17791 case RISCVISD::VWSUB_W_VL:
17792 // vwadd_w|vwsub_w -> vwadd|vwsub
17793 Strategies.push_back(canFoldToVWWithSEXT);
17794 break;
17795 case RISCVISD::VWADDU_W_VL:
17796 case RISCVISD::VWSUBU_W_VL:
17797 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17798 Strategies.push_back(canFoldToVWWithZEXT);
17799 break;
17800 case RISCVISD::VFWADD_W_VL:
17801 case RISCVISD::VFWSUB_W_VL:
17802 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17803 Strategies.push_back(canFoldToVWWithFPEXT);
17804 break;
17805 default:
17806 llvm_unreachable("Unexpected opcode");
17807 }
17808 return Strategies;
17809}
17810} // End anonymous namespace.
17811
17813 // TODO: Extend this to other binops using generic identity logic
17814 assert(N->getOpcode() == RISCVISD::ADD_VL);
17815 SDValue A = N->getOperand(0);
17816 SDValue B = N->getOperand(1);
17817 SDValue Passthru = N->getOperand(2);
17818 if (!Passthru.isUndef())
17819 // TODO: This could be a vmerge instead
17820 return SDValue();
17821 ;
17822 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17823 return A;
17824 // Peek through fixed to scalable
17825 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17826 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17827 return A;
17828 return SDValue();
17829}
17830
17831/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17832/// The supported combines are:
17833/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17834/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17835/// mul | mul_vl -> vwmul(u) | vwmul_su
17836/// shl | shl_vl -> vwsll
17837/// fadd_vl -> vfwadd | vfwadd_w
17838/// fsub_vl -> vfwsub | vfwsub_w
17839/// fmul_vl -> vfwmul
17840/// vwadd_w(u) -> vwadd(u)
17841/// vwsub_w(u) -> vwsub(u)
17842/// vfwadd_w -> vfwadd
17843/// vfwsub_w -> vfwsub
17844 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17845 TargetLowering::DAGCombinerInfo &DCI,
17846 const RISCVSubtarget &Subtarget) {
17847 SelectionDAG &DAG = DCI.DAG;
17848 if (DCI.isBeforeLegalize())
17849 return SDValue();
17850
17851 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17852 return SDValue();
17853
17854 SmallVector<SDNode *> Worklist;
17855 SmallPtrSet<SDNode *, 8> Inserted;
17856 Worklist.push_back(N);
17857 Inserted.insert(N);
17858 SmallVector<CombineResult> CombinesToApply;
17859
17860 while (!Worklist.empty()) {
17861 SDNode *Root = Worklist.pop_back_val();
17862
17863 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17864 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17865 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
17866 &Inserted](const NodeExtensionHelper &Op) {
17867 if (Op.needToPromoteOtherUsers()) {
17868 for (SDUse &Use : Op.OrigOperand->uses()) {
17869 SDNode *TheUser = Use.getUser();
17870 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17871 return false;
17872 // We only support the first 2 operands of FMA.
17873 if (Use.getOperandNo() >= 2)
17874 return false;
17875 if (Inserted.insert(TheUser).second)
17876 Worklist.push_back(TheUser);
17877 }
17878 }
17879 return true;
17880 };
17881
17882 // Control the compile time by limiting the number of nodes we look at in
17883 // total.
17884 if (Inserted.size() > ExtensionMaxWebSize)
17885 return SDValue();
17886
17887 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17888 NodeExtensionHelper::getSupportedFoldings(Root);
17889
17890 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17891 bool Matched = false;
17892 for (int Attempt = 0;
17893 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17894 ++Attempt) {
17895
17896 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17897 FoldingStrategies) {
17898 std::optional<CombineResult> Res =
17899 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17900 if (Res) {
17901 Matched = true;
17902 CombinesToApply.push_back(*Res);
17903 // All the inputs that are extended need to be folded, otherwise
17904 // we would be leaving the old input (since it may still be used),
17905 // and the new one.
17906 if (Res->LHSExt.has_value())
17907 if (!AppendUsersIfNeeded(LHS))
17908 return SDValue();
17909 if (Res->RHSExt.has_value())
17910 if (!AppendUsersIfNeeded(RHS))
17911 return SDValue();
17912 break;
17913 }
17914 }
17915 std::swap(LHS, RHS);
17916 }
17917 // Right now we do an all or nothing approach.
17918 if (!Matched)
17919 return SDValue();
17920 }
17921 // Store the value for the replacement of the input node separately.
17922 SDValue InputRootReplacement;
17923 // We do the RAUW after we materialize all the combines, because some replaced
17924 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17925 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17926 // yet-to-be-visited CombinesToApply roots.
17928 ValuesToReplace.reserve(CombinesToApply.size());
17929 for (CombineResult Res : CombinesToApply) {
17930 SDValue NewValue = Res.materialize(DAG, Subtarget);
17931 if (!InputRootReplacement) {
17932 assert(Res.Root == N &&
17933 "First element is expected to be the current node");
17934 InputRootReplacement = NewValue;
17935 } else {
17936 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
17937 }
17938 }
17939 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
17940 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
17941 DCI.AddToWorklist(OldNewValues.second.getNode());
17942 }
17943 return InputRootReplacement;
17944}
17945
17946// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
17947// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
17948// y will be the Passthru and cond will be the Mask.
17949 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
17950 unsigned Opc = N->getOpcode();
17951 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17952 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17953
17954 SDValue Y = N->getOperand(0);
17955 SDValue MergeOp = N->getOperand(1);
17956 unsigned MergeOpc = MergeOp.getOpcode();
17957
17958 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
17959 return SDValue();
17960
17961 SDValue X = MergeOp->getOperand(1);
17962
17963 if (!MergeOp.hasOneUse())
17964 return SDValue();
17965
17966 // Passthru should be undef
17967 SDValue Passthru = N->getOperand(2);
17968 if (!Passthru.isUndef())
17969 return SDValue();
17970
17971 // Mask should be all ones
17972 SDValue Mask = N->getOperand(3);
17973 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
17974 return SDValue();
17975
17976 // False value of MergeOp should be all zeros
17977 SDValue Z = MergeOp->getOperand(2);
17978
17979 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
17980 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
17981 Z = Z.getOperand(1);
17982
17983 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
17984 return SDValue();
17985
17986 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
17987 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
17988 N->getFlags());
17989}
17990
17991 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
17992 TargetLowering::DAGCombinerInfo &DCI,
17993 const RISCVSubtarget &Subtarget) {
17994 [[maybe_unused]] unsigned Opc = N->getOpcode();
17995 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
17996 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
17997
17998 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17999 return V;
18000
18001 return combineVWADDSUBWSelect(N, DCI.DAG);
18002}
18003
18004// Helper function for performMemPairCombine.
18005// Try to combine the memory loads/stores LSNode1 and LSNode2
18006// into a single memory pair operation.
18007 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
18008 LSBaseSDNode *LSNode2, SDValue BasePtr,
18009 uint64_t Imm) {
18010 SmallPtrSet<const SDNode *, 32> Visited;
18011 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
18012
18013 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
18014 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
18015 return SDValue();
18016
18017 MachineFunction &MF = DAG.getMachineFunction();
18018 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18019
18020 // The new operation has twice the width.
18021 MVT XLenVT = Subtarget.getXLenVT();
18022 EVT MemVT = LSNode1->getMemoryVT();
18023 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
18024 MachineMemOperand *MMO = LSNode1->getMemOperand();
18025 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
18026 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
18027
18028 if (LSNode1->getOpcode() == ISD::LOAD) {
18029 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
18030 unsigned Opcode;
18031 if (MemVT == MVT::i32)
18032 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
18033 else
18034 Opcode = RISCVISD::TH_LDD;
18035
18036 SDValue Res = DAG.getMemIntrinsicNode(
18037 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
18038 {LSNode1->getChain(), BasePtr,
18039 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18040 NewMemVT, NewMMO);
18041
18042 SDValue Node1 =
18043 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
18044 SDValue Node2 =
18045 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
18046
18047 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
18048 return Node1;
18049 } else {
18050 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
18051
18052 SDValue Res = DAG.getMemIntrinsicNode(
18053 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
18054 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
18055 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18056 NewMemVT, NewMMO);
18057
18058 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
18059 return Res;
18060 }
18061}
18062
18063// Try to combine two adjacent loads/stores to a single pair instruction from
18064// the XTHeadMemPair vendor extension.
18065 static SDValue performMemPairCombine(SDNode *N,
18066 TargetLowering::DAGCombinerInfo &DCI) {
18067 SelectionDAG &DAG = DCI.DAG;
18068 MachineFunction &MF = DAG.getMachineFunction();
18069 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18070
18071 // Target does not support load/store pair.
18072 if (!Subtarget.hasVendorXTHeadMemPair())
18073 return SDValue();
18074
18075 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
18076 EVT MemVT = LSNode1->getMemoryVT();
18077 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
18078
18079 // No volatile, indexed or atomic loads/stores.
18080 if (!LSNode1->isSimple() || LSNode1->isIndexed())
18081 return SDValue();
18082
18083 // Function to get a base + constant representation from a memory value.
18084 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
18085 if (Ptr->getOpcode() == ISD::ADD)
18086 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
18087 return {Ptr->getOperand(0), C1->getZExtValue()};
18088 return {Ptr, 0};
18089 };
18090
18091 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
18092
18093 SDValue Chain = N->getOperand(0);
18094 for (SDUse &Use : Chain->uses()) {
18095 if (Use.getUser() != N && Use.getResNo() == 0 &&
18096 Use.getUser()->getOpcode() == N->getOpcode()) {
18097 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
18098 
18099 // No volatile, indexed or atomic loads/stores.
18100 if (!LSNode2->isSimple() || LSNode2->isIndexed())
18101 continue;
18102
18103 // Check if LSNode1 and LSNode2 have the same type and extension.
18104 if (LSNode1->getOpcode() == ISD::LOAD)
18105 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
18106 cast<LoadSDNode>(LSNode1)->getExtensionType())
18107 continue;
18108
18109 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
18110 continue;
18111
18112 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
18113
18114 // Check if the base pointer is the same for both instructions.
18115 if (Base1 != Base2)
18116 continue;
18117
18118 // Check if the offsets match the XTHeadMemPair encoding constraints.
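// For example, two i64 accesses at Base+16 and Base+24 (Offset1 = 16 = 1 << 4,
// Offset2 = Offset1 + 8) satisfy the encoding and can form a single
// th.ldd/th.sdd pair.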
18119 bool Valid = false;
18120 if (MemVT == MVT::i32) {
18121 // Check for adjacent i32 values and a 2-bit index.
18122 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
18123 Valid = true;
18124 } else if (MemVT == MVT::i64) {
18125 // Check for adjacent i64 values and a 2-bit index.
18126 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18127 Valid = true;
18128 }
18129
18130 if (!Valid)
18131 continue;
18132
18133 // Try to combine.
18134 if (SDValue Res =
18135 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18136 return Res;
18137 }
18138 }
18139
18140 return SDValue();
18141}
18142
18143// Fold
18144// (fp_to_int (froundeven X)) -> fcvt X, rne
18145// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18146// (fp_to_int (ffloor X)) -> fcvt X, rdn
18147// (fp_to_int (fceil X)) -> fcvt X, rup
18148// (fp_to_int (fround X)) -> fcvt X, rmm
18149// (fp_to_int (frint X)) -> fcvt X
18150static SDValue performFP_TO_INTCombine(SDNode *N,
18151 TargetLowering::DAGCombinerInfo &DCI,
18152 const RISCVSubtarget &Subtarget) {
18153 SelectionDAG &DAG = DCI.DAG;
18154 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18155 MVT XLenVT = Subtarget.getXLenVT();
18156
18157 SDValue Src = N->getOperand(0);
18158
18159 // Don't do this for strict-fp Src.
18160 if (Src->isStrictFPOpcode())
18161 return SDValue();
18162
18163 // Ensure the FP type is legal.
18164 if (!TLI.isTypeLegal(Src.getValueType()))
18165 return SDValue();
18166
18167 // Don't do this for f16 with Zfhmin and not Zfh.
18168 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18169 return SDValue();
18170
18171 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18172 // If the result is invalid, we didn't find a foldable instruction.
18173 if (FRM == RISCVFPRndMode::Invalid)
18174 return SDValue();
18175
18176 SDLoc DL(N);
18177 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18178 EVT VT = N->getValueType(0);
18179
18180 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18181 MVT SrcVT = Src.getSimpleValueType();
18182 MVT SrcContainerVT = SrcVT;
18183 MVT ContainerVT = VT.getSimpleVT();
18184 SDValue XVal = Src.getOperand(0);
18185
18186 // For widening and narrowing conversions we just combine it into a
18187 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18188 // end up getting lowered to their appropriate pseudo instructions based on
18189 // their operand types
18190 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18191 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18192 return SDValue();
18193
18194 // Make fixed-length vectors scalable first
18195 if (SrcVT.isFixedLengthVector()) {
18196 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18197 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18198 ContainerVT =
18199 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18200 }
18201
18202 auto [Mask, VL] =
18203 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18204
18205 SDValue FpToInt;
18206 if (FRM == RISCVFPRndMode::RTZ) {
18207 // Use the dedicated trunc static rounding mode if we're truncating so we
18208 // don't need to generate calls to fsrmi/fsrm
18209 unsigned Opc =
18210 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18211 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18212 } else {
18213 unsigned Opc =
18214 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18215 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18216 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18217 }
18218
18219 // If converted from fixed-length to scalable, convert back
18220 if (VT.isFixedLengthVector())
18221 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18222
18223 return FpToInt;
18224 }
18225
18226 // Only handle XLen or i32 types. Other types narrower than XLen will
18227 // eventually be legalized to XLenVT.
18228 if (VT != MVT::i32 && VT != XLenVT)
18229 return SDValue();
18230
18231 unsigned Opc;
18232 if (VT == XLenVT)
18233 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18234 else
18235 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18236
18237 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18238 DAG.getTargetConstant(FRM, DL, XLenVT));
18239 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18240}
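// Illustrative sketch (not part of the lowering code): the fold above replaces
// an explicit FP rounding followed by a conversion with a single fcvt carrying
// a static rounding mode. The scalar equivalence it relies on, written with
// the C math library (std::floor/ceil/trunc/round from <cmath> and an in-range
// input are assumptions of this sketch, not something this file uses):
static inline long long roundThenConvert(double X, unsigned Mode) {
  switch (Mode) {
  case 0: return (long long)std::floor(X);  // (fp_to_int (ffloor X)) -> fcvt rdn
  case 1: return (long long)std::ceil(X);   // (fp_to_int (fceil X))  -> fcvt rup
  case 2: return (long long)std::trunc(X);  // (fp_to_int (ftrunc X)) -> fcvt rtz
  default: return (long long)std::round(X); // (fp_to_int (fround X)) -> fcvt rmm
  }
}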
18241
18242// Fold
18243// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18244// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18245// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18246// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18247// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18248// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
18249static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18250 TargetLowering::DAGCombinerInfo &DCI,
18251 const RISCVSubtarget &Subtarget) {
18252 SelectionDAG &DAG = DCI.DAG;
18253 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18254 MVT XLenVT = Subtarget.getXLenVT();
18255
18256 // Only handle XLen types. Other types narrower than XLen will eventually be
18257 // legalized to XLenVT.
18258 EVT DstVT = N->getValueType(0);
18259 if (DstVT != XLenVT)
18260 return SDValue();
18261
18262 SDValue Src = N->getOperand(0);
18263
18264 // Don't do this for strict-fp Src.
18265 if (Src->isStrictFPOpcode())
18266 return SDValue();
18267
18268 // Ensure the FP type is also legal.
18269 if (!TLI.isTypeLegal(Src.getValueType()))
18270 return SDValue();
18271
18272 // Don't do this for f16 with Zfhmin and not Zfh.
18273 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18274 return SDValue();
18275
18276 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18277
18278 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18279 if (FRM == RISCVFPRndMode::Invalid)
18280 return SDValue();
18281
18282 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18283
18284 unsigned Opc;
18285 if (SatVT == DstVT)
18286 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18287 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18288 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18289 else
18290 return SDValue();
18291 // FIXME: Support other SatVTs by clamping before or after the conversion.
18292
18293 Src = Src.getOperand(0);
18294
18295 SDLoc DL(N);
18296 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18297 DAG.getTargetConstant(FRM, DL, XLenVT));
18298
18299 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18300 // extend.
18301 if (Opc == RISCVISD::FCVT_WU_RV64)
18302 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18303
18304 // RISC-V FP-to-int conversions saturate to the destination register size, but
18305 // don't produce 0 for nan.
18306 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18307 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18308}
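// Illustrative sketch (not part of the lowering code): RISC-V fcvt already
// saturates out-of-range inputs, but returns the maximum integer for NaN,
// whereas FP_TO_*INT_SAT requires 0 for NaN. The SETUO select above supplies
// exactly that missing case. Scalar model (INT64_MAX/INT64_MIN from <cstdint>
// are an assumption of this sketch):
static inline int64_t fpToSInt64Sat(double X) {
  if (X != X)        // NaN: the unordered (SETUO) case, result must be 0
    return 0;
  if (X >= 0x1p63)   // too large: saturate, as fcvt.l.d already does
    return INT64_MAX;
  if (X < -0x1p63)   // too small: saturate
    return INT64_MIN;
  return (int64_t)X; // in range: ordinary conversion
}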
18309
18310// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18311// smaller than XLenVT.
18312static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18313 const RISCVSubtarget &Subtarget) {
18314 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18315
18316 SDValue Src = N->getOperand(0);
18317 if (Src.getOpcode() != ISD::BSWAP)
18318 return SDValue();
18319
18320 EVT VT = N->getValueType(0);
18321 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18322 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18323 return SDValue();
18324
18325 SDLoc DL(N);
18326 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18327}
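// Illustrative sketch (not part of the lowering code): a full bit reversal
// also reverses the byte order, so combining it with a byte swap leaves the
// bytes in place with only the bits inside each byte reversed, which is
// exactly what BREV8 does. A 16-bit scalar model (uint8_t/uint16_t assumed
// available):
static inline uint16_t brev8Of16(uint16_t X) {
  auto RevBitsInByte = [](uint8_t B) {
    uint8_t R = 0;
    for (int I = 0; I < 8; ++I)
      R = (uint8_t)((R << 1) | ((B >> I) & 1)); // reverse the 8 bits of B
    return R;
  };
  // Equivalent to bitreverse(bswap(X)): bswap moves the high byte low, the
  // full bit reversal moves it back high while reversing its bits.
  return (uint16_t)(RevBitsInByte((uint8_t)X) |
                    ((uint16_t)RevBitsInByte((uint8_t)(X >> 8)) << 8));
}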
18328
18329static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18330 const RISCVSubtarget &Subtarget) {
18331 // Fold:
18332 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
18333
18334 // Check if its first operand is a vp.load.
18335 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18336 if (!VPLoad)
18337 return SDValue();
18338
18339 EVT LoadVT = VPLoad->getValueType(0);
18340 // We do not have a strided_load version for masks, and the evl of vp.reverse
18341 // and vp.load should always be the same.
18342 if (!LoadVT.getVectorElementType().isByteSized() ||
18343 N->getOperand(2) != VPLoad->getVectorLength() ||
18344 !N->getOperand(0).hasOneUse())
18345 return SDValue();
18346
18347 // Check if the mask of outer vp.reverse are all 1's.
18348 if (!isOneOrOneSplat(N->getOperand(1)))
18349 return SDValue();
18350
18351 SDValue LoadMask = VPLoad->getMask();
18352 // If Mask is all ones, then load is unmasked and can be reversed.
18353 if (!isOneOrOneSplat(LoadMask)) {
18354 // If the mask is not all ones, we can reverse the load if the mask was also
18355 // reversed by an unmasked vp.reverse with the same EVL.
18356 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18357 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18358 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18359 return SDValue();
18360 LoadMask = LoadMask.getOperand(0);
18361 }
18362
18363 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18364 SDLoc DL(N);
18365 MVT XLenVT = Subtarget.getXLenVT();
18366 SDValue NumElem = VPLoad->getVectorLength();
18367 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18368
18369 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18370 DAG.getConstant(1, DL, XLenVT));
18371 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18372 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18373 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18374 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18375
18376 MachineFunction &MF = DAG.getMachineFunction();
18377 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18378 MachineMemOperand *MMO = MF.getMachineMemOperand(
18379 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18380 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18381
18382 SDValue Ret = DAG.getStridedLoadVP(
18383 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18384 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18385
18386 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18387
18388 return Ret;
18389}
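// Illustrative sketch (not part of the lowering code): the strided load built
// above starts at the last active element and walks backwards with a negative
// stride, which yields the reversed vector directly. The same addressing in
// plain C++ (assumes <cstdint> and <vector>; EVL >= 1):
static inline std::vector<int32_t> reversedLoad(const int32_t *Addr,
                                                unsigned EVL) {
  const int64_t ElemWidthByte = sizeof(int32_t);
  // Base = LoadAddr + (NumElem - 1) * ElemWidthByte, as computed above.
  const char *Base =
      reinterpret_cast<const char *>(Addr) + (EVL - 1) * ElemWidthByte;
  const int64_t Stride = -ElemWidthByte; // negative stride walks backwards
  std::vector<int32_t> Out(EVL);
  for (unsigned I = 0; I < EVL; ++I)
    Out[I] = *reinterpret_cast<const int32_t *>(Base + (int64_t)I * Stride);
  return Out; // Out is Addr[0..EVL) in reverse order
}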
18390
18391static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18392 const RISCVSubtarget &Subtarget) {
18393 // Fold:
18394 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18395 // -1, MASK)
18396 auto *VPStore = cast<VPStoreSDNode>(N);
18397
18398 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18399 return SDValue();
18400
18401 SDValue VPReverse = VPStore->getValue();
18402 EVT ReverseVT = VPReverse->getValueType(0);
18403
18404 // We do not have a strided_store version for masks, and the evl of vp.reverse
18405 // and vp.store should always be the same.
18406 if (!ReverseVT.getVectorElementType().isByteSized() ||
18407 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18408 !VPReverse.hasOneUse())
18409 return SDValue();
18410
18411 SDValue StoreMask = VPStore->getMask();
18412 // If Mask is all ones, then the store is unmasked and can be reversed.
18413 if (!isOneOrOneSplat(StoreMask)) {
18414 // If the mask is not all ones, we can reverse the store if the mask was
18415 // also reversed by an unmasked vp.reverse with the same EVL.
18416 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18417 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18418 StoreMask.getOperand(2) != VPStore->getVectorLength())
18419 return SDValue();
18420 StoreMask = StoreMask.getOperand(0);
18421 }
18422
18423 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18424 SDLoc DL(N);
18425 MVT XLenVT = Subtarget.getXLenVT();
18426 SDValue NumElem = VPStore->getVectorLength();
18427 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18428
18429 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18430 DAG.getConstant(1, DL, XLenVT));
18431 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18432 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18433 SDValue Base =
18434 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18435 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18436
18437 MachineFunction &MF = DAG.getMachineFunction();
18438 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18439 MachineMemOperand *MMO = MF.getMachineMemOperand(
18440 PtrInfo, VPStore->getMemOperand()->getFlags(),
18441 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18442
18443 return DAG.getStridedStoreVP(
18444 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18445 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18446 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18447 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18448}
18449
18450// Peephole avgceil pattern.
18451// %1 = zext <N x i8> %a to <N x i32>
18452// %2 = zext <N x i8> %b to <N x i32>
18453// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18454// %4 = add nuw nsw <N x i32> %3, %2
18455// %5 = lshr <N x i32> %4, splat (i32 1)
18456// %6 = trunc <N x i32> %5 to <N x i8>
18457static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
18458 const RISCVSubtarget &Subtarget) {
18459 EVT VT = N->getValueType(0);
18460
18461 // Ignore fixed vectors.
18462 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18463 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18464 return SDValue();
18465
18466 SDValue In = N->getOperand(0);
18467 SDValue Mask = N->getOperand(1);
18468 SDValue VL = N->getOperand(2);
18469
18470 // Input should be a vp_srl with same mask and VL.
18471 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18472 In.getOperand(3) != VL)
18473 return SDValue();
18474
18475 // Shift amount should be 1.
18476 if (!isOneOrOneSplat(In.getOperand(1)))
18477 return SDValue();
18478
18479 // Shifted value should be a vp_add with same mask and VL.
18480 SDValue LHS = In.getOperand(0);
18481 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18482 LHS.getOperand(3) != VL)
18483 return SDValue();
18484
18485 SDValue Operands[3];
18486
18487 // Matches another VP_ADD with same VL and Mask.
18488 auto FindAdd = [&](SDValue V, SDValue Other) {
18489 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18490 V.getOperand(3) != VL)
18491 return false;
18492
18493 Operands[0] = Other;
18494 Operands[1] = V.getOperand(1);
18495 Operands[2] = V.getOperand(0);
18496 return true;
18497 };
18498
18499 // We need to find another VP_ADD in one of the operands.
18500 SDValue LHS0 = LHS.getOperand(0);
18501 SDValue LHS1 = LHS.getOperand(1);
18502 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18503 return SDValue();
18504
18505 // Now we have three operands of two additions. Check that one of them is a
18506 // constant vector with ones.
18507 auto I = llvm::find_if(Operands,
18508 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18509 if (I == std::end(Operands))
18510 return SDValue();
18511 // We found a vector with ones, move it to the end of the Operands array.
18512 std::swap(*I, Operands[2]);
18513
18514 // Make sure the other 2 operands can be promoted from the result type.
18515 for (SDValue Op : drop_end(Operands)) {
18516 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18517 Op.getOperand(2) != VL)
18518 return SDValue();
18519 // Input must be the same size or smaller than our result.
18520 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18521 return SDValue();
18522 }
18523
18524 // Pattern is detected.
18525 // Rebuild the zero extends in case the inputs are smaller than our result.
18526 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18527 Operands[0].getOperand(0), Mask, VL);
18528 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18529 Operands[1].getOperand(0), Mask, VL);
18530 // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18531 // mode.
18532 SDLoc DL(N);
18533 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18534 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18535}
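// Illustrative sketch (not part of the lowering code): the chain matched above
// is the classic rounding-up average. Widening, adding one and shifting right
// by one is what vaaddu with RNU rounding computes, which is why the whole
// zext/add/add/srl/trunc web collapses to AVGCEILU_VL (uint8_t/uint32_t
// assumed available):
static inline uint8_t avgCeilU8(uint8_t A, uint8_t B) {
  // Widened form matched by the combine: (zext A + zext B + 1) >> 1.
  uint32_t Wide = ((uint32_t)A + (uint32_t)B + 1) >> 1;
  // Equivalent overflow-free form: (A | B) - ((A ^ B) >> 1).
  uint8_t Narrow = (uint8_t)((A | B) - ((A ^ B) >> 1));
  (void)Narrow; // both expressions agree for all 8-bit inputs
  return (uint8_t)Wide;
}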
18536
18537// Convert from one FMA opcode to another based on whether we are negating the
18538// multiply result and/or the accumulator.
18539// NOTE: Only supports RVV operations with VL.
18540static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18541 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18542 if (NegMul) {
18543 // clang-format off
18544 switch (Opcode) {
18545 default: llvm_unreachable("Unexpected opcode");
18546 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18547 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18548 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18549 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18550 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18551 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18552 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18553 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18554 }
18555 // clang-format on
18556 }
18557
18558 // Negating the accumulator changes ADD<->SUB.
18559 if (NegAcc) {
18560 // clang-format off
18561 switch (Opcode) {
18562 default: llvm_unreachable("Unexpected opcode");
18563 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18564 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18565 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18566 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18567 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18568 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18569 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18570 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18571 }
18572 // clang-format on
18573 }
18574
18575 return Opcode;
18576}
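// Illustrative sketch (not part of the lowering code): the tables above encode
// plain FMA algebra. Negating the product flips madd<->nmsub and msub<->nmadd,
// and negating the accumulator flips add<->sub. Scalar form, operand-order
// details of the RVV instructions aside (std::fma from <cmath> is an
// assumption of this sketch):
static inline void fmaNegationIdentities(double A, double B, double C) {
  double MAdd = std::fma(A, B, C);    //  (A * B) + C  -> vfmadd
  double NMSub = std::fma(-A, B, C);  // -(A * B) + C  -> vfnmsub
  double MSub = std::fma(A, B, -C);   //  (A * B) - C  -> vfmsub
  double NMAdd = std::fma(-A, B, -C); // -(A * B) - C  -> vfnmadd
  (void)MAdd; (void)NMSub; (void)MSub; (void)NMAdd;
}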
18577
18578static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18579 // Fold FNEG_VL into FMA opcodes.
18580 // The first operand of strict-fp is chain.
18581 bool IsStrict =
18582 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18583 unsigned Offset = IsStrict ? 1 : 0;
18584 SDValue A = N->getOperand(0 + Offset);
18585 SDValue B = N->getOperand(1 + Offset);
18586 SDValue C = N->getOperand(2 + Offset);
18587 SDValue Mask = N->getOperand(3 + Offset);
18588 SDValue VL = N->getOperand(4 + Offset);
18589
18590 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18591 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18592 V.getOperand(2) == VL) {
18593 // Return the negated input.
18594 V = V.getOperand(0);
18595 return true;
18596 }
18597
18598 return false;
18599 };
18600
18601 bool NegA = invertIfNegative(A);
18602 bool NegB = invertIfNegative(B);
18603 bool NegC = invertIfNegative(C);
18604
18605 // If no operands are negated, we're done.
18606 if (!NegA && !NegB && !NegC)
18607 return SDValue();
18608
18609 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18610 if (IsStrict)
18611 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18612 {N->getOperand(0), A, B, C, Mask, VL});
18613 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18614 VL);
18615}
18616
18617static SDValue performVFMADD_VLCombine(SDNode *N,
18618 TargetLowering::DAGCombinerInfo &DCI,
18619 const RISCVSubtarget &Subtarget) {
18620 SelectionDAG &DAG = DCI.DAG;
18621
18622 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18623 return V;
18624
18625 // FIXME: Ignore strict opcodes for now.
18626 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18627 return SDValue();
18628
18629 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18630}
18631
18632static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18633 const RISCVSubtarget &Subtarget) {
18634 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18635
18636 EVT VT = N->getValueType(0);
18637
18638 if (VT != Subtarget.getXLenVT())
18639 return SDValue();
18640
18641 if (!isa<ConstantSDNode>(N->getOperand(1)))
18642 return SDValue();
18643 uint64_t ShAmt = N->getConstantOperandVal(1);
18644
18645 SDValue N0 = N->getOperand(0);
18646
18647 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18648 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18649 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18650 unsigned ExtSize =
18651 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18652 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18653 N0.getOperand(0).hasOneUse() &&
18654 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18655 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18656 if (LShAmt < ExtSize) {
18657 unsigned Size = VT.getSizeInBits();
18658 SDLoc ShlDL(N0.getOperand(0));
18659 SDValue Shl =
18660 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18661 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18662 SDLoc DL(N);
18663 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18664 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18665 }
18666 }
18667 }
18668
18669 if (ShAmt > 32 || VT != MVT::i64)
18670 return SDValue();
18671
18672 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18673 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18674 //
18675 // Also try these folds where an add or sub is in the middle.
18676 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
18677 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
18678 SDValue Shl;
18679 ConstantSDNode *AddC = nullptr;
18680
18681 // We might have an ADD or SUB between the SRA and SHL.
18682 bool IsAdd = N0.getOpcode() == ISD::ADD;
18683 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18684 // Other operand needs to be a constant we can modify.
18685 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18686 if (!AddC)
18687 return SDValue();
18688
18689 // AddC needs to have at least 32 trailing zeros.
18690 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18691 return SDValue();
18692
18693 // All users should be a shift by constant less than or equal to 32. This
18694 // ensures we'll do this optimization for each of them to produce an
18695 // add/sub+sext_inreg they can all share.
18696 for (SDNode *U : N0->users()) {
18697 if (U->getOpcode() != ISD::SRA ||
18698 !isa<ConstantSDNode>(U->getOperand(1)) ||
18699 U->getConstantOperandVal(1) > 32)
18700 return SDValue();
18701 }
18702
18703 Shl = N0.getOperand(IsAdd ? 0 : 1);
18704 } else {
18705 // Not an ADD or SUB.
18706 Shl = N0;
18707 }
18708
18709 // Look for a shift left by 32.
18710 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18711 Shl.getConstantOperandVal(1) != 32)
18712 return SDValue();
18713
18714 // If we didn't look through an add/sub, then the shl should have one use.
18715 // If we did look through an add/sub, the sext_inreg we create is free so
18716 // we're only creating 2 new instructions. It's enough to only remove the
18717 // original sra+add/sub.
18718 if (!AddC && !Shl.hasOneUse())
18719 return SDValue();
18720
18721 SDLoc DL(N);
18722 SDValue In = Shl.getOperand(0);
18723
18724 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18725 // constant.
18726 if (AddC) {
18727 SDValue ShiftedAddC =
18728 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18729 if (IsAdd)
18730 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18731 else
18732 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18733 }
18734
18735 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18736 DAG.getValueType(MVT::i32));
18737 if (ShAmt == 32)
18738 return SExt;
18739
18740 return DAG.getNode(
18741 ISD::SHL, DL, MVT::i64, SExt,
18742 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18743}
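// Illustrative sketch (not part of the lowering code): on 64-bit values,
// shifting left by 32 and arithmetic-shifting right by (32 - C) is the same as
// sign-extending the low 32 bits and shifting left by C, which lets the fold
// above select an sext.w-style sext_inreg plus slli. Scalar check for C in
// [0, 32] (int64_t/uint64_t assumed available; arithmetic right shift of
// negative values assumed, as on the targets this file cares about):
static inline int64_t shlOfSextInReg(int64_t X, unsigned C) {
  int64_t SraForm = (int64_t)((uint64_t)X << 32) >> (32 - C);
  int64_t SextInReg = (int64_t)(int32_t)X; // sext_inreg X, i32
  int64_t ShlForm = (int64_t)((uint64_t)SextInReg << C);
  (void)SraForm; // SraForm == ShlForm for every X and C in [0, 32]
  return ShlForm;
}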
18744
18745// Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc !cc X, Y), Z) if
18746// the result is used as the condition of a br_cc or select_cc we can invert,
18747// inverting the setcc is free, and Z is 0/1. Caller will invert the
18748// br_cc/select_cc.
18749static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18750 bool IsAnd = Cond.getOpcode() == ISD::AND;
18751 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18752 return SDValue();
18753
18754 if (!Cond.hasOneUse())
18755 return SDValue();
18756
18757 SDValue Setcc = Cond.getOperand(0);
18758 SDValue Xor = Cond.getOperand(1);
18759 // Canonicalize setcc to LHS.
18760 if (Setcc.getOpcode() != ISD::SETCC)
18761 std::swap(Setcc, Xor);
18762 // LHS should be a setcc and RHS should be an xor.
18763 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18764 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18765 return SDValue();
18766
18767 // If the condition is an And, SimplifyDemandedBits may have changed
18768 // (xor Z, 1) to (not Z).
18769 SDValue Xor1 = Xor.getOperand(1);
18770 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18771 return SDValue();
18772
18773 EVT VT = Cond.getValueType();
18774 SDValue Xor0 = Xor.getOperand(0);
18775
18776 // The LHS of the xor needs to be 0/1.
18777 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18778 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18779 return SDValue();
18780
18781 // We can only invert integer setccs.
18782 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18783 if (!SetCCOpVT.isScalarInteger())
18784 return SDValue();
18785
18786 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18787 if (ISD::isIntEqualitySetCC(CCVal)) {
18788 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18789 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18790 Setcc.getOperand(1), CCVal);
18791 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18792 // Invert (setlt 0, X) by converting to (setlt X, 1).
18793 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18794 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18795 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18796 // Invert (setlt X, 1) by converting to (setlt 0, X).
18797 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18798 DAG.getConstant(0, SDLoc(Setcc), VT),
18799 Setcc.getOperand(0), CCVal);
18800 } else
18801 return SDValue();
18802
18803 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18804 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18805}
18806
18807// Perform common combines for BR_CC and SELECT_CC conditions.
18808static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18809 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18810 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18811
18812 // An arithmetic right shift preserves the sign, so when comparing
18813 // against zero the shift can be omitted.
18814 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18815 // setge (sra X, N), 0 -> setge X, 0
18816 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18817 LHS.getOpcode() == ISD::SRA) {
18818 LHS = LHS.getOperand(0);
18819 return true;
18820 }
18821
18822 if (!ISD::isIntEqualitySetCC(CCVal))
18823 return false;
18824
18825 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18826 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18827 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18828 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18829 // If we're looking for eq 0 instead of ne 0, we need to invert the
18830 // condition.
18831 bool Invert = CCVal == ISD::SETEQ;
18832 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18833 if (Invert)
18834 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18835
18836 RHS = LHS.getOperand(1);
18837 LHS = LHS.getOperand(0);
18838 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18839
18840 CC = DAG.getCondCode(CCVal);
18841 return true;
18842 }
18843
18844 // If XOR is reused and has an immediate that will fit in XORI,
18845 // do not fold.
18846 auto isXorImmediate = [](const SDValue &Op) -> bool {
18847 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18848 return isInt<12>(XorCnst->getSExtValue());
18849 return false;
18850 };
18851 // Fold (X(i1) ^ 1) == 0 -> X != 0
18852 auto singleBitOp = [&DAG](const SDValue &VarOp,
18853 const SDValue &ConstOp) -> bool {
18854 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18855 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18856 return (XorCnst->getSExtValue() == 1) &&
18857 DAG.MaskedValueIsZero(VarOp, Mask);
18858 }
18859 return false;
18860 };
18861 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18862 for (const SDNode *UserNode : Op->users()) {
18863 const unsigned Opcode = UserNode->getOpcode();
18864 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18865 return false;
18866 }
18867 return true;
18868 };
18869 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18870 const SDValue &LHS, const SDValue &RHS) -> bool {
18871 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18872 (!isXorImmediate(LHS.getOperand(1)) ||
18873 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18874 onlyUsedBySelectOrBR(LHS));
18875 };
18876 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18877 if (isFoldableXorEq(LHS, RHS)) {
18878 RHS = LHS.getOperand(1);
18879 LHS = LHS.getOperand(0);
18880 return true;
18881 }
18882 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), (sext C), eq/ne)
18883 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18884 const SDValue LHS0 = LHS.getOperand(0);
18885 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18886 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
18887 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18888 LHS0.getOperand(1), LHS.getOperand(1));
18889 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18890 LHS0.getOperand(0), LHS.getOperand(1));
18891 return true;
18892 }
18893 }
18894
18895 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18896 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18897 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18898 SDValue LHS0 = LHS.getOperand(0);
18899 if (LHS0.getOpcode() == ISD::AND &&
18900 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18901 uint64_t Mask = LHS0.getConstantOperandVal(1);
18902 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18903 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18904 // XAndesPerf supports branch on test bit.
18905 if (Subtarget.hasVendorXAndesPerf()) {
18906 LHS =
18907 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18908 DAG.getConstant(Mask, DL, LHS.getValueType()));
18909 return true;
18910 }
18911
18912 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18913 CC = DAG.getCondCode(CCVal);
18914
18915 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18916 LHS = LHS0.getOperand(0);
18917 if (ShAmt != 0)
18918 LHS =
18919 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18920 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18921 return true;
18922 }
18923 }
18924 }
18925
18926 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18927 // This can occur when legalizing some floating point comparisons.
18928 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18929 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18930 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18931 CC = DAG.getCondCode(CCVal);
18932 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18933 return true;
18934 }
18935
18936 if (isNullConstant(RHS)) {
18937 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
18938 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18939 CC = DAG.getCondCode(CCVal);
18940 LHS = NewCond;
18941 return true;
18942 }
18943 }
18944
18945 return false;
18946}
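// Illustrative sketch (not part of the lowering code): the single-bit test
// rewrite above moves the tested bit into the sign position, so an eq/ne
// compare against zero becomes a ge/lt compare against zero that a branch can
// use directly. Scalar form for XLEN == 64 (uint64_t/int64_t assumed
// available):
static inline bool bitIsSet(uint64_t X, unsigned C) { // C < 64
  bool SrlForm = ((X >> C) & 1) != 0;          // ((srl (and X, 1<<C), C), 0, ne)
  bool ShlForm = (int64_t)(X << (63 - C)) < 0; // ((shl X, 63-C), 0, lt)
  (void)SrlForm;                               // the two forms agree
  return ShlForm;
}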
18947
18948// Fold
18949// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
18950// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
18951// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
18952// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18953// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18954// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
18955static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
18956 SDValue TrueVal, SDValue FalseVal,
18957 bool Swapped) {
18958 bool Commutative = true;
18959 unsigned Opc = TrueVal.getOpcode();
18960 switch (Opc) {
18961 default:
18962 return SDValue();
18963 case ISD::SHL:
18964 case ISD::SRA:
18965 case ISD::SRL:
18966 case ISD::SUB:
18967 case ISD::ROTL:
18968 case ISD::ROTR:
18969 Commutative = false;
18970 break;
18971 case ISD::ADD:
18972 case ISD::OR:
18973 case ISD::XOR:
18974 case ISD::UMIN:
18975 case ISD::UMAX:
18976 break;
18977 }
18978
18979 if (!TrueVal.hasOneUse())
18980 return SDValue();
18981
18982 unsigned OpToFold;
18983 if (FalseVal == TrueVal.getOperand(0))
18984 OpToFold = 0;
18985 else if (Commutative && FalseVal == TrueVal.getOperand(1))
18986 OpToFold = 1;
18987 else
18988 return SDValue();
18989
18990 EVT VT = N->getValueType(0);
18991 SDLoc DL(N);
18992 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
18993 EVT OtherOpVT = OtherOp.getValueType();
18994 SDValue IdentityOperand =
18995 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
18996 if (!Commutative)
18997 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
18998 assert(IdentityOperand && "No identity operand!");
18999
19000 if (Swapped)
19001 std::swap(OtherOp, IdentityOperand);
19002 SDValue NewSel =
19003 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
19004 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
19005}
19006
19007// This tries to get rid of the `select` and `icmp` that are being used to
19008// handle targets that do not support `cttz(0)`/`ctlz(0)`.
19009static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
19010 SDValue Cond = N->getOperand(0);
19011
19012 // This represents either CTTZ or CTLZ instruction.
19013 SDValue CountZeroes;
19014
19015 SDValue ValOnZero;
19016
19017 if (Cond.getOpcode() != ISD::SETCC)
19018 return SDValue();
19019
19020 if (!isNullConstant(Cond->getOperand(1)))
19021 return SDValue();
19022
19023 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
19024 if (CCVal == ISD::CondCode::SETEQ) {
19025 CountZeroes = N->getOperand(2);
19026 ValOnZero = N->getOperand(1);
19027 } else if (CCVal == ISD::CondCode::SETNE) {
19028 CountZeroes = N->getOperand(1);
19029 ValOnZero = N->getOperand(2);
19030 } else {
19031 return SDValue();
19032 }
19033
19034 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
19035 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
19036 CountZeroes = CountZeroes.getOperand(0);
19037
19038 if (CountZeroes.getOpcode() != ISD::CTTZ &&
19039 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
19040 CountZeroes.getOpcode() != ISD::CTLZ &&
19041 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
19042 return SDValue();
19043
19044 if (!isNullConstant(ValOnZero))
19045 return SDValue();
19046
19047 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
19048 if (Cond->getOperand(0) != CountZeroesArgument)
19049 return SDValue();
19050
19051 unsigned BitWidth = CountZeroes.getValueSizeInBits();
19052 if (!isPowerOf2_32(BitWidth))
19053 return SDValue();
19054
19055 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
19056 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
19057 CountZeroes.getValueType(), CountZeroesArgument);
19058 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
19059 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
19060 CountZeroes.getValueType(), CountZeroesArgument);
19061 }
19062
19063 SDValue BitWidthMinusOne =
19064 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
19065
19066 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
19067 CountZeroes, BitWidthMinusOne);
19068 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
19069}
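// Illustrative sketch (not part of the lowering code): once cttz(0) is defined
// to be the bit width (as ISD::CTTZ and llvm::countr_zero are), masking the
// result with BitWidth-1 already yields 0 for a zero input, so the compare and
// select can be dropped. 32-bit scalar model:
static inline unsigned cttzSelectFold(uint32_t X) {
  unsigned WithSelect = (X == 0) ? 0 : (unsigned)llvm::countr_zero(X);
  unsigned Masked = (unsigned)llvm::countr_zero(X) & 31u; // 31 == BitWidth - 1
  (void)WithSelect; // equal to Masked for every X, including X == 0
  return Masked;
}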
19070
19071static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
19072 const RISCVSubtarget &Subtarget) {
19073 SDValue Cond = N->getOperand(0);
19074 SDValue True = N->getOperand(1);
19075 SDValue False = N->getOperand(2);
19076 SDLoc DL(N);
19077 EVT VT = N->getValueType(0);
19078 EVT CondVT = Cond.getValueType();
19079
19080 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
19081 return SDValue();
19082
19083 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
19084 // BEXTI, where C is power of 2.
19085 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
19086 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
19087 SDValue LHS = Cond.getOperand(0);
19088 SDValue RHS = Cond.getOperand(1);
19089 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19090 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
19091 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
19092 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
19093 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
19094 return DAG.getSelect(DL, VT,
19095 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
19096 False, True);
19097 }
19098 }
19099 return SDValue();
19100}
19101
19102static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19103 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19104 return false;
19105
19106 SwapCC = false;
19107 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19108 std::swap(TrueVal, FalseVal);
19109 SwapCC = true;
19110 }
19111
19112 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19113 return false;
19114
19115 SDValue A = FalseVal.getOperand(0);
19116 SDValue B = FalseVal.getOperand(1);
19117 // Add is commutative, so check both orders
19118 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19119 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19120}
19121
19122/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19123/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
19124/// register pressure over the add followed by masked vsub sequence.
19125static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
19126 SDLoc DL(N);
19127 EVT VT = N->getValueType(0);
19128 SDValue CC = N->getOperand(0);
19129 SDValue TrueVal = N->getOperand(1);
19130 SDValue FalseVal = N->getOperand(2);
19131
19132 bool SwapCC;
19133 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19134 return SDValue();
19135
19136 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19137 SDValue A = Sub.getOperand(0);
19138 SDValue B = Sub.getOperand(1);
19139
19140 // Arrange the select such that we can match a masked
19141 // vrsub.vi to perform the conditional negate
19142 SDValue NegB = DAG.getNegative(B, DL, VT);
19143 if (!SwapCC)
19144 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19145 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19146 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19147}
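// Illustrative sketch (not part of the lowering code): element-wise, the
// rewrite above is select(CC, A + B, A - B) == A + select(CC, B, -B); the add
// becomes an unconditional vadd.vv and the conditional negate a masked
// vrsub.vi. Scalar model (two's-complement wrap assumed, as in the vector
// domain):
static inline int64_t selectAddSub(bool CC, int64_t A, int64_t B) {
  int64_t Direct = CC ? (A + B) : (A - B);
  int64_t Rewritten = A + (CC ? B : -B);
  (void)Direct; // both forms agree
  return Rewritten;
}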
19148
19149static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
19150 const RISCVSubtarget &Subtarget) {
19151 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19152 return Folded;
19153
19154 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19155 return V;
19156
19157 if (Subtarget.hasConditionalMoveFusion())
19158 return SDValue();
19159
19160 SDValue TrueVal = N->getOperand(1);
19161 SDValue FalseVal = N->getOperand(2);
19162 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19163 return V;
19164 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19165}
19166
19167/// If we have a build_vector where each lane is binop X, C, where C
19168/// is a constant (but not necessarily the same constant on all lanes),
19169/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19170/// We assume that materializing a constant build vector will be no more
19171/// expensive than performing O(n) binops.
19172static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
19173 const RISCVSubtarget &Subtarget,
19174 const RISCVTargetLowering &TLI) {
19175 SDLoc DL(N);
19176 EVT VT = N->getValueType(0);
19177
19178 assert(!VT.isScalableVector() && "unexpected build vector");
19179
19180 if (VT.getVectorNumElements() == 1)
19181 return SDValue();
19182
19183 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19184 if (!TLI.isBinOp(Opcode))
19185 return SDValue();
19186
19187 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19188 return SDValue();
19189
19190 // This BUILD_VECTOR involves an implicit truncation, and sinking
19191 // truncates through binops is non-trivial.
19192 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19193 return SDValue();
19194
19195 SmallVector<SDValue> LHSOps;
19196 SmallVector<SDValue> RHSOps;
19197 for (SDValue Op : N->ops()) {
19198 if (Op.isUndef()) {
19199 // We can't form a divide or remainder from undef.
19200 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19201 return SDValue();
19202
19203 LHSOps.push_back(Op);
19204 RHSOps.push_back(Op);
19205 continue;
19206 }
19207
19208 // TODO: We can handle operations which have a neutral rhs value
19209 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19210 // of profit in a more explicit manner.
19211 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19212 return SDValue();
19213
19214 LHSOps.push_back(Op.getOperand(0));
19215 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19216 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19217 return SDValue();
19218 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19219 // have different LHS and RHS types.
19220 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19221 return SDValue();
19222
19223 RHSOps.push_back(Op.getOperand(1));
19224 }
19225
19226 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19227 DAG.getBuildVector(VT, DL, RHSOps));
19228}
19229
19230static MVT getQDOTXResultType(MVT OpVT) {
19231 ElementCount OpEC = OpVT.getVectorElementCount();
19232 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19233 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19234}
19235
19236/// Given fixed length vectors A and B with equal element types, but possibly
19237/// different number of elements, return A + B where either A or B is zero
19238/// padded to the larger number of elements.
19239static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19240 SelectionDAG &DAG) {
19241 // NOTE: Manually doing the extract/add/insert scheme produces
19242 // significantly better codegen than the naive pad with zeros
19243 // and add scheme.
19244 EVT AVT = A.getValueType();
19245 EVT BVT = B.getValueType();
19248 std::swap(A, B);
19249 std::swap(AVT, BVT);
19250 }
19251
19252 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19253 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19254 return DAG.getInsertSubvector(DL, B, Res, 0);
19255}
19256
19257static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19258 SelectionDAG &DAG,
19259 const RISCVSubtarget &Subtarget,
19260 const RISCVTargetLowering &TLI) {
19261 using namespace SDPatternMatch;
19262 // Note: We intentionally do not check the legality of the reduction type.
19263 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19264 // intermediate types flow through here.
19265 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19267 return SDValue();
19268
19269 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19270 // form).
19271 SDValue A, B;
19272 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19273 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19274 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19275 if (AOpt || BOpt) {
19276 if (AOpt)
19277 A = AOpt;
19278 if (BOpt)
19279 B = BOpt;
19280 // From here, we're doing A + B with mixed types, implicitly zero
19281 // padded to the wider type. Note that we *don't* need the result
19282 // type to be the original VT, and in fact prefer narrower ones
19283 // if possible.
19284 return getZeroPaddedAdd(DL, A, B, DAG);
19285 }
19286 }
19287
19288 // zext a <--> partial_reduce_umla 0, a, 1
19289 // sext a <--> partial_reduce_smla 0, a, 1
19290 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19291 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19292 SDValue A = InVec.getOperand(0);
19293 EVT OpVT = A.getValueType();
19294 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19295 return SDValue();
19296
19297 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19298 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19299 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19300 unsigned Opc =
19301 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19302 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19303 }
19304
19305 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19306 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19307 // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
19308 // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
19309 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19310 return SDValue();
19311
19312 if (!ISD::isExtOpcode(A.getOpcode()))
19313 return SDValue();
19314
19315 EVT OpVT = A.getOperand(0).getValueType();
19316 if (OpVT.getVectorElementType() != MVT::i8 ||
19317 OpVT != B.getOperand(0).getValueType() ||
19318 !TLI.isTypeLegal(A.getValueType()))
19319 return SDValue();
19320
19321 unsigned Opc;
19322 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19323 Opc = ISD::PARTIAL_REDUCE_SMLA;
19324 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19325 B.getOpcode() == ISD::ZERO_EXTEND)
19326 Opc = ISD::PARTIAL_REDUCE_UMLA;
19327 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19328 B.getOpcode() == ISD::ZERO_EXTEND)
19329 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19330 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19331 B.getOpcode() == ISD::SIGN_EXTEND) {
19332 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19333 std::swap(A, B);
19334 } else
19335 return SDValue();
19336
19337 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19338 return DAG.getNode(
19339 Opc, DL, ResVT,
19340 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19341}
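// Illustrative sketch (not part of the lowering code): a vqdot-style partial
// reduction multiplies groups of four i8 elements and accumulates each group
// into one i32 lane; a plain sign/zero extend is the degenerate case where the
// second operand is a splat of 1, which is how the sext/zext patterns above
// are expressed. Scalar model of a single i32 lane (int8_t/int32_t assumed
// available):
static inline int32_t partialReduceSMLALane(const int8_t A[4],
                                            const int8_t B[4], int32_t Acc) {
  for (int I = 0; I < 4; ++I)
    Acc += (int32_t)A[I] * (int32_t)B[I]; // PARTIAL_REDUCE_SMLA semantics
  return Acc; // with B[I] == 1 this is just the sum of sign-extended A
}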
19342
19343static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19344 const RISCVSubtarget &Subtarget,
19345 const RISCVTargetLowering &TLI) {
19346 if (!Subtarget.hasStdExtZvqdotq())
19347 return SDValue();
19348
19349 SDLoc DL(N);
19350 EVT VT = N->getValueType(0);
19351 SDValue InVec = N->getOperand(0);
19352 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19353 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19354 return SDValue();
19355}
19356
19357static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19358 const RISCVSubtarget &Subtarget,
19359 const RISCVTargetLowering &TLI) {
19360 SDValue InVec = N->getOperand(0);
19361 SDValue InVal = N->getOperand(1);
19362 SDValue EltNo = N->getOperand(2);
19363 SDLoc DL(N);
19364
19365 EVT VT = InVec.getValueType();
19366 if (VT.isScalableVector())
19367 return SDValue();
19368
19369 if (!InVec.hasOneUse())
19370 return SDValue();
19371
19372 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19373 // move the insert_vector_elts into the arms of the binop. Note that
19374 // the new RHS must be a constant.
19375 const unsigned InVecOpcode = InVec->getOpcode();
19376 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19377 InVal.hasOneUse()) {
19378 SDValue InVecLHS = InVec->getOperand(0);
19379 SDValue InVecRHS = InVec->getOperand(1);
19380 SDValue InValLHS = InVal->getOperand(0);
19381 SDValue InValRHS = InVal->getOperand(1);
19382
19383 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19384 return SDValue();
19385 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19386 return SDValue();
19387 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19388 // have different LHS and RHS types.
19389 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19390 return SDValue();
19391 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19392 InVecLHS, InValLHS, EltNo);
19393 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19394 InVecRHS, InValRHS, EltNo);
19395 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19396 }
19397
19398 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19399 // move the insert_vector_elt to the source operand of the concat_vector.
19400 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19401 return SDValue();
19402
19403 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19404 if (!IndexC)
19405 return SDValue();
19406 unsigned Elt = IndexC->getZExtValue();
19407
19408 EVT ConcatVT = InVec.getOperand(0).getValueType();
19409 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19410 return SDValue();
19411 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19412 unsigned NewIdx = Elt % ConcatNumElts;
19413
19414 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19415 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19416 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19417
19418 SmallVector<SDValue> ConcatOps(InVec->ops());
19419 ConcatOps[ConcatOpIdx] = ConcatOp;
19420 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19421}
19422
19423// If we're concatenating a series of vector loads like
19424// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19425// Then we can turn this into a strided load by widening the vector elements
19426// vlse32 p, stride=n
19427static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19428 const RISCVSubtarget &Subtarget,
19429 const RISCVTargetLowering &TLI) {
19430 SDLoc DL(N);
19431 EVT VT = N->getValueType(0);
19432
19433 // Only perform this combine on legal MVTs.
19434 if (!TLI.isTypeLegal(VT))
19435 return SDValue();
19436
19437 // TODO: Potentially extend this to scalable vectors
19438 if (VT.isScalableVector())
19439 return SDValue();
19440
19441 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19442 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19443 !SDValue(BaseLd, 0).hasOneUse())
19444 return SDValue();
19445
19446 EVT BaseLdVT = BaseLd->getValueType(0);
19447
19448 // Go through the loads and check that they're strided
19450 Lds.push_back(BaseLd);
19451 Align Align = BaseLd->getAlign();
19452 for (SDValue Op : N->ops().drop_front()) {
19453 auto *Ld = dyn_cast<LoadSDNode>(Op);
19454 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19455 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19456 Ld->getValueType(0) != BaseLdVT)
19457 return SDValue();
19458
19459 Lds.push_back(Ld);
19460
19461 // The common alignment is the most restrictive (smallest) of all the loads
19462 Align = std::min(Align, Ld->getAlign());
19463 }
19464
19465 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19466 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19467 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19468 // If the load ptrs can be decomposed into a common (Base + Index) with a
19469 // common constant stride, then return the constant stride.
19470 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19471 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19472 if (BIO1.equalBaseIndex(BIO2, DAG))
19473 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19474
19475 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19476 SDValue P1 = Ld1->getBasePtr();
19477 SDValue P2 = Ld2->getBasePtr();
19478 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19479 return {{P2.getOperand(1), false}};
19480 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19481 return {{P1.getOperand(1), true}};
19482
19483 return std::nullopt;
19484 };
19485
19486 // Get the distance between the first and second loads
19487 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19488 if (!BaseDiff)
19489 return SDValue();
19490
19491 // Check all the loads are the same distance apart
19492 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19493 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19494 return SDValue();
19495
19496 // TODO: At this point, we've successfully matched a generalized gather
19497 // load. Maybe we should emit that, and then move the specialized
19498 // matchers above and below into a DAG combine?
19499
19500 // Get the widened scalar type, e.g. v4i8 -> i64
19501 unsigned WideScalarBitWidth =
19502 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19503 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19504
19505 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19506 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19507 if (!TLI.isTypeLegal(WideVecVT))
19508 return SDValue();
19509
19510 // Check that the operation is legal
19511 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19512 return SDValue();
19513
19514 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19515 SDValue Stride =
19516 std::holds_alternative<SDValue>(StrideVariant)
19517 ? std::get<SDValue>(StrideVariant)
19518 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19519 Lds[0]->getOffset().getValueType());
19520 if (MustNegateStride)
19521 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19522
19523 SDValue AllOneMask =
19524 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19525 DAG.getConstant(1, DL, MVT::i1));
19526
19527 uint64_t MemSize;
19528 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19529 ConstStride && ConstStride->getSExtValue() >= 0)
19530 // total size = (elsize * n) + (stride - elsize) * (n-1)
19531 // = elsize + stride * (n-1)
19532 MemSize = WideScalarVT.getSizeInBits() +
19533 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19534 else
19535 // If Stride isn't constant, then we can't know how much it will load
19536 MemSize = MemoryLocation::UnknownSize;
19537
19538 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19539 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19540 Align);
19541
19542 SDValue StridedLoad = DAG.getStridedLoadVP(
19543 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19544 AllOneMask,
19545 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19546
19547 for (SDValue Ld : N->ops())
19548 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19549
19550 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19551}
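// Illustrative sketch (not part of the lowering code): the MemSize computation
// above uses the identity spelled out in its comment. For n elements of size
// elsize accessed with a constant stride no smaller than elsize, the span from
// the first byte touched to the last is elsize * n + (stride - elsize) * (n-1),
// which simplifies to elsize + stride * (n - 1). A units-agnostic restatement
// (uint64_t assumed available):
static inline uint64_t stridedAccessSpan(uint64_t ElSize, uint64_t Stride,
                                         uint64_t N) {
  uint64_t Expanded = ElSize * N + (Stride - ElSize) * (N - 1);
  uint64_t Folded = ElSize + Stride * (N - 1);
  (void)Expanded; // algebraically identical to Folded
  return Folded;
}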
19552
19553static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
19554 const RISCVSubtarget &Subtarget,
19555 const RISCVTargetLowering &TLI) {
19556 SDLoc DL(N);
19557 EVT VT = N->getValueType(0);
19558 const unsigned ElementSize = VT.getScalarSizeInBits();
19559 const unsigned NumElts = VT.getVectorNumElements();
19560 SDValue V1 = N->getOperand(0);
19561 SDValue V2 = N->getOperand(1);
19562 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19563 MVT XLenVT = Subtarget.getXLenVT();
19564
19565 // Recognize a disguised select of add/sub.
19566 bool SwapCC;
19567 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19568 matchSelectAddSub(V1, V2, SwapCC)) {
19569 SDValue Sub = SwapCC ? V1 : V2;
19570 SDValue A = Sub.getOperand(0);
19571 SDValue B = Sub.getOperand(1);
19572
19573 SmallVector<SDValue> MaskVals;
19574 for (int MaskIndex : Mask) {
19575 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19576 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19577 }
19578 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19579 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19580 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19581
19582 // Arrange the select such that we can match a masked
19583 // vrsub.vi to perform the conditional negate
19584 SDValue NegB = DAG.getNegative(B, DL, VT);
19585 if (!SwapCC)
19586 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19587 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19588 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19589 }
19590
19591 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19592 // during the combine phase before type legalization, and relies on
19593 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19594 // for the source mask.
19595 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19596 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19597 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19598 return SDValue();
19599
19600 SmallVector<int, 8> NewMask;
19601 narrowShuffleMaskElts(2, Mask, NewMask);
19602
19603 LLVMContext &C = *DAG.getContext();
19604 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19605 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19606 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19607 DAG.getBitcast(NewVT, V2), NewMask);
19608 return DAG.getBitcast(VT, Res);
19609}
19610
19611static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
19612 const RISCVSubtarget &Subtarget) {
19613 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19614
19615 if (N->getValueType(0).isFixedLengthVector())
19616 return SDValue();
19617
19618 SDValue Addend = N->getOperand(0);
19619 SDValue MulOp = N->getOperand(1);
19620
19621 if (N->getOpcode() == RISCVISD::ADD_VL) {
19622 SDValue AddPassthruOp = N->getOperand(2);
19623 if (!AddPassthruOp.isUndef())
19624 return SDValue();
19625 }
19626
19627 auto IsVWMulOpc = [](unsigned Opc) {
19628 switch (Opc) {
19629 case RISCVISD::VWMUL_VL:
19630 case RISCVISD::VWMULU_VL:
19631 case RISCVISD::VWMULSU_VL:
19632 return true;
19633 default:
19634 return false;
19635 }
19636 };
19637
19638 if (!IsVWMulOpc(MulOp.getOpcode()))
19639 std::swap(Addend, MulOp);
19640
19641 if (!IsVWMulOpc(MulOp.getOpcode()))
19642 return SDValue();
19643
19644 SDValue MulPassthruOp = MulOp.getOperand(2);
19645
19646 if (!MulPassthruOp.isUndef())
19647 return SDValue();
19648
19649 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19650 const RISCVSubtarget &Subtarget) {
19651 if (N->getOpcode() == ISD::ADD) {
19652 SDLoc DL(N);
19653 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19654 Subtarget);
19655 }
19656 return std::make_pair(N->getOperand(3), N->getOperand(4));
19657 }(N, DAG, Subtarget);
19658
19659 SDValue MulMask = MulOp.getOperand(3);
19660 SDValue MulVL = MulOp.getOperand(4);
19661
19662 if (AddMask != MulMask || AddVL != MulVL)
19663 return SDValue();
19664
19665 const auto &TSInfo =
19666 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19667 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19668
19669 SDLoc DL(N);
19670 EVT VT = N->getValueType(0);
19671 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19672 AddVL};
19673 return DAG.getNode(Opc, DL, VT, Ops);
19674}
19675
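// Fold an add into the accumulator of a partially accumulated dot product.
// Roughly: (add_vl addend, (vqdot_vl a, b, accum, mask, vl), undef, mask, vl)
// is rewritten so the vqdot node accumulates (add_vl addend, accum) instead,
// assuming an all-ones mask and matching VLs (checked below).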
19676static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19677 const RISCVSubtarget &Subtarget) {
19678
19679 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19680
19681 if (!N->getValueType(0).isVector())
19682 return SDValue();
19683
19684 SDValue Addend = N->getOperand(0);
19685 SDValue DotOp = N->getOperand(1);
19686
19687 if (N->getOpcode() == RISCVISD::ADD_VL) {
19688 SDValue AddPassthruOp = N->getOperand(2);
19689 if (!AddPassthruOp.isUndef())
19690 return SDValue();
19691 }
19692
19693 auto IsVqdotqOpc = [](unsigned Opc) {
19694 switch (Opc) {
19695 case RISCVISD::VQDOT_VL:
19696 case RISCVISD::VQDOTU_VL:
19697 case RISCVISD::VQDOTSU_VL:
19698 return true;
19699 default:
19700 return false;
19701 }
19702 };
19703
19704 if (!IsVqdotqOpc(DotOp.getOpcode()))
19705 std::swap(Addend, DotOp);
19706
19707 if (!IsVqdotqOpc(DotOp.getOpcode()))
19708 return SDValue();
19709
19710 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19711 const RISCVSubtarget &Subtarget) {
19712 if (N->getOpcode() == ISD::ADD) {
19713 SDLoc DL(N);
19714 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19715 Subtarget);
19716 }
19717 return std::make_pair(N->getOperand(3), N->getOperand(4));
19718 }(N, DAG, Subtarget);
19719
19720 SDValue MulVL = DotOp.getOperand(4);
19721 if (AddVL != MulVL)
19722 return SDValue();
19723
19724 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19725 AddMask.getOperand(0) != MulVL)
19726 return SDValue();
19727
19728 SDValue AccumOp = DotOp.getOperand(2);
19729 SDLoc DL(N);
19730 EVT VT = N->getValueType(0);
19731 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19732 DAG.getUNDEF(VT), AddMask, AddVL);
19733
19734 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19735 DotOp.getOperand(3), DotOp->getOperand(4)};
19736 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19737}
19738
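// For example (a sketch): a gather whose index vector is signed i16 on RV64
// has its indices sign-extended to i64 below and the index type re-tagged as
// unsigned, since RVV indexed accesses only provide unsigned, unscaled
// addressing; the index values themselves are unchanged.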
19739static bool
19740legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19741 ISD::MemIndexType &IndexType,
19742 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19743 if (!DCI.isBeforeLegalize())
19744 return false;
19745
19746 SelectionDAG &DAG = DCI.DAG;
19747 const MVT XLenVT =
19748 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19749
19750 const EVT IndexVT = Index.getValueType();
19751
19752 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19753 // mode, so anything else must be manually legalized.
19754 if (!isIndexTypeSigned(IndexType))
19755 return false;
19756
19757 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19758 // Any index legalization should first promote to XLenVT, so we don't lose
19759 // bits when scaling. This may create an illegal index type so we let
19760 // LLVM's legalization take care of the splitting.
19761 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19762 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19763 IndexVT.changeVectorElementType(XLenVT), Index);
19764 }
19765 IndexType = ISD::UNSIGNED_SCALED;
19766 return true;
19767}
19768
19769/// Match the index vector of a scatter or gather node as the shuffle mask
19770/// which performs the rearrangement if possible. Will only match if
19771/// all lanes are touched, and thus replacing the scatter or gather with
19772/// a unit strided access and shuffle is legal.
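/// For example (with 8-byte elements): an all-ones mask and constant indices
/// {8, 0, 24, 16} touch each of the four lanes exactly once, so the access
/// can become a unit-strided operation plus the shuffle mask {1, 0, 3, 2}.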
19773static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19774 SmallVector<int> &ShuffleMask) {
19775 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19776 return false;
19777 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19778 return false;
19779
19780 const unsigned ElementSize = VT.getScalarStoreSize();
19781 const unsigned NumElems = VT.getVectorNumElements();
19782
19783 // Create the shuffle mask and check all bits active
19784 assert(ShuffleMask.empty());
19785 BitVector ActiveLanes(NumElems);
19786 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19787 // TODO: We've found an active bit of UB, and could be
19788 // more aggressive here if desired.
19789 if (Index->getOperand(i)->isUndef())
19790 return false;
19791 uint64_t C = Index->getConstantOperandVal(i);
19792 if (C % ElementSize != 0)
19793 return false;
19794 C = C / ElementSize;
19795 if (C >= NumElems)
19796 return false;
19797 ShuffleMask.push_back(C);
19798 ActiveLanes.set(C);
19799 }
19800 return ActiveLanes.all();
19801}
19802
19803/// Match the index of a gather or scatter operation as an operation
19804/// with twice the element width and half the number of elements. This is
19805/// generally profitable (if legal) because these operations are linear
19806/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19807/// come out ahead.
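/// For example (with 4-byte elements): constant indices {0, 4, 16, 20} pair
/// up into two 8-byte accesses at offsets 0 and 16, so the operation can use
/// half the elements at twice the element width, subject to the ELEN and
/// alignment checks below.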
19808static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19809 Align BaseAlign, const RISCVSubtarget &ST) {
19810 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19811 return false;
19812 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19813 return false;
19814
19815 // Attempt a doubling. If we can use an element type 4x or 8x in
19816 // size, this will happen via multiple iterations of the transform.
19817 const unsigned NumElems = VT.getVectorNumElements();
19818 if (NumElems % 2 != 0)
19819 return false;
19820
19821 const unsigned ElementSize = VT.getScalarStoreSize();
19822 const unsigned WiderElementSize = ElementSize * 2;
19823 if (WiderElementSize > ST.getELen()/8)
19824 return false;
19825
19826 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19827 return false;
19828
19829 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19830 // TODO: We've found an active bit of UB, and could be
19831 // more aggressive here if desired.
19832 if (Index->getOperand(i)->isUndef())
19833 return false;
19834 // TODO: This offset check is too strict if we support fully
19835 // misaligned memory operations.
19836 uint64_t C = Index->getConstantOperandVal(i);
19837 if (i % 2 == 0) {
19838 if (C % WiderElementSize != 0)
19839 return false;
19840 continue;
19841 }
19842 uint64_t Last = Index->getConstantOperandVal(i-1);
19843 if (C != Last + ElementSize)
19844 return false;
19845 }
19846 return true;
19847}
19848
19849// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19850// This benefits cases where X and Y are both of the same low-precision
19851// vector value type. Since the truncate would be lowered into n levels of
19852// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
19853// such a pattern would be expanded into a series of "vsetvli"
19854// and "vnsrl" instructions later to reach this point.
19855static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19856 SDValue Mask = N->getOperand(1);
19857 SDValue VL = N->getOperand(2);
19858
19859 bool IsVLMAX = isAllOnesConstant(VL) ||
19860 (isa<RegisterSDNode>(VL) &&
19861 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19862 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19863 Mask.getOperand(0) != VL)
19864 return SDValue();
19865
19866 auto IsTruncNode = [&](SDValue V) {
19867 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19868 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19869 };
19870
19871 SDValue Op = N->getOperand(0);
19872
19873 // We first need to find the innermost TRUNCATE_VECTOR_VL node
19874 // to distinguish this pattern.
19875 while (IsTruncNode(Op)) {
19876 if (!Op.hasOneUse())
19877 return SDValue();
19878 Op = Op.getOperand(0);
19879 }
19880
19881 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19882 return SDValue();
19883
19884 SDValue N0 = Op.getOperand(0);
19885 SDValue N1 = Op.getOperand(1);
19886 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19887 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19888 return SDValue();
19889
19890 SDValue N00 = N0.getOperand(0);
19891 SDValue N10 = N1.getOperand(0);
19892 if (!N00.getValueType().isVector() ||
19893 N00.getValueType() != N10.getValueType() ||
19894 N->getValueType(0) != N10.getValueType())
19895 return SDValue();
19896
19897 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19898 SDValue SMin =
19899 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19900 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19901 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19902}
19903
19904// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19905// maximum value for the truncated type.
19906// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19907// is the signed maximum value for the truncated type and C2 is the signed
19908// minimum value.
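// For example (a sketch), when truncating i16 elements to i8:
//   (truncate_vector_vl (umin X, 255))              -> vnclipu
//   (truncate_vector_vl (smin (smax X, -128), 127)) -> vnclip
// i.e. the explicit clamp constants are absorbed into the saturating narrow.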
19909static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19910 const RISCVSubtarget &Subtarget) {
19911 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19912
19913 MVT VT = N->getSimpleValueType(0);
19914
19915 SDValue Mask = N->getOperand(1);
19916 SDValue VL = N->getOperand(2);
19917
19918 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19919 APInt &SplatVal) {
19920 if (V.getOpcode() != Opc &&
19921 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19922 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19923 return SDValue();
19924
19925 SDValue Op = V.getOperand(1);
19926
19927 // Peek through conversion between fixed and scalable vectors.
19928 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19929 isNullConstant(Op.getOperand(2)) &&
19930 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19931 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19932 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
19933 isNullConstant(Op.getOperand(1).getOperand(1)))
19934 Op = Op.getOperand(1).getOperand(0);
19935
19936 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
19937 return V.getOperand(0);
19938
19939 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
19940 Op.getOperand(2) == VL) {
19941 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
19942 SplatVal =
19943 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
19944 return V.getOperand(0);
19945 }
19946 }
19947
19948 return SDValue();
19949 };
19950
19951 SDLoc DL(N);
19952
19953 auto DetectUSatPattern = [&](SDValue V) {
19954 APInt LoC, HiC;
19955
19956 // Simple case, V is a UMIN.
19957 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
19958 if (HiC.isMask(VT.getScalarSizeInBits()))
19959 return UMinOp;
19960
19961 // If we have an SMAX that removes negative numbers first, then we can match
19962 // SMIN instead of UMIN.
19963 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19964 if (SDValue SMaxOp =
19965 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19966 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
19967 return SMinOp;
19968
19969 // If we have an SMIN before an SMAX and the SMAX constant is less than or
19970 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
19971 // first.
19972 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19973 if (SDValue SMinOp =
19974 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19975 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
19976 HiC.uge(LoC))
19977 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
19978 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
19979 Mask, VL);
19980
19981 return SDValue();
19982 };
19983
19984 auto DetectSSatPattern = [&](SDValue V) {
19985 unsigned NumDstBits = VT.getScalarSizeInBits();
19986 unsigned NumSrcBits = V.getScalarValueSizeInBits();
19987 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
19988 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
19989
19990 APInt HiC, LoC;
19991 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
19992 if (SDValue SMaxOp =
19993 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19994 if (HiC == SignedMax && LoC == SignedMin)
19995 return SMaxOp;
19996
19997 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
19998 if (SDValue SMinOp =
19999 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20000 if (HiC == SignedMax && LoC == SignedMin)
20001 return SMinOp;
20002
20003 return SDValue();
20004 };
20005
20006 SDValue Src = N->getOperand(0);
20007
20008 // Look through multiple layers of truncates.
20009 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20010 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
20011 Src.hasOneUse())
20012 Src = Src.getOperand(0);
20013
20014 SDValue Val;
20015 unsigned ClipOpc;
20016 if ((Val = DetectUSatPattern(Src)))
20017 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
20018 else if ((Val = DetectSSatPattern(Src)))
20019 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
20020 else
20021 return SDValue();
20022
20023 MVT ValVT = Val.getSimpleValueType();
20024
20025 do {
20026 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
20027 ValVT = ValVT.changeVectorElementType(ValEltVT);
20028 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
20029 } while (ValVT != VT);
20030
20031 return Val;
20032}
20033
20034// Convert
20035// (iX ctpop (bitcast (vXi1 A)))
20036// ->
20037// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20038// and
20039// (iN reduce.add (zext (vXi1 A to vXiN))
20040// ->
20041// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20042// FIXME: It's complicated to match all the variations of this after type
20043// legalization so we only handle the pre-type legalization pattern, but that
20044// requires the fixed vector type to be legal.
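// For example (a sketch): (ctpop (i8 bitcast (v8i1 A))) and
// (i32 vecreduce.add (zext (v8i1 A) to v8i32)) both count the set bits of A,
// so each becomes a vcpop.m of A (inserted into a scalable container mask
// type) whose scalar result is then zero-extended or truncated as needed.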
20045static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
20046 const RISCVSubtarget &Subtarget) {
20047 unsigned Opc = N->getOpcode();
20048 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
20049 "Unexpected opcode");
20050 EVT VT = N->getValueType(0);
20051 if (!VT.isScalarInteger())
20052 return SDValue();
20053
20054 SDValue Src = N->getOperand(0);
20055
20056 if (Opc == ISD::CTPOP) {
20057 // Peek through zero_extend. It doesn't change the count.
20058 if (Src.getOpcode() == ISD::ZERO_EXTEND)
20059 Src = Src.getOperand(0);
20060
20061 if (Src.getOpcode() != ISD::BITCAST)
20062 return SDValue();
20063 Src = Src.getOperand(0);
20064 } else if (Opc == ISD::VECREDUCE_ADD) {
20065 if (Src.getOpcode() != ISD::ZERO_EXTEND)
20066 return SDValue();
20067 Src = Src.getOperand(0);
20068 }
20069
20070 EVT SrcEVT = Src.getValueType();
20071 if (!SrcEVT.isSimple())
20072 return SDValue();
20073
20074 MVT SrcMVT = SrcEVT.getSimpleVT();
20075 // Make sure the input is an i1 vector.
20076 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
20077 return SDValue();
20078
20079 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20080 if (!TLI.isTypeLegal(SrcMVT))
20081 return SDValue();
20082
20083 // Check that destination type is large enough to hold result without
20084 // overflow.
20085 if (Opc == ISD::VECREDUCE_ADD) {
20086 unsigned EltSize = SrcMVT.getScalarSizeInBits();
20087 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
20088 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
20089 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
20090 ? SrcMVT.getVectorNumElements()
20091 : RISCVTargetLowering::computeVLMAX(
20092 VectorBitsMax, EltSize, MinSize);
20093 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
20094 return SDValue();
20095 }
20096
20097 MVT ContainerVT = SrcMVT;
20098 if (SrcMVT.isFixedLengthVector()) {
20099 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20100 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20101 }
20102
20103 SDLoc DL(N);
20104 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20105
20106 MVT XLenVT = Subtarget.getXLenVT();
20107 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20108 return DAG.getZExtOrTrunc(Pop, DL, VT);
20109}
20110
20111static SDValue performSHLCombine(SDNode *N,
20112 TargetLowering::DAGCombinerInfo &DCI,
20113 const RISCVSubtarget &Subtarget) {
20114 // (shl (zext x), y) -> (vwsll x, y)
20115 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20116 return V;
20117
20118 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20119 // (shl (zext x), C) -> (vwmulu x, 1u << C)
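  // For example (a sketch): (shl (zext v4i8 X to v4i16), splat 3) becomes
  // (vwmulu X, splat (i8 8)), i.e. the shift amount is re-expressed as a
  // multiplication by 1 << 3 in the narrow type.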
20120
20121 if (!DCI.isAfterLegalizeDAG())
20122 return SDValue();
20123
20124 SDValue LHS = N->getOperand(0);
20125 if (!LHS.hasOneUse())
20126 return SDValue();
20127 unsigned Opcode;
20128 switch (LHS.getOpcode()) {
20129 case ISD::SIGN_EXTEND:
20130 case RISCVISD::VSEXT_VL:
20131 Opcode = RISCVISD::VWMULSU_VL;
20132 break;
20133 case ISD::ZERO_EXTEND:
20134 case RISCVISD::VZEXT_VL:
20135 Opcode = RISCVISD::VWMULU_VL;
20136 break;
20137 default:
20138 return SDValue();
20139 }
20140
20141 SDValue RHS = N->getOperand(1);
20142 APInt ShAmt;
20143 uint64_t ShAmtInt;
20144 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20145 ShAmtInt = ShAmt.getZExtValue();
20146 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20147 RHS.getOperand(1).getOpcode() == ISD::Constant)
20148 ShAmtInt = RHS.getConstantOperandVal(1);
20149 else
20150 return SDValue();
20151
20152 // Better foldings:
20153 // (shl (sext x), 1) -> (vwadd x, x)
20154 // (shl (zext x), 1) -> (vwaddu x, x)
20155 if (ShAmtInt <= 1)
20156 return SDValue();
20157
20158 SDValue NarrowOp = LHS.getOperand(0);
20159 MVT NarrowVT = NarrowOp.getSimpleValueType();
20160 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20161 if (ShAmtInt >= NarrowBits)
20162 return SDValue();
20163 MVT VT = N->getSimpleValueType(0);
20164 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20165 return SDValue();
20166
20167 SelectionDAG &DAG = DCI.DAG;
20168 SDLoc DL(N);
20169 SDValue Passthru, Mask, VL;
20170 switch (N->getOpcode()) {
20171 case ISD::SHL:
20172 Passthru = DAG.getUNDEF(VT);
20173 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20174 break;
20175 case RISCVISD::SHL_VL:
20176 Passthru = N->getOperand(2);
20177 Mask = N->getOperand(3);
20178 VL = N->getOperand(4);
20179 break;
20180 default:
20181 llvm_unreachable("Expected SHL");
20182 }
20183 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20184 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20185 Passthru, Mask, VL);
20186}
20187
20188SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20189 DAGCombinerInfo &DCI) const {
20190 SelectionDAG &DAG = DCI.DAG;
20191 const MVT XLenVT = Subtarget.getXLenVT();
20192 SDLoc DL(N);
20193
20194 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20195 // bits are demanded. N will be added to the Worklist if it was not deleted.
20196 // Caller should return SDValue(N, 0) if this returns true.
20197 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20198 SDValue Op = N->getOperand(OpNo);
20199 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20200 if (!SimplifyDemandedBits(Op, Mask, DCI))
20201 return false;
20202
20203 if (N->getOpcode() != ISD::DELETED_NODE)
20204 DCI.AddToWorklist(N);
20205 return true;
20206 };
20207
20208 switch (N->getOpcode()) {
20209 default:
20210 break;
20211 case RISCVISD::SplitF64: {
20212 SDValue Op0 = N->getOperand(0);
20213 // If the input to SplitF64 is just BuildPairF64 then the operation is
20214 // redundant. Instead, use BuildPairF64's operands directly.
20215 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20216 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20217
20218 if (Op0->isUndef()) {
20219 SDValue Lo = DAG.getUNDEF(MVT::i32);
20220 SDValue Hi = DAG.getUNDEF(MVT::i32);
20221 return DCI.CombineTo(N, Lo, Hi);
20222 }
20223
20224 // It's cheaper to materialise two 32-bit integers than to load a double
20225 // from the constant pool and transfer it to integer registers through the
20226 // stack.
20227 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20228 APInt V = C->getValueAPF().bitcastToAPInt();
20229 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20230 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20231 return DCI.CombineTo(N, Lo, Hi);
20232 }
20233
20234 // This is a target-specific version of a DAGCombine performed in
20235 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20236 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20237 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20238 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20239 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20240 break;
20241 SDValue NewSplitF64 =
20242 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20243 Op0.getOperand(0));
20244 SDValue Lo = NewSplitF64.getValue(0);
20245 SDValue Hi = NewSplitF64.getValue(1);
20246 APInt SignBit = APInt::getSignMask(32);
20247 if (Op0.getOpcode() == ISD::FNEG) {
20248 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20249 DAG.getConstant(SignBit, DL, MVT::i32));
20250 return DCI.CombineTo(N, Lo, NewHi);
20251 }
20252 assert(Op0.getOpcode() == ISD::FABS);
20253 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20254 DAG.getConstant(~SignBit, DL, MVT::i32));
20255 return DCI.CombineTo(N, Lo, NewHi);
20256 }
20257 case RISCVISD::SLLW:
20258 case RISCVISD::SRAW:
20259 case RISCVISD::SRLW:
20260 case RISCVISD::RORW:
20261 case RISCVISD::ROLW: {
20262 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20263 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20264 SimplifyDemandedLowBitsHelper(1, 5))
20265 return SDValue(N, 0);
20266
20267 break;
20268 }
20269 case RISCVISD::CLZW:
20270 case RISCVISD::CTZW: {
20271 // Only the lower 32 bits of the first operand are read
20272 if (SimplifyDemandedLowBitsHelper(0, 32))
20273 return SDValue(N, 0);
20274 break;
20275 }
20276 case RISCVISD::FMV_W_X_RV64: {
20277 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20278 // conversion is unnecessary and can be replaced with the
20279 // FMV_X_ANYEXTW_RV64 operand.
20280 SDValue Op0 = N->getOperand(0);
20281 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20282 return Op0.getOperand(0);
20283 break;
20284 }
20285 case RISCVISD::FMV_X_ANYEXTH:
20286 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20287 SDLoc DL(N);
20288 SDValue Op0 = N->getOperand(0);
20289 MVT VT = N->getSimpleValueType(0);
20290
20291 // Constant fold.
20292 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20293 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20294 return DAG.getConstant(Val, DL, VT);
20295 }
20296
20297 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20298 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20299 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20300 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20301 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20302 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20303 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20304 assert(Op0.getOperand(0).getValueType() == VT &&
20305 "Unexpected value type!");
20306 return Op0.getOperand(0);
20307 }
20308
20309 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20310 cast<LoadSDNode>(Op0)->isSimple()) {
20311 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20312 auto *LN0 = cast<LoadSDNode>(Op0);
20313 SDValue Load =
20314 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20315 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20316 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20317 return Load;
20318 }
20319
20320 // This is a target-specific version of a DAGCombine performed in
20321 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20322 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20323 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20324 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20325 !Op0.getNode()->hasOneUse())
20326 break;
20327 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20328 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20329 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20330 if (Op0.getOpcode() == ISD::FNEG)
20331 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20332 DAG.getConstant(SignBit, DL, VT));
20333
20334 assert(Op0.getOpcode() == ISD::FABS);
20335 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20336 DAG.getConstant(~SignBit, DL, VT));
20337 }
20338 case ISD::ABS: {
20339 EVT VT = N->getValueType(0);
20340 SDValue N0 = N->getOperand(0);
20341 // abs (sext) -> zext (abs)
20342 // abs (zext) -> zext (handled elsewhere)
20343 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20344 SDValue Src = N0.getOperand(0);
20345 SDLoc DL(N);
20346 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20347 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20348 }
20349 break;
20350 }
20351 case ISD::ADD: {
20352 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20353 return V;
20354 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20355 return V;
20356 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20357 return V;
20358 return performADDCombine(N, DCI, Subtarget);
20359 }
20360 case ISD::SUB: {
20361 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20362 return V;
20363 return performSUBCombine(N, DAG, Subtarget);
20364 }
20365 case ISD::AND:
20366 return performANDCombine(N, DCI, Subtarget);
20367 case ISD::OR: {
20368 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20369 return V;
20370 return performORCombine(N, DCI, Subtarget);
20371 }
20372 case ISD::XOR:
20373 return performXORCombine(N, DAG, Subtarget);
20374 case ISD::MUL:
20375 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20376 return V;
20377 return performMULCombine(N, DAG, DCI, Subtarget);
20378 case ISD::SDIV:
20379 case ISD::UDIV:
20380 case ISD::SREM:
20381 case ISD::UREM:
20382 if (SDValue V = combineBinOpOfZExt(N, DAG))
20383 return V;
20384 break;
20385 case ISD::FMUL: {
20386 using namespace SDPatternMatch;
20387 SDLoc DL(N);
20388 EVT VT = N->getValueType(0);
20389 SDValue X, Y;
20390 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20391 // hoistFNegAboveFMulFDiv.
20392 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20393 if (sd_match(N, m_FMul(m_Value(X), m_FNeg(m_Value(Y)))))
20394 return DAG.getNode(ISD::FNEG, DL, VT,
20395 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20396
20397 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20398 SDValue N0 = N->getOperand(0);
20399 SDValue N1 = N->getOperand(1);
20400 if (N0->getOpcode() != ISD::FCOPYSIGN)
20401 std::swap(N0, N1);
20402 if (N0->getOpcode() != ISD::FCOPYSIGN)
20403 return SDValue();
20404 auto *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20405 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20406 return SDValue();
20407 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20408 return SDValue();
20409 SDValue Sign = N0->getOperand(1);
20410 if (Sign.getValueType() != VT)
20411 return SDValue();
20412 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20413 }
20414 case ISD::FADD:
20415 case ISD::UMAX:
20416 case ISD::UMIN:
20417 case ISD::SMAX:
20418 case ISD::SMIN:
20419 case ISD::FMAXNUM:
20420 case ISD::FMINNUM: {
20421 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20422 return V;
20423 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20424 return V;
20425 return SDValue();
20426 }
20427 case ISD::SETCC:
20428 return performSETCCCombine(N, DCI, Subtarget);
20429 case ISD::SIGN_EXTEND_INREG:
20430 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20431 case ISD::ZERO_EXTEND:
20432 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20433 // type legalization. This is safe because fp_to_uint produces poison if
20434 // it overflows.
20435 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20436 SDValue Src = N->getOperand(0);
20437 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20438 isTypeLegal(Src.getOperand(0).getValueType()))
20439 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20440 Src.getOperand(0));
20441 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20442 isTypeLegal(Src.getOperand(1).getValueType())) {
20443 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20444 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20445 Src.getOperand(0), Src.getOperand(1));
20446 DCI.CombineTo(N, Res);
20447 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20448 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20449 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20450 }
20451 }
20452 return SDValue();
20453 case RISCVISD::TRUNCATE_VECTOR_VL:
20454 if (SDValue V = combineTruncOfSraSext(N, DAG))
20455 return V;
20456 return combineTruncToVnclip(N, DAG, Subtarget);
20457 case ISD::VP_TRUNCATE:
20458 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20459 case ISD::TRUNCATE:
20460 return performTRUNCATECombine(N, DAG, Subtarget);
20461 case ISD::SELECT:
20462 return performSELECTCombine(N, DAG, Subtarget);
20463 case ISD::VSELECT:
20464 return performVSELECTCombine(N, DAG);
20465 case RISCVISD::CZERO_EQZ:
20466 case RISCVISD::CZERO_NEZ: {
20467 SDValue Val = N->getOperand(0);
20468 SDValue Cond = N->getOperand(1);
20469
20470 unsigned Opc = N->getOpcode();
20471
20472 // czero_eqz x, x -> x
20473 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20474 return Val;
20475
20476 unsigned InvOpc =
20477 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20478
20479 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20480 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20481 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20482 SDValue NewCond = Cond.getOperand(0);
20483 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20484 if (DAG.MaskedValueIsZero(NewCond, Mask))
20485 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20486 }
20487 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20488 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20489 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20490 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20491 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20492 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20493 if (ISD::isIntEqualitySetCC(CCVal))
20494 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20495 N->getValueType(0), Val, Cond.getOperand(0));
20496 }
20497 return SDValue();
20498 }
20499 case RISCVISD::SELECT_CC: {
20500 // Transform
20501 SDValue LHS = N->getOperand(0);
20502 SDValue RHS = N->getOperand(1);
20503 SDValue CC = N->getOperand(2);
20504 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20505 SDValue TrueV = N->getOperand(3);
20506 SDValue FalseV = N->getOperand(4);
20507 SDLoc DL(N);
20508 EVT VT = N->getValueType(0);
20509
20510 // If the True and False values are the same, we don't need a select_cc.
20511 if (TrueV == FalseV)
20512 return TrueV;
20513
20514 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20515 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
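    // For example (a sketch, RV64): (select (x < 0), 5, 2) becomes
    //   ((x >> 63) & (5 - 2)) + 2
    // since the arithmetic shift yields 0 or -1, the AND keeps either 0 or
    // the constant difference, and the add supplies the false value.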
20516 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20517 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20518 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20519 if (CCVal == ISD::CondCode::SETGE)
20520 std::swap(TrueV, FalseV);
20521
20522 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20523 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20524 // Only handle simm12; if a value is not in this range, it can be
20525 // considered as a register operand instead.
20526 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20527 isInt<12>(TrueSImm - FalseSImm)) {
20528 SDValue SRA =
20529 DAG.getNode(ISD::SRA, DL, VT, LHS,
20530 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20531 SDValue AND =
20532 DAG.getNode(ISD::AND, DL, VT, SRA,
20533 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20534 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20535 }
20536
20537 if (CCVal == ISD::CondCode::SETGE)
20538 std::swap(TrueV, FalseV);
20539 }
20540
20541 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20542 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20543 {LHS, RHS, CC, TrueV, FalseV});
20544
20545 if (!Subtarget.hasConditionalMoveFusion()) {
20546 // (select c, -1, y) -> -c | y
20547 if (isAllOnesConstant(TrueV)) {
20548 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20549 SDValue Neg = DAG.getNegative(C, DL, VT);
20550 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20551 }
20552 // (select c, y, -1) -> -!c | y
20553 if (isAllOnesConstant(FalseV)) {
20554 SDValue C =
20555 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20556 SDValue Neg = DAG.getNegative(C, DL, VT);
20557 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20558 }
20559
20560 // (select c, 0, y) -> -!c & y
20561 if (isNullConstant(TrueV)) {
20562 SDValue C =
20563 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20564 SDValue Neg = DAG.getNegative(C, DL, VT);
20565 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20566 }
20567 // (select c, y, 0) -> -c & y
20568 if (isNullConstant(FalseV)) {
20569 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20570 SDValue Neg = DAG.getNegative(C, DL, VT);
20571 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20572 }
20573 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20574 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20575 if (((isOneConstant(FalseV) && LHS == TrueV &&
20576 CCVal == ISD::CondCode::SETNE) ||
20577 (isOneConstant(TrueV) && LHS == FalseV &&
20578 CCVal == ISD::CondCode::SETEQ)) &&
20579 isNullConstant(RHS)) {
20580 // freeze it to be safe.
20581 LHS = DAG.getFreeze(LHS);
20582 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20583 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20584 }
20585 }
20586
20587 // If both true/false are an xor with 1, pull through the select.
20588 // This can occur after op legalization if both operands are setccs that
20589 // require an xor to invert.
20590 // FIXME: Generalize to other binary ops with identical operand?
20591 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20592 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20593 isOneConstant(TrueV.getOperand(1)) &&
20594 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20595 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20596 TrueV.getOperand(0), FalseV.getOperand(0));
20597 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20598 }
20599
20600 return SDValue();
20601 }
20602 case RISCVISD::BR_CC: {
20603 SDValue LHS = N->getOperand(1);
20604 SDValue RHS = N->getOperand(2);
20605 SDValue CC = N->getOperand(3);
20606 SDLoc DL(N);
20607
20608 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20609 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20610 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20611
20612 return SDValue();
20613 }
20614 case ISD::BITREVERSE:
20615 return performBITREVERSECombine(N, DAG, Subtarget);
20616 case ISD::FP_TO_SINT:
20617 case ISD::FP_TO_UINT:
20618 return performFP_TO_INTCombine(N, DCI, Subtarget);
20619 case ISD::FP_TO_SINT_SAT:
20620 case ISD::FP_TO_UINT_SAT:
20621 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20622 case ISD::FCOPYSIGN: {
20623 EVT VT = N->getValueType(0);
20624 if (!VT.isVector())
20625 break;
20626 // There is a form of VFSGNJ which injects the negated sign of its second
20627 // operand. Try and bubble any FNEG up after the extend/round to produce
20628 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20629 // TRUNC=1.
20630 SDValue In2 = N->getOperand(1);
20631 // Avoid cases where the extend/round has multiple uses, as duplicating
20632 // those is typically more expensive than removing a fneg.
20633 if (!In2.hasOneUse())
20634 break;
20635 if (In2.getOpcode() != ISD::FP_EXTEND &&
20636 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20637 break;
20638 In2 = In2.getOperand(0);
20639 if (In2.getOpcode() != ISD::FNEG)
20640 break;
20641 SDLoc DL(N);
20642 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20643 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20644 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20645 }
20646 case ISD::MGATHER: {
20647 const auto *MGN = cast<MaskedGatherSDNode>(N);
20648 const EVT VT = N->getValueType(0);
20649 SDValue Index = MGN->getIndex();
20650 SDValue ScaleOp = MGN->getScale();
20651 ISD::MemIndexType IndexType = MGN->getIndexType();
20652 assert(!MGN->isIndexScaled() &&
20653 "Scaled gather/scatter should not be formed");
20654
20655 SDLoc DL(N);
20656 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20657 return DAG.getMaskedGather(
20658 N->getVTList(), MGN->getMemoryVT(), DL,
20659 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20660 MGN->getBasePtr(), Index, ScaleOp},
20661 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20662
20663 if (narrowIndex(Index, IndexType, DAG))
20664 return DAG.getMaskedGather(
20665 N->getVTList(), MGN->getMemoryVT(), DL,
20666 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20667 MGN->getBasePtr(), Index, ScaleOp},
20668 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20669
20670 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20671 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20672 // The sequence will be XLenVT, not the type of Index. Tell
20673 // isSimpleVIDSequence this so we avoid overflow.
20674 if (std::optional<VIDSequence> SimpleVID =
20675 isSimpleVIDSequence(Index, Subtarget.getXLen());
20676 SimpleVID && SimpleVID->StepDenominator == 1) {
20677 const int64_t StepNumerator = SimpleVID->StepNumerator;
20678 const int64_t Addend = SimpleVID->Addend;
20679
20680 // Note: We don't need to check alignment here since (by assumption
20681 // from the existence of the gather), our offsets must be sufficiently
20682 // aligned.
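        // For example (a sketch): a gather of i64 elements with byte offsets
        // {16, 32, 48, 64} has Addend = 16 and StepNumerator = 16, so it is
        // emitted below as a VP strided load from (base + 16) with stride 16,
        // then selected against the gather's mask.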
20683
20684 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20685 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20686 assert(IndexType == ISD::UNSIGNED_SCALED);
20687 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20688 DAG.getSignedConstant(Addend, DL, PtrVT));
20689
20690 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20691 VT.getVectorElementCount());
20692 SDValue StridedLoad = DAG.getStridedLoadVP(
20693 VT, DL, MGN->getChain(), BasePtr,
20694 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20695 EVL, MGN->getMemOperand());
20696 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20697 MGN->getPassThru());
20698 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20699 DL);
20700 }
20701 }
20702
20703 SmallVector<int> ShuffleMask;
20704 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20705 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20706 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20707 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20708 MGN->getMask(), DAG.getUNDEF(VT),
20709 MGN->getMemoryVT(), MGN->getMemOperand(),
20710 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20711 SDValue Shuffle =
20712 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20713 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20714 }
20715
20716 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20717 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20718 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20719 SmallVector<SDValue> NewIndices;
20720 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20721 NewIndices.push_back(Index.getOperand(i));
20722 EVT IndexVT = Index.getValueType()
20723 .getHalfNumVectorElementsVT(*DAG.getContext());
20724 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20725
20726 unsigned ElementSize = VT.getScalarStoreSize();
20727 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20728 auto EltCnt = VT.getVectorElementCount();
20729 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20730 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20731 EltCnt.divideCoefficientBy(2));
20732 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20733 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20734 EltCnt.divideCoefficientBy(2));
20735 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20736
20737 SDValue Gather =
20738 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20739 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20740 Index, ScaleOp},
20741 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20742 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20743 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20744 }
20745 break;
20746 }
20747 case ISD::MSCATTER: {
20748 const auto *MSN = cast<MaskedScatterSDNode>(N);
20749 SDValue Index = MSN->getIndex();
20750 SDValue ScaleOp = MSN->getScale();
20751 ISD::MemIndexType IndexType = MSN->getIndexType();
20752 assert(!MSN->isIndexScaled() &&
20753 "Scaled gather/scatter should not be formed");
20754
20755 SDLoc DL(N);
20756 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20757 return DAG.getMaskedScatter(
20758 N->getVTList(), MSN->getMemoryVT(), DL,
20759 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20760 Index, ScaleOp},
20761 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20762
20763 if (narrowIndex(Index, IndexType, DAG))
20764 return DAG.getMaskedScatter(
20765 N->getVTList(), MSN->getMemoryVT(), DL,
20766 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20767 Index, ScaleOp},
20768 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20769
20770 EVT VT = MSN->getValue()->getValueType(0);
20771 SmallVector<int> ShuffleMask;
20772 if (!MSN->isTruncatingStore() &&
20773 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20774 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20775 DAG.getUNDEF(VT), ShuffleMask);
20776 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20777 DAG.getUNDEF(XLenVT), MSN->getMask(),
20778 MSN->getMemoryVT(), MSN->getMemOperand(),
20779 ISD::UNINDEXED, false);
20780 }
20781 break;
20782 }
20783 case ISD::VP_GATHER: {
20784 const auto *VPGN = cast<VPGatherSDNode>(N);
20785 SDValue Index = VPGN->getIndex();
20786 SDValue ScaleOp = VPGN->getScale();
20787 ISD::MemIndexType IndexType = VPGN->getIndexType();
20788 assert(!VPGN->isIndexScaled() &&
20789 "Scaled gather/scatter should not be formed");
20790
20791 SDLoc DL(N);
20792 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20793 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20794 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20795 ScaleOp, VPGN->getMask(),
20796 VPGN->getVectorLength()},
20797 VPGN->getMemOperand(), IndexType);
20798
20799 if (narrowIndex(Index, IndexType, DAG))
20800 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20801 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20802 ScaleOp, VPGN->getMask(),
20803 VPGN->getVectorLength()},
20804 VPGN->getMemOperand(), IndexType);
20805
20806 break;
20807 }
20808 case ISD::VP_SCATTER: {
20809 const auto *VPSN = cast<VPScatterSDNode>(N);
20810 SDValue Index = VPSN->getIndex();
20811 SDValue ScaleOp = VPSN->getScale();
20812 ISD::MemIndexType IndexType = VPSN->getIndexType();
20813 assert(!VPSN->isIndexScaled() &&
20814 "Scaled gather/scatter should not be formed");
20815
20816 SDLoc DL(N);
20817 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20818 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20819 {VPSN->getChain(), VPSN->getValue(),
20820 VPSN->getBasePtr(), Index, ScaleOp,
20821 VPSN->getMask(), VPSN->getVectorLength()},
20822 VPSN->getMemOperand(), IndexType);
20823
20824 if (narrowIndex(Index, IndexType, DAG))
20825 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20826 {VPSN->getChain(), VPSN->getValue(),
20827 VPSN->getBasePtr(), Index, ScaleOp,
20828 VPSN->getMask(), VPSN->getVectorLength()},
20829 VPSN->getMemOperand(), IndexType);
20830 break;
20831 }
20832 case RISCVISD::SHL_VL:
20833 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20834 return V;
20835 [[fallthrough]];
20836 case RISCVISD::SRA_VL:
20837 case RISCVISD::SRL_VL: {
20838 SDValue ShAmt = N->getOperand(1);
20839 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20840 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20841 SDLoc DL(N);
20842 SDValue VL = N->getOperand(4);
20843 EVT VT = N->getValueType(0);
20844 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20845 ShAmt.getOperand(1), VL);
20846 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20847 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20848 }
20849 break;
20850 }
20851 case ISD::SRA:
20852 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20853 return V;
20854 [[fallthrough]];
20855 case ISD::SRL:
20856 case ISD::SHL: {
20857 if (N->getOpcode() == ISD::SHL) {
20858 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20859 return V;
20860 }
20861 SDValue ShAmt = N->getOperand(1);
20862 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20863 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20864 SDLoc DL(N);
20865 EVT VT = N->getValueType(0);
20866 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20867 ShAmt.getOperand(1),
20868 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20869 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20870 }
20871 break;
20872 }
20873 case RISCVISD::ADD_VL:
20874 if (SDValue V = simplifyOp_VL(N))
20875 return V;
20876 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20877 return V;
20878 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20879 return V;
20880 return combineToVWMACC(N, DAG, Subtarget);
20881 case RISCVISD::VWADD_W_VL:
20882 case RISCVISD::VWADDU_W_VL:
20883 case RISCVISD::VWSUB_W_VL:
20884 case RISCVISD::VWSUBU_W_VL:
20885 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20886 case RISCVISD::OR_VL:
20887 case RISCVISD::SUB_VL:
20888 case RISCVISD::MUL_VL:
20889 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20890 case RISCVISD::VFMADD_VL:
20891 case RISCVISD::VFNMADD_VL:
20892 case RISCVISD::VFMSUB_VL:
20893 case RISCVISD::VFNMSUB_VL:
20894 case RISCVISD::STRICT_VFMADD_VL:
20895 case RISCVISD::STRICT_VFNMADD_VL:
20896 case RISCVISD::STRICT_VFMSUB_VL:
20897 case RISCVISD::STRICT_VFNMSUB_VL:
20898 return performVFMADD_VLCombine(N, DCI, Subtarget);
20899 case RISCVISD::FADD_VL:
20900 case RISCVISD::FSUB_VL:
20901 case RISCVISD::FMUL_VL:
20902 case RISCVISD::VFWADD_W_VL:
20903 case RISCVISD::VFWSUB_W_VL:
20904 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20905 case ISD::LOAD:
20906 case ISD::STORE: {
20907 if (DCI.isAfterLegalizeDAG())
20908 if (SDValue V = performMemPairCombine(N, DCI))
20909 return V;
20910
20911 if (N->getOpcode() != ISD::STORE)
20912 break;
20913
20914 auto *Store = cast<StoreSDNode>(N);
20915 SDValue Chain = Store->getChain();
20916 EVT MemVT = Store->getMemoryVT();
20917 SDValue Val = Store->getValue();
20918 SDLoc DL(N);
20919
20920 bool IsScalarizable =
20921 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20922 Store->isSimple() &&
20923 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20924 isPowerOf2_64(MemVT.getSizeInBits()) &&
20925 MemVT.getSizeInBits() <= Subtarget.getXLen();
20926
20927 // If sufficiently aligned we can scalarize stores of constant vectors of
20928 // any power-of-two size up to XLen bits, provided that they aren't too
20929 // expensive to materialize.
20930 // vsetivli zero, 2, e8, m1, ta, ma
20931 // vmv.v.i v8, 4
20932 // vse64.v v8, (a0)
20933 // ->
20934 // li a1, 1028
20935 // sh a1, 0(a0)
20936 if (DCI.isBeforeLegalize() && IsScalarizable &&
20937 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
20938 // Get the constant vector bits
20939 APInt NewC(Val.getValueSizeInBits(), 0);
20940 uint64_t EltSize = Val.getScalarValueSizeInBits();
20941 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
20942 if (Val.getOperand(i).isUndef())
20943 continue;
20944 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
20945 i * EltSize);
20946 }
20947 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20948
20949 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
20950 true) <= 2 &&
20951 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20952 NewVT, *Store->getMemOperand())) {
20953 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
20954 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
20955 Store->getPointerInfo(), Store->getBaseAlign(),
20956 Store->getMemOperand()->getFlags());
20957 }
20958 }
20959
20960 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
20961 // vsetivli zero, 2, e16, m1, ta, ma
20962 // vle16.v v8, (a0)
20963 // vse16.v v8, (a1)
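    // -> (roughly, for this 2 x e16 case; a2 is just an illustrative
    //     scratch register)
    // lw a2, 0(a0)
    // sw a2, 0(a1)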
20964 if (auto *L = dyn_cast<LoadSDNode>(Val);
20965 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
20966 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
20967 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
20968 L->getMemoryVT() == MemVT) {
20969 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
20970 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20971 NewVT, *Store->getMemOperand()) &&
20972 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
20973 NewVT, *L->getMemOperand())) {
20974 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
20975 L->getPointerInfo(), L->getBaseAlign(),
20976 L->getMemOperand()->getFlags());
20977 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
20978 Store->getPointerInfo(), Store->getBaseAlign(),
20979 Store->getMemOperand()->getFlags());
20980 }
20981 }
20982
20983 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
20984 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
20985 // any illegal types.
20986 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
20987 (DCI.isAfterLegalizeDAG() &&
20988 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20989 isNullConstant(Val.getOperand(1)))) &&
20990 Val.hasOneUse()) {
20991 SDValue Src = Val.getOperand(0);
20992 MVT VecVT = Src.getSimpleValueType();
20993 // VecVT should be scalable and memory VT should match the element type.
20994 if (!Store->isIndexed() && VecVT.isScalableVector() &&
20995 MemVT == VecVT.getVectorElementType()) {
20996 SDLoc DL(N);
20997 MVT MaskVT = getMaskTypeFor(VecVT);
20998 return DAG.getStoreVP(
20999 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
21000 DAG.getConstant(1, DL, MaskVT),
21001 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
21002 Store->getMemOperand(), Store->getAddressingMode(),
21003 Store->isTruncatingStore(), /*IsCompress*/ false);
21004 }
21005 }
21006
21007 break;
21008 }
21009 case ISD::SPLAT_VECTOR: {
21010 EVT VT = N->getValueType(0);
21011 // Only perform this combine on legal MVT types.
21012 if (!isTypeLegal(VT))
21013 break;
21014 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
21015 DAG, Subtarget))
21016 return Gather;
21017 break;
21018 }
21019 case ISD::BUILD_VECTOR:
21020 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
21021 return V;
21022 break;
21023 case ISD::CONCAT_VECTORS:
21024 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
21025 return V;
21026 break;
21027 case ISD::VECTOR_SHUFFLE:
21028 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
21029 return V;
21030 break;
21031 case ISD::INSERT_VECTOR_ELT:
21032 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
21033 return V;
21034 break;
21035 case RISCVISD::VFMV_V_F_VL: {
21036 const MVT VT = N->getSimpleValueType(0);
21037 SDValue Passthru = N->getOperand(0);
21038 SDValue Scalar = N->getOperand(1);
21039 SDValue VL = N->getOperand(2);
21040
21041 // If VL is 1, we can use vfmv.s.f.
21042 if (isOneConstant(VL))
21043 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
21044 break;
21045 }
21046 case RISCVISD::VMV_V_X_VL: {
21047 const MVT VT = N->getSimpleValueType(0);
21048 SDValue Passthru = N->getOperand(0);
21049 SDValue Scalar = N->getOperand(1);
21050 SDValue VL = N->getOperand(2);
21051
21052 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
21053 // scalar input.
21054 unsigned ScalarSize = Scalar.getValueSizeInBits();
21055 unsigned EltWidth = VT.getScalarSizeInBits();
21056 if (ScalarSize > EltWidth && Passthru.isUndef())
21057 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
21058 return SDValue(N, 0);
21059
21060 // If VL is 1 and the scalar value won't benefit from immediate, we can
21061 // use vmv.s.x.
21062 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21063 if (isOneConstant(VL) &&
21064 (!Const || Const->isZero() ||
21065 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
21066 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
21067
21068 break;
21069 }
21070 case RISCVISD::VFMV_S_F_VL: {
21071 SDValue Src = N->getOperand(1);
21072 // Try to remove vector->scalar->vector if the scalar->vector is inserting
21073 // into an undef vector.
21074 // TODO: Could use a vslide or vmv.v.v for non-undef.
21075 if (N->getOperand(0).isUndef() &&
21076 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21077 isNullConstant(Src.getOperand(1)) &&
21078 Src.getOperand(0).getValueType().isScalableVector()) {
21079 EVT VT = N->getValueType(0);
21080 SDValue EVSrc = Src.getOperand(0);
21081 EVT EVSrcVT = EVSrc.getValueType();
21083 // Widths match, just return the original vector.
21084 if (EVSrcVT == VT)
21085 return EVSrc;
21086 SDLoc DL(N);
21087 // Width is narrower, using insert_subvector.
21088 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
21089 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
21090 EVSrc,
21091 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21092 }
21093 // Width is wider, using extract_subvector.
21094 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
21095 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21096 }
21097 [[fallthrough]];
21098 }
21099 case RISCVISD::VMV_S_X_VL: {
21100 const MVT VT = N->getSimpleValueType(0);
21101 SDValue Passthru = N->getOperand(0);
21102 SDValue Scalar = N->getOperand(1);
21103 SDValue VL = N->getOperand(2);
21104
21105 // The vmv.s.x instruction copies the scalar integer register to element 0
21106 // of the destination vector register. If SEW < XLEN, the least-significant
21107 // bits are copied and the upper XLEN-SEW bits are ignored.
21108 unsigned ScalarSize = Scalar.getValueSizeInBits();
21109 unsigned EltWidth = VT.getScalarSizeInBits();
21110 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
21111 return SDValue(N, 0);
21112
21113 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
21114 Scalar.getOperand(0).getValueType() == N->getValueType(0))
21115 return Scalar.getOperand(0);
21116
21117 // Use M1 or smaller to avoid over constraining register allocation
21118 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
21119 if (M1VT.bitsLT(VT)) {
21120 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
21121 SDValue Result =
21122 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
21123 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
21124 return Result;
21125 }
21126
21127 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21128 // higher would involve overly constraining the register allocator for
21129 // no purpose.
21130 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21131 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21132 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21133 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21134
21135 break;
21136 }
21137 case RISCVISD::VMV_X_S: {
21138 SDValue Vec = N->getOperand(0);
21139 MVT VecVT = N->getOperand(0).getSimpleValueType();
21140 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21141 if (M1VT.bitsLT(VecVT)) {
21142 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21143 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21144 }
21145 break;
21146 }
21147 case ISD::INTRINSIC_VOID:
21148 case ISD::INTRINSIC_W_CHAIN:
21149 case ISD::INTRINSIC_WO_CHAIN: {
21150 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21151 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21152 switch (IntNo) {
21153 // By default we do not combine any intrinsic.
21154 default:
21155 return SDValue();
21156 case Intrinsic::riscv_vcpop:
21157 case Intrinsic::riscv_vcpop_mask:
21158 case Intrinsic::riscv_vfirst:
21159 case Intrinsic::riscv_vfirst_mask: {
21160 SDValue VL = N->getOperand(2);
21161 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21162 IntNo == Intrinsic::riscv_vfirst_mask)
21163 VL = N->getOperand(3);
21164 if (!isNullConstant(VL))
21165 return SDValue();
21166 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21167 SDLoc DL(N);
21168 EVT VT = N->getValueType(0);
21169 if (IntNo == Intrinsic::riscv_vfirst ||
21170 IntNo == Intrinsic::riscv_vfirst_mask)
21171 return DAG.getAllOnesConstant(DL, VT);
21172 return DAG.getConstant(0, DL, VT);
21173 }
21174 case Intrinsic::riscv_vsseg2_mask:
21175 case Intrinsic::riscv_vsseg3_mask:
21176 case Intrinsic::riscv_vsseg4_mask:
21177 case Intrinsic::riscv_vsseg5_mask:
21178 case Intrinsic::riscv_vsseg6_mask:
21179 case Intrinsic::riscv_vsseg7_mask:
21180 case Intrinsic::riscv_vsseg8_mask: {
21181 SDValue Tuple = N->getOperand(2);
21182 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21183
21184 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21185 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21186 !Tuple.getOperand(0).isUndef())
21187 return SDValue();
21188
21189 SDValue Val = Tuple.getOperand(1);
21190 unsigned Idx = Tuple.getConstantOperandVal(2);
21191
21192 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21193 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21194 "Type mismatch without bitcast?");
21195 unsigned Stride = SEW / 8 * NF;
21196 unsigned Offset = SEW / 8 * Idx;
21197
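      // Worked example (the numbers are illustrative only): storing just field
      // Idx=1 of a 3-field tuple of e32 elements gives SEW=32 and NF=3, so
      //   Stride = 32/8 * 3 = 12 bytes, Offset = 32/8 * 1 = 4 bytes,
      // i.e. a vsse32 whose base pointer is advanced by 4 and which steps 12
      // bytes between elements -- exactly the bytes the segment store would
      // have written for that field.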
21198 SDValue Ops[] = {
21199 /*Chain=*/N->getOperand(0),
21200 /*IntID=*/
21201 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21202 /*StoredVal=*/Val,
21203 /*Ptr=*/
21204 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21205 DAG.getConstant(Offset, DL, XLenVT)),
21206 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21207 /*Mask=*/N->getOperand(4),
21208 /*VL=*/N->getOperand(5)};
21209
21210 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21211 // Match getTgtMemIntrinsic for non-unit stride case
21212 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21215 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21216
21217 SDVTList VTs = DAG.getVTList(MVT::Other);
21218 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21219 MMO);
21220 }
21221 }
21222 }
21223 case ISD::EXPERIMENTAL_VP_REVERSE:
21224 return performVP_REVERSECombine(N, DAG, Subtarget);
21225 case ISD::VP_STORE:
21226 return performVP_STORECombine(N, DAG, Subtarget);
21227 case ISD::BITCAST: {
21228 assert(Subtarget.useRVVForFixedLengthVectors());
21229 SDValue N0 = N->getOperand(0);
21230 EVT VT = N->getValueType(0);
21231 EVT SrcVT = N0.getValueType();
21232 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21233 unsigned NF = VT.getRISCVVectorTupleNumFields();
21234 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21235 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21236 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21237
21238 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21239
21240 SDValue Result = DAG.getUNDEF(VT);
21241 for (unsigned i = 0; i < NF; ++i)
21242 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21243 DAG.getTargetConstant(i, DL, MVT::i32));
21244 return Result;
21245 }
21246   // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
21247 // type, widen both sides to avoid a trip through memory.
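    // For example (illustrative only): bitcasting v2i1 to the illegal type i2
    // becomes
    //   (v2i1 X) -> concat_vectors(X, undef, undef, undef) : v8i1
    //            -> bitcast to i8 -> truncate to i2
    // so the mask never has to round-trip through the stack.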
21248 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21249 VT.isScalarInteger()) {
21250 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21251 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21252 Ops[0] = N0;
21253 SDLoc DL(N);
21254 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21255 N0 = DAG.getBitcast(MVT::i8, N0);
21256 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21257 }
21258
21259 return SDValue();
21260 }
21261 case ISD::VECREDUCE_ADD:
21262 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21263 return V;
21264 [[fallthrough]];
21265 case ISD::CTPOP:
21266 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21267 return V;
21268 break;
21269 case RISCVISD::VRGATHER_VX_VL: {
21270   // Note this assumes that out-of-bounds indices produce poison
21271   // and can thus be replaced without having to prove they are in bounds.
21272 EVT VT = N->getValueType(0);
21273 SDValue Src = N->getOperand(0);
21274 SDValue Idx = N->getOperand(1);
21275 SDValue Passthru = N->getOperand(2);
21276 SDValue VL = N->getOperand(4);
21277
21278   // Warning: Unlike most cases where we strip an insert_subvector, this one
21279   // does not require the first operand to be undef.
21280 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21281 isNullConstant(Src.getOperand(2)))
21282 Src = Src.getOperand(1);
21283
21284 switch (Src.getOpcode()) {
21285 default:
21286 break;
21287 case RISCVISD::VMV_V_X_VL:
21288 case RISCVISD::VFMV_V_F_VL:
21289 // Drop a redundant vrgather_vx.
21290 // TODO: Remove the type restriction if we find a motivating
21291 // test case?
21292 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21293 Src.getValueType() == VT)
21294 return Src;
21295 break;
21296 case RISCVISD::VMV_S_X_VL:
21297 case RISCVISD::VFMV_S_F_VL:
21298 // If this use only demands lane zero from the source vmv.s.x, and
21299 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21300 // a vmv.v.x. Note that there can be other uses of the original
21301 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21302 if (isNullConstant(Idx) && Passthru.isUndef() &&
21303 VL == Src.getOperand(2)) {
21304 unsigned Opc =
21305 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21306 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21307 VL);
21308 }
21309 break;
21310 }
21311 break;
21312 }
21313 case RISCVISD::TUPLE_EXTRACT: {
21314 EVT VT = N->getValueType(0);
21315 SDValue Tuple = N->getOperand(0);
21316 unsigned Idx = N->getConstantOperandVal(1);
21317 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21318 break;
21319
21320 unsigned NF = 0;
21321 switch (Tuple.getConstantOperandVal(1)) {
21322 default:
21323 break;
21324 case Intrinsic::riscv_vlseg2_mask:
21325 case Intrinsic::riscv_vlseg3_mask:
21326 case Intrinsic::riscv_vlseg4_mask:
21327 case Intrinsic::riscv_vlseg5_mask:
21328 case Intrinsic::riscv_vlseg6_mask:
21329 case Intrinsic::riscv_vlseg7_mask:
21330 case Intrinsic::riscv_vlseg8_mask:
21331 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21332 break;
21333 }
21334
21335 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21336 break;
21337
21338 unsigned SEW = VT.getScalarSizeInBits();
21339 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21340 "Type mismatch without bitcast?");
21341 unsigned Stride = SEW / 8 * NF;
21342 unsigned Offset = SEW / 8 * Idx;
21343
21344 SDValue Ops[] = {
21345 /*Chain=*/Tuple.getOperand(0),
21346 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21347 /*Passthru=*/Tuple.getOperand(2),
21348 /*Ptr=*/
21349 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21350 DAG.getConstant(Offset, DL, XLenVT)),
21351 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21352 /*Mask=*/Tuple.getOperand(4),
21353 /*VL=*/Tuple.getOperand(5),
21354 /*Policy=*/Tuple.getOperand(6)};
21355
21356 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21357 // Match getTgtMemIntrinsic for non-unit stride case
21358 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21361 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21362
21363 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21365 Ops, MemVT, MMO);
21366 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21367 return Result.getValue(0);
21368 }
21369 case RISCVISD::TUPLE_INSERT: {
21370 // tuple_insert tuple, undef, idx -> tuple
21371 if (N->getOperand(1).isUndef())
21372 return N->getOperand(0);
21373 break;
21374 }
21375 case RISCVISD::VSLIDE1UP_VL:
21376 case RISCVISD::VFSLIDE1UP_VL: {
21377 using namespace SDPatternMatch;
21378 SDValue SrcVec;
21379 SDLoc DL(N);
21380 MVT VT = N->getSimpleValueType(0);
21381 // If the scalar we're sliding in was extracted from the first element of a
21382 // vector, we can use that vector as the passthru in a normal slideup of 1.
21383 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
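    // A sketch of the intended effect (register names are made up):
    //   vmv.x.s      a0, v9
    //   vslide1up.vx v8, v10, a0
    // can instead use v9 as the passthru of an ordinary slideup of 1, so
    // element 0 comes straight from v9 and the scalar move disappears:
    //   vslideup.vi  v8, v10, 1    ; with v8 seeded from v9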
21384 if (!N->getOperand(0).isUndef() ||
21385 !sd_match(N->getOperand(2),
21386 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21387 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21388 break;
21389
21390 MVT SrcVecVT = SrcVec.getSimpleValueType();
21391 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21392 break;
21393 // Adapt the value type of source vector.
21394 if (SrcVecVT.isFixedLengthVector()) {
21395 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21396 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21397 }
21399 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21400 else
21401 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21402
21403 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21404 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21405 N->getOperand(4));
21406 }
21407 }
21408
21409 return SDValue();
21410}
21411
21413 EVT XVT, unsigned KeptBits) const {
21414   // For vectors, we don't have a preference.
21415 if (XVT.isVector())
21416 return false;
21417
21418 if (XVT != MVT::i32 && XVT != MVT::i64)
21419 return false;
21420
21421 // We can use sext.w for RV64 or an srai 31 on RV32.
21422 if (KeptBits == 32 || KeptBits == 64)
21423 return true;
21424
21425 // With Zbb we can use sext.h/sext.b.
21426 return Subtarget.hasStdExtZbb() &&
21427 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21428 KeptBits == 16);
21429}
21430
21432 const SDNode *N, CombineLevel Level) const {
21433 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21434 N->getOpcode() == ISD::SRL) &&
21435 "Expected shift op");
21436
21437 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21438 // materialised in fewer instructions than `(OP _, c1)`:
21439 //
21440 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21441 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
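  // For instance (constants chosen purely for illustration): with c1 = 6 and
  // c2 = 2, c1 << c2 = 24 still fits in a 12-bit immediate, so the fold is
  // allowed; with c1 = 2047 and c2 = 4, c1 << c2 = 32752 no longer fits in
  // simm12 while c1 does, so the fold is rejected below.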
21442 SDValue N0 = N->getOperand(0);
21443 EVT Ty = N0.getValueType();
21444
21445   // Loads and stores can fold a constant offset themselves, so when the add
21446   // node is used by a load or store, the folding optimization above can still
21447   // be performed.
21448 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21449 for (SDNode *Use : X->users()) {
21450 // This use is the one we're on right now. Skip it
21451 if (Use == User || Use->getOpcode() == ISD::SELECT)
21452 continue;
21454 return false;
21455 }
21456 return true;
21457 };
21458
21459 if (Ty.isScalarInteger() &&
21460 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21461 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21462 return isUsedByLdSt(N0.getNode(), N);
21463
21464 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21465 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21466
21467 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21468 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
21469 N->user_begin()->getOpcode() == ISD::ADD &&
21470 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21471 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21472 return false;
21473
21474 if (C1 && C2) {
21475 const APInt &C1Int = C1->getAPIntValue();
21476 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21477
21478 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21479 // and the combine should happen, to potentially allow further combines
21480 // later.
21481 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21482 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21483 return true;
21484
21485 // We can materialise `c1` in an add immediate, so it's "free", and the
21486 // combine should be prevented.
21487 if (C1Int.getSignificantBits() <= 64 &&
21489 return false;
21490
21491 // Neither constant will fit into an immediate, so find materialisation
21492 // costs.
21493 int C1Cost =
21494 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21495 /*CompressionCost*/ true);
21496 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21497 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21498 /*CompressionCost*/ true);
21499
21500 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21501 // combine should be prevented.
21502 if (C1Cost < ShiftedC1Cost)
21503 return false;
21504 }
21505 }
21506
21507 if (!N0->hasOneUse())
21508 return false;
21509
21510 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21511 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21512 !N0->getOperand(0)->hasOneUse())
21513 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21514
21515 return true;
21516}
21517
21519 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21520 TargetLoweringOpt &TLO) const {
21521 // Delay this optimization as late as possible.
21522 if (!TLO.LegalOps)
21523 return false;
21524
21525 EVT VT = Op.getValueType();
21526 if (VT.isVector())
21527 return false;
21528
21529 unsigned Opcode = Op.getOpcode();
21530 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21531 return false;
21532
21533 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21534 if (!C)
21535 return false;
21536
21537 const APInt &Mask = C->getAPIntValue();
21538
21539 // Clear all non-demanded bits initially.
21540 APInt ShrunkMask = Mask & DemandedBits;
21541
21542 // Try to make a smaller immediate by setting undemanded bits.
21543
21544 APInt ExpandedMask = Mask | ~DemandedBits;
21545
21546 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21547 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21548 };
21549 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21550 if (NewMask == Mask)
21551 return true;
21552 SDLoc DL(Op);
21553 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21554 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21555 Op.getOperand(0), NewC);
21556 return TLO.CombineTo(Op, NewOp);
21557 };
21558
21559   // If the shrunk mask fits in sign-extended 12 bits, let the
21560   // target-independent code apply it.
21561 if (ShrunkMask.isSignedIntN(12))
21562 return false;
21563
21564   // AND has a few special cases for zext.
21565 if (Opcode == ISD::AND) {
21566 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21567 // otherwise use SLLI + SRLI.
21568 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21569 if (IsLegalMask(NewMask))
21570 return UseMask(NewMask);
21571
21572 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21573 if (VT == MVT::i64) {
21574 APInt NewMask = APInt(64, 0xffffffff);
21575 if (IsLegalMask(NewMask))
21576 return UseMask(NewMask);
21577 }
21578 }
21579
21580 // For the remaining optimizations, we need to be able to make a negative
21581 // number through a combination of mask and undemanded bits.
21582 if (!ExpandedMask.isNegative())
21583 return false;
21584
21585   // What is the fewest number of bits we need to represent the negative number?
21586 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21587
21588 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21589 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21590 // If we can't create a simm12, we shouldn't change opaque constants.
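  // Worked example (mask and demanded bits are illustrative): for
  //   (and X, 0xff00) with DemandedBits = 0xff00 on RV64,
  // ShrunkMask = 0xff00 does not fit in simm12, but every bit outside the
  // demanded range may be set freely, so choosing
  //   NewMask = 0xffff'ffff'ffff'ff00 (-256)
  // lets the AND be selected as a single andi instead of materialising 0xff00
  // separately.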
21591 APInt NewMask = ShrunkMask;
21592 if (MinSignedBits <= 12)
21593 NewMask.setBitsFrom(11);
21594 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21595 NewMask.setBitsFrom(31);
21596 else
21597 return false;
21598
21599 // Check that our new mask is a subset of the demanded mask.
21600 assert(IsLegalMask(NewMask));
21601 return UseMask(NewMask);
21602}
21603
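// Evaluate a generalized bit-reverse (GREV) or generalized OR-combine (GORC)
// of 'x' with control value 'ShAmt', one power-of-two stage at a time. With a
// control of 7 this matches brev8 (reverse the bits within each byte) and
// orc.b (set every bit of a byte if any bit of that byte was set). Quick
// illustrative check: computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80 and
// computeGREVOrGORC(0x01, 7, /*IsGORC=*/true) == 0xff.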
21604static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21605 static const uint64_t GREVMasks[] = {
21606 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21607 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21608
21609 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21610 unsigned Shift = 1 << Stage;
21611 if (ShAmt & Shift) {
21612 uint64_t Mask = GREVMasks[Stage];
21613 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21614 if (IsGORC)
21615 Res |= x;
21616 x = Res;
21617 }
21618 }
21619
21620 return x;
21621}
21622
21624 KnownBits &Known,
21625 const APInt &DemandedElts,
21626 const SelectionDAG &DAG,
21627 unsigned Depth) const {
21628 unsigned BitWidth = Known.getBitWidth();
21629 unsigned Opc = Op.getOpcode();
21634 "Should use MaskedValueIsZero if you don't know whether Op"
21635 " is a target node!");
21636
21637 Known.resetAll();
21638 switch (Opc) {
21639 default: break;
21640 case RISCVISD::SELECT_CC: {
21641 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21642 // If we don't know any bits, early out.
21643 if (Known.isUnknown())
21644 break;
21645 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21646
21647 // Only known if known in both the LHS and RHS.
21648 Known = Known.intersectWith(Known2);
21649 break;
21650 }
21651 case RISCVISD::VCPOP_VL: {
21652 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21653 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21654 break;
21655 }
21656 case RISCVISD::CZERO_EQZ:
21657 case RISCVISD::CZERO_NEZ:
21658 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21659 // Result is either all zero or operand 0. We can propagate zeros, but not
21660 // ones.
21661 Known.One.clearAllBits();
21662 break;
21663 case RISCVISD::REMUW: {
21664 KnownBits Known2;
21665 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21666 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21667 // We only care about the lower 32 bits.
21668 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21669 // Restore the original width by sign extending.
21670 Known = Known.sext(BitWidth);
21671 break;
21672 }
21673 case RISCVISD::DIVUW: {
21674 KnownBits Known2;
21675 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21676 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21677 // We only care about the lower 32 bits.
21678 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21679 // Restore the original width by sign extending.
21680 Known = Known.sext(BitWidth);
21681 break;
21682 }
21683 case RISCVISD::SLLW: {
21684 KnownBits Known2;
21685 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21686 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21687 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21688 // Restore the original width by sign extending.
21689 Known = Known.sext(BitWidth);
21690 break;
21691 }
21692 case RISCVISD::SRLW: {
21693 KnownBits Known2;
21694 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21695 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21696 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21697 // Restore the original width by sign extending.
21698 Known = Known.sext(BitWidth);
21699 break;
21700 }
21701 case RISCVISD::SRAW: {
21702 KnownBits Known2;
21703 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21704 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21705 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21706 // Restore the original width by sign extending.
21707 Known = Known.sext(BitWidth);
21708 break;
21709 }
21710 case RISCVISD::SHL_ADD: {
21711 KnownBits Known2;
21712 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21713 unsigned ShAmt = Op.getConstantOperandVal(1);
21714 Known <<= ShAmt;
21715 Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
21716 Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
21717 Known = KnownBits::add(Known, Known2);
21718 break;
21719 }
21720 case RISCVISD::CTZW: {
21721 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21722 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21723 unsigned LowBits = llvm::bit_width(PossibleTZ);
21724 Known.Zero.setBitsFrom(LowBits);
21725 break;
21726 }
21727 case RISCVISD::CLZW: {
21728 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21729 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21730 unsigned LowBits = llvm::bit_width(PossibleLZ);
21731 Known.Zero.setBitsFrom(LowBits);
21732 break;
21733 }
21734 case RISCVISD::BREV8:
21735 case RISCVISD::ORC_B: {
21736 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21737 // control value of 7 is equivalent to brev8 and orc.b.
21738 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21739 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21740 // To compute zeros for ORC_B, we need to invert the value and invert it
21741 // back after. This inverting is harmless for BREV8.
21742 Known.Zero =
21743 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21744 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21745 break;
21746 }
21747 case RISCVISD::READ_VLENB: {
21748 // We can use the minimum and maximum VLEN values to bound VLENB. We
21749 // know VLEN must be a power of two.
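    // For example (the VLEN bounds are illustrative): with VLEN known to be in
    // [128, 512], VLENB lies in {16, 32, 64}, so bits [3:0] are known zero
    // (setLowBits(4)) and bits 7 and above are known zero (setBitsFrom(7)).
    // If the bounds coincide, the one remaining set bit is known one as well.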
21750 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21751 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21752 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21753 Known.Zero.setLowBits(Log2_32(MinVLenB));
21754 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21755 if (MaxVLenB == MinVLenB)
21756 Known.One.setBit(Log2_32(MinVLenB));
21757 break;
21758 }
21759 case RISCVISD::FCLASS: {
21760 // fclass will only set one of the low 10 bits.
21761 Known.Zero.setBitsFrom(10);
21762 break;
21763 }
21766 unsigned IntNo =
21767 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21768 switch (IntNo) {
21769 default:
21770 // We can't do anything for most intrinsics.
21771 break;
21772 case Intrinsic::riscv_vsetvli:
21773 case Intrinsic::riscv_vsetvlimax: {
21774 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21775 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21776 RISCVVType::VLMUL VLMUL =
21777 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21778 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21779 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21780 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21781 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
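      // For instance (an illustrative configuration, not taken from the
      // source): with a maximum VLEN of 256, SEW = 32 and LMUL = 2, MaxVL is
      // 256/32 * 2 = 16, so every result bit above bit 4 is known zero
      // (Log2_32(16) + 1 == 5).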
21782
21783   // The result of vsetvli must not be larger than AVL.
21784 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21785 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21786
21787 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21788 if (BitWidth > KnownZeroFirstBit)
21789 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21790 break;
21791 }
21792 }
21793 break;
21794 }
21795 }
21796}
21797
21799 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21800 unsigned Depth) const {
21801 switch (Op.getOpcode()) {
21802 default:
21803 break;
21804 case RISCVISD::SELECT_CC: {
21805 unsigned Tmp =
21806 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21807 if (Tmp == 1) return 1; // Early out.
21808 unsigned Tmp2 =
21809 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21810 return std::min(Tmp, Tmp2);
21811 }
21812 case RISCVISD::CZERO_EQZ:
21813 case RISCVISD::CZERO_NEZ:
21814 // Output is either all zero or operand 0. We can propagate sign bit count
21815 // from operand 0.
21816 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21817 case RISCVISD::ABSW: {
21818 // We expand this at isel to negw+max. The result will have 33 sign bits
21819 // if the input has at least 33 sign bits.
21820 unsigned Tmp =
21821 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21822 if (Tmp < 33) return 1;
21823 return 33;
21824 }
21825 case RISCVISD::SRAW: {
21826 unsigned Tmp =
21827 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21828 // sraw produces at least 33 sign bits. If the input already has more than
21829   // 33 sign bits, sraw will preserve them.
21830 // TODO: A more precise answer could be calculated depending on known bits
21831 // in the shift amount.
21832 return std::max(Tmp, 33U);
21833 }
21834 case RISCVISD::SLLW:
21835 case RISCVISD::SRLW:
21836 case RISCVISD::DIVW:
21837 case RISCVISD::DIVUW:
21838 case RISCVISD::REMUW:
21839 case RISCVISD::ROLW:
21840 case RISCVISD::RORW:
21841 case RISCVISD::FCVT_W_RV64:
21842 case RISCVISD::FCVT_WU_RV64:
21843 case RISCVISD::STRICT_FCVT_W_RV64:
21844 case RISCVISD::STRICT_FCVT_WU_RV64:
21845 // TODO: As the result is sign-extended, this is conservatively correct.
21846 return 33;
21847 case RISCVISD::VMV_X_S: {
21848 // The number of sign bits of the scalar result is computed by obtaining the
21849 // element type of the input vector operand, subtracting its width from the
21850 // XLEN, and then adding one (sign bit within the element type). If the
21851 // element type is wider than XLen, the least-significant XLEN bits are
21852 // taken.
21853 unsigned XLen = Subtarget.getXLen();
21854 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21855 if (EltBits <= XLen)
21856 return XLen - EltBits + 1;
21857 break;
21858 }
21860 unsigned IntNo = Op.getConstantOperandVal(1);
21861 switch (IntNo) {
21862 default:
21863 break;
21864 case Intrinsic::riscv_masked_atomicrmw_xchg:
21865 case Intrinsic::riscv_masked_atomicrmw_add:
21866 case Intrinsic::riscv_masked_atomicrmw_sub:
21867 case Intrinsic::riscv_masked_atomicrmw_nand:
21868 case Intrinsic::riscv_masked_atomicrmw_max:
21869 case Intrinsic::riscv_masked_atomicrmw_min:
21870 case Intrinsic::riscv_masked_atomicrmw_umax:
21871 case Intrinsic::riscv_masked_atomicrmw_umin:
21872 case Intrinsic::riscv_masked_cmpxchg:
21873 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21874 // narrow atomic operation. These are implemented using atomic
21875 // operations at the minimum supported atomicrmw/cmpxchg width whose
21876 // result is then sign extended to XLEN. With +A, the minimum width is
21877   // 32 for both RV64 and RV32.
21879 assert(Subtarget.hasStdExtA());
21880 return Op.getValueSizeInBits() - 31;
21881 }
21882 break;
21883 }
21884 }
21885
21886 return 1;
21887}
21888
21890 SDValue Op, const APInt &OriginalDemandedBits,
21891 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21892 unsigned Depth) const {
21893 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21894
21895 switch (Op.getOpcode()) {
21896 case RISCVISD::BREV8:
21897 case RISCVISD::ORC_B: {
21898 KnownBits Known2;
21899 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21900 // For BREV8, we need to do BREV8 on the demanded bits.
21901   // For ORC_B, any bit in the output demands all bits from the same byte.
21902 // So we need to do ORC_B on the demanded bits.
21904 APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21905 7, IsGORC));
21906 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21907 OriginalDemandedElts, Known2, TLO, Depth + 1))
21908 return true;
21909
21910 // To compute zeros for ORC_B, we need to invert the value and invert it
21911 // back after. This inverting is harmless for BREV8.
21912 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21913 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21914 return false;
21915 }
21916 }
21917
21919 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21920}
21921
21923 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21924 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21925
21926 // TODO: Add more target nodes.
21927 switch (Op.getOpcode()) {
21928 case RISCVISD::SLLW:
21929 case RISCVISD::SRAW:
21930 case RISCVISD::SRLW:
21931 case RISCVISD::RORW:
21932 case RISCVISD::ROLW:
21933 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
21934   // amount is in bounds.
21935 return false;
21936 case RISCVISD::SELECT_CC:
21937 // Integer comparisons cannot create poison.
21938 assert(Op.getOperand(0).getValueType().isInteger() &&
21939 "RISCVISD::SELECT_CC only compares integers");
21940 return false;
21941 }
21943 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
21944}
21945
21946const Constant *
21948 assert(Ld && "Unexpected null LoadSDNode");
21949 if (!ISD::isNormalLoad(Ld))
21950 return nullptr;
21951
21952 SDValue Ptr = Ld->getBasePtr();
21953
21954 // Only constant pools with no offset are supported.
21955 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
21956 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
21957 if (!CNode || CNode->isMachineConstantPoolEntry() ||
21958 CNode->getOffset() != 0)
21959 return nullptr;
21960
21961 return CNode;
21962 };
21963
21964 // Simple case, LLA.
21965 if (Ptr.getOpcode() == RISCVISD::LLA) {
21966 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
21967 if (!CNode || CNode->getTargetFlags() != 0)
21968 return nullptr;
21969
21970 return CNode->getConstVal();
21971 }
21972
21973 // Look for a HI and ADD_LO pair.
21974 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
21975 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
21976 return nullptr;
21977
21978 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
21979 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
21980
21981 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
21982 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
21983 return nullptr;
21984
21985 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
21986 return nullptr;
21987
21988 return CNodeLo->getConstVal();
21989}
21990
21992 MachineBasicBlock *BB) {
21993 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
21994
21995 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
21996 // Should the count have wrapped while it was being read, we need to try
21997 // again.
21998 // For example:
21999 // ```
22000 // read:
22001 // csrrs x3, counterh # load high word of counter
22002 // csrrs x2, counter # load low word of counter
22003 // csrrs x4, counterh # load high word of counter
22004 // bne x3, x4, read # check if high word reads match, otherwise try again
22005 // ```
22006
22007 MachineFunction &MF = *BB->getParent();
22008 const BasicBlock *LLVMBB = BB->getBasicBlock();
22010
22011 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
22012 MF.insert(It, LoopMBB);
22013
22014 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
22015 MF.insert(It, DoneMBB);
22016
22017 // Transfer the remainder of BB and its successor edges to DoneMBB.
22018 DoneMBB->splice(DoneMBB->begin(), BB,
22019 std::next(MachineBasicBlock::iterator(MI)), BB->end());
22021
22022 BB->addSuccessor(LoopMBB);
22023
22025 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22026 Register LoReg = MI.getOperand(0).getReg();
22027 Register HiReg = MI.getOperand(1).getReg();
22028 int64_t LoCounter = MI.getOperand(2).getImm();
22029 int64_t HiCounter = MI.getOperand(3).getImm();
22030 DebugLoc DL = MI.getDebugLoc();
22031
22033 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
22034 .addImm(HiCounter)
22035 .addReg(RISCV::X0);
22036 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
22037 .addImm(LoCounter)
22038 .addReg(RISCV::X0);
22039 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
22040 .addImm(HiCounter)
22041 .addReg(RISCV::X0);
22042
22043 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
22044 .addReg(HiReg)
22045 .addReg(ReadAgainReg)
22046 .addMBB(LoopMBB);
22047
22048 LoopMBB->addSuccessor(LoopMBB);
22049 LoopMBB->addSuccessor(DoneMBB);
22050
22051 MI.eraseFromParent();
22052
22053 return DoneMBB;
22054}
22055
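// A rough sketch of what SplitF64Pseudo expands to below (register names and
// the frame slot are illustrative):
//   fsd  fa0, 0(frame-slot)   ; spill the f64 source
//   lw   a0,  0(frame-slot)   ; reload the low 32 bits
//   lw   a1,  4(frame-slot)   ; reload the high 32 bits
// The split goes through a stack slot because plain RV32D (without Zfa) has
// no direct FPR64-to-GPR-pair move.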
22058 const RISCVSubtarget &Subtarget) {
22059 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
22060
22061 MachineFunction &MF = *BB->getParent();
22062 DebugLoc DL = MI.getDebugLoc();
22065 Register LoReg = MI.getOperand(0).getReg();
22066 Register HiReg = MI.getOperand(1).getReg();
22067 Register SrcReg = MI.getOperand(2).getReg();
22068
22069 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
22070 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22071
22072 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
22073 RI, Register());
22075 MachineMemOperand *MMOLo =
22079 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
22080 .addFrameIndex(FI)
22081 .addImm(0)
22082 .addMemOperand(MMOLo);
22083 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
22084 .addFrameIndex(FI)
22085 .addImm(4)
22086 .addMemOperand(MMOHi);
22087 MI.eraseFromParent(); // The pseudo instruction is gone now.
22088 return BB;
22089}
22090
22093 const RISCVSubtarget &Subtarget) {
22094 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
22095 "Unexpected instruction");
22096
22097 MachineFunction &MF = *BB->getParent();
22098 DebugLoc DL = MI.getDebugLoc();
22101 Register DstReg = MI.getOperand(0).getReg();
22102 Register LoReg = MI.getOperand(1).getReg();
22103 Register HiReg = MI.getOperand(2).getReg();
22104
22105 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
22106 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22107
22109 MachineMemOperand *MMOLo =
22113 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22114 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
22115 .addFrameIndex(FI)
22116 .addImm(0)
22117 .addMemOperand(MMOLo);
22118 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22119 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
22120 .addFrameIndex(FI)
22121 .addImm(4)
22122 .addMemOperand(MMOHi);
22123 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
22124 MI.eraseFromParent(); // The pseudo instruction is gone now.
22125 return BB;
22126}
22127
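// A sketch of the sequence emitted below for a quiet FP compare (register
// names are illustrative):
//   csrrs t0, fflags, zero   ; save FFLAGS
//   flt.s a0, fa0, fa1       ; signaling compare, may spuriously set NV
//   csrw  fflags, t0         ; restore FFLAGS, discarding the spurious flag
//   feq.s zero, fa0, fa1     ; quiet compare, raises NV only for sNaN inputs
// This recovers quiet-compare exception semantics from the signaling hardware
// comparison.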
22129 unsigned RelOpcode, unsigned EqOpcode,
22130 const RISCVSubtarget &Subtarget) {
22131 DebugLoc DL = MI.getDebugLoc();
22132 Register DstReg = MI.getOperand(0).getReg();
22133 Register Src1Reg = MI.getOperand(1).getReg();
22134 Register Src2Reg = MI.getOperand(2).getReg();
22136 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22138
22139 // Save the current FFLAGS.
22140 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22141
22142 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22143 .addReg(Src1Reg)
22144 .addReg(Src2Reg);
22147
22148 // Restore the FFLAGS.
22149 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22150 .addReg(SavedFFlags, RegState::Kill);
22151
22152   // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
22153 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22154 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22155 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22158
22159 // Erase the pseudoinstruction.
22160 MI.eraseFromParent();
22161 return BB;
22162}
22163
22164static MachineBasicBlock *
22166 MachineBasicBlock *ThisMBB,
22167 const RISCVSubtarget &Subtarget) {
22168 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
22169 // Without this, custom-inserter would have generated:
22170 //
22171 // A
22172 // | \
22173 // | B
22174 // | /
22175 // C
22176 // | \
22177 // | D
22178 // | /
22179 // E
22180 //
22181 // A: X = ...; Y = ...
22182 // B: empty
22183 // C: Z = PHI [X, A], [Y, B]
22184 // D: empty
22185 // E: PHI [X, C], [Z, D]
22186 //
22187 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22188 //
22189 // A
22190 // | \
22191 // | C
22192 // | /|
22193 // |/ |
22194 // | |
22195 // | D
22196 // | /
22197 // E
22198 //
22199 // A: X = ...; Y = ...
22200 // D: empty
22201 // E: PHI [X, A], [X, C], [Y, D]
22202
22203 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22204 const DebugLoc &DL = First.getDebugLoc();
22205 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22206 MachineFunction *F = ThisMBB->getParent();
22207 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22208 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22209 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22210 MachineFunction::iterator It = ++ThisMBB->getIterator();
22211 F->insert(It, FirstMBB);
22212 F->insert(It, SecondMBB);
22213 F->insert(It, SinkMBB);
22214
22215 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22216 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22218 ThisMBB->end());
22219 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22220
22221 // Fallthrough block for ThisMBB.
22222 ThisMBB->addSuccessor(FirstMBB);
22223 // Fallthrough block for FirstMBB.
22224 FirstMBB->addSuccessor(SecondMBB);
22225 ThisMBB->addSuccessor(SinkMBB);
22226 FirstMBB->addSuccessor(SinkMBB);
22227 // This is fallthrough.
22228 SecondMBB->addSuccessor(SinkMBB);
22229
22230 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22231 Register FLHS = First.getOperand(1).getReg();
22232 Register FRHS = First.getOperand(2).getReg();
22233 // Insert appropriate branch.
22234 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22235 .addReg(FLHS)
22236 .addReg(FRHS)
22237 .addMBB(SinkMBB);
22238
22239 Register SLHS = Second.getOperand(1).getReg();
22240 Register SRHS = Second.getOperand(2).getReg();
22241 Register Op1Reg4 = First.getOperand(4).getReg();
22242 Register Op1Reg5 = First.getOperand(5).getReg();
22243
22244 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22245 // Insert appropriate branch.
22246 BuildMI(ThisMBB, DL,
22247 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22248 .addReg(SLHS)
22249 .addReg(SRHS)
22250 .addMBB(SinkMBB);
22251
22252 Register DestReg = Second.getOperand(0).getReg();
22253 Register Op2Reg4 = Second.getOperand(4).getReg();
22254 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22255 .addReg(Op2Reg4)
22256 .addMBB(ThisMBB)
22257 .addReg(Op1Reg4)
22258 .addMBB(FirstMBB)
22259 .addReg(Op1Reg5)
22260 .addMBB(SecondMBB);
22261
22262 // Now remove the Select_FPRX_s.
22263 First.eraseFromParent();
22264 Second.eraseFromParent();
22265 return SinkMBB;
22266}
22267
22270 const RISCVSubtarget &Subtarget) {
22271 // To "insert" Select_* instructions, we actually have to insert the triangle
22272 // control-flow pattern. The incoming instructions know the destination vreg
22273 // to set, the condition code register to branch on, the true/false values to
22274 // select between, and the condcode to use to select the appropriate branch.
22275 //
22276 // We produce the following control flow:
22277 // HeadMBB
22278 // | \
22279 // | IfFalseMBB
22280 // | /
22281 // TailMBB
22282 //
22283 // When we find a sequence of selects we attempt to optimize their emission
22284 // by sharing the control flow. Currently we only handle cases where we have
22285 // multiple selects with the exact same condition (same LHS, RHS and CC).
22286 // The selects may be interleaved with other instructions if the other
22287 // instructions meet some requirements we deem safe:
22288 // - They are not pseudo instructions.
22289   // - They are debug instructions, or otherwise:
22290 // - They do not have side-effects, do not access memory and their inputs do
22291 // not depend on the results of the select pseudo-instructions.
22292 // - They don't adjust stack.
22293 // The TrueV/FalseV operands of the selects cannot depend on the result of
22294 // previous selects in the sequence.
22295 // These conditions could be further relaxed. See the X86 target for a
22296 // related approach and more information.
22297 //
22298 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22299 // is checked here and handled by a separate function -
22300 // EmitLoweredCascadedSelect.
22301
22302 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22303 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22304 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22305 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22306 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22307 Next->getOperand(5).isKill())
22308 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22309
22310 Register LHS = MI.getOperand(1).getReg();
22311 Register RHS;
22312 if (MI.getOperand(2).isReg())
22313 RHS = MI.getOperand(2).getReg();
22314 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22315
22316 SmallVector<MachineInstr *, 4> SelectDebugValues;
22317 SmallSet<Register, 4> SelectDests;
22318 SelectDests.insert(MI.getOperand(0).getReg());
22319
22320 MachineInstr *LastSelectPseudo = &MI;
22321 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22322
22323 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22324 SequenceMBBI != E; ++SequenceMBBI) {
22325 if (SequenceMBBI->isDebugInstr())
22326 continue;
22327 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22328 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22329 !SequenceMBBI->getOperand(2).isReg() ||
22330 SequenceMBBI->getOperand(2).getReg() != RHS ||
22331 SequenceMBBI->getOperand(3).getImm() != CC ||
22332 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22333 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22334 break;
22335 LastSelectPseudo = &*SequenceMBBI;
22336 SequenceMBBI->collectDebugValues(SelectDebugValues);
22337 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22338 continue;
22339 }
22340 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22341 SequenceMBBI->mayLoadOrStore() ||
22342 SequenceMBBI->usesCustomInsertionHook() ||
22343 TII.isFrameInstr(*SequenceMBBI) ||
22344 SequenceMBBI->isStackAligningInlineAsm())
22345 break;
22346 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22347 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22348 }))
22349 break;
22350 }
22351
22352 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22353 DebugLoc DL = MI.getDebugLoc();
22355
22356 MachineBasicBlock *HeadMBB = BB;
22357 MachineFunction *F = BB->getParent();
22358 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22359 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22360
22361 F->insert(I, IfFalseMBB);
22362 F->insert(I, TailMBB);
22363
22364 // Set the call frame size on entry to the new basic blocks.
22365 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22366 IfFalseMBB->setCallFrameSize(CallFrameSize);
22367 TailMBB->setCallFrameSize(CallFrameSize);
22368
22369 // Transfer debug instructions associated with the selects to TailMBB.
22370 for (MachineInstr *DebugInstr : SelectDebugValues) {
22371 TailMBB->push_back(DebugInstr->removeFromParent());
22372 }
22373
22374 // Move all instructions after the sequence to TailMBB.
22375 TailMBB->splice(TailMBB->end(), HeadMBB,
22376 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22377 // Update machine-CFG edges by transferring all successors of the current
22378 // block to the new block which will contain the Phi nodes for the selects.
22379 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22380 // Set the successors for HeadMBB.
22381 HeadMBB->addSuccessor(IfFalseMBB);
22382 HeadMBB->addSuccessor(TailMBB);
22383
22384 // Insert appropriate branch.
22385 if (MI.getOperand(2).isImm())
22386 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22387 .addReg(LHS)
22388 .addImm(MI.getOperand(2).getImm())
22389 .addMBB(TailMBB);
22390 else
22391 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22392 .addReg(LHS)
22393 .addReg(RHS)
22394 .addMBB(TailMBB);
22395
22396 // IfFalseMBB just falls through to TailMBB.
22397 IfFalseMBB->addSuccessor(TailMBB);
22398
22399 // Create PHIs for all of the select pseudo-instructions.
22400 auto SelectMBBI = MI.getIterator();
22401 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22402 auto InsertionPoint = TailMBB->begin();
22403 while (SelectMBBI != SelectEnd) {
22404 auto Next = std::next(SelectMBBI);
22405 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22406 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22407 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22408 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22409 .addReg(SelectMBBI->getOperand(4).getReg())
22410 .addMBB(HeadMBB)
22411 .addReg(SelectMBBI->getOperand(5).getReg())
22412 .addMBB(IfFalseMBB);
22413 SelectMBBI->eraseFromParent();
22414 }
22415 SelectMBBI = Next;
22416 }
22417
22418 F->getProperties().resetNoPHIs();
22419 return TailMBB;
22420}
22421
22422// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22423static const RISCV::RISCVMaskedPseudoInfo *
22424lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22426 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22427 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22429 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22430 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22431 return Masked;
22432}
22433
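// A sketch of the expansion built below for PseudoVFROUND_NOEXCEPT_V_*_MASK
// (vector round-to-integer that leaves FFLAGS untouched); registers are
// illustrative:
//   csrrs  t0, fflags, zero         ; save FFLAGS
//   vfcvt.x.f.v v-tmp, vs2, v0.t    ; to integer, dynamic rounding mode
//   vfcvt.f.x.v vd, v-tmp, v0.t     ; back to FP, dynamic rounding mode
//   csrw   fflags, t0               ; restore FFLAGS, dropping any NX/NV set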
22436 unsigned CVTXOpc) {
22437 DebugLoc DL = MI.getDebugLoc();
22438
22440
22442 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22443
22444 // Save the old value of FFLAGS.
22445 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22446
22447 assert(MI.getNumOperands() == 7);
22448
22449 // Emit a VFCVT_X_F
22450 const TargetRegisterInfo *TRI =
22452 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22453 Register Tmp = MRI.createVirtualRegister(RC);
22454 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22455 .add(MI.getOperand(1))
22456 .add(MI.getOperand(2))
22457 .add(MI.getOperand(3))
22458 .add(MachineOperand::CreateImm(7)) // frm = DYN
22459 .add(MI.getOperand(4))
22460 .add(MI.getOperand(5))
22461 .add(MI.getOperand(6))
22462 .add(MachineOperand::CreateReg(RISCV::FRM,
22463 /*IsDef*/ false,
22464 /*IsImp*/ true));
22465
22466 // Emit a VFCVT_F_X
22467 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22468 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22469 // There is no E8 variant for VFCVT_F_X.
22470 assert(Log2SEW >= 4);
22471 unsigned CVTFOpc =
22472 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22473 ->MaskedPseudo;
22474
22475 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22476 .add(MI.getOperand(0))
22477 .add(MI.getOperand(1))
22478 .addReg(Tmp)
22479 .add(MI.getOperand(3))
22480 .add(MachineOperand::CreateImm(7)) // frm = DYN
22481 .add(MI.getOperand(4))
22482 .add(MI.getOperand(5))
22483 .add(MI.getOperand(6))
22484 .add(MachineOperand::CreateReg(RISCV::FRM,
22485 /*IsDef*/ false,
22486 /*IsImp*/ true));
22487
22488 // Restore FFLAGS.
22489 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22490 .addReg(SavedFFLAGS, RegState::Kill);
22491
22492 // Erase the pseudoinstruction.
22493 MI.eraseFromParent();
22494 return BB;
22495}
22496
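// A sketch of the control flow built below for PseudoFROUND_* (round a scalar
// FP value to an integral FP value); register names are made up:
//   fsgnjx.s ft0, fa0, fa0      ; ft0 = |x|
//   flt.s    t0, ft0, fa1       ; |x| < 2^precision, i.e. may have a fraction?
//   beq      t0, zero, done     ; already integral (or NaN/inf): keep x
//   fcvt.w.s t1, fa0, <frm>     ; round to integer with the requested mode
//   fcvt.s.w ft1, t1, <frm>     ; and convert back
//   fsgnj.s  ft1, ft1, fa0      ; restore the sign (covers -0.0)
// done:
//   result = phi(x, ft1)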
22498 const RISCVSubtarget &Subtarget) {
22499 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22500 const TargetRegisterClass *RC;
22501 switch (MI.getOpcode()) {
22502 default:
22503 llvm_unreachable("Unexpected opcode");
22504 case RISCV::PseudoFROUND_H:
22505 CmpOpc = RISCV::FLT_H;
22506 F2IOpc = RISCV::FCVT_W_H;
22507 I2FOpc = RISCV::FCVT_H_W;
22508 FSGNJOpc = RISCV::FSGNJ_H;
22509 FSGNJXOpc = RISCV::FSGNJX_H;
22510 RC = &RISCV::FPR16RegClass;
22511 break;
22512 case RISCV::PseudoFROUND_H_INX:
22513 CmpOpc = RISCV::FLT_H_INX;
22514 F2IOpc = RISCV::FCVT_W_H_INX;
22515 I2FOpc = RISCV::FCVT_H_W_INX;
22516 FSGNJOpc = RISCV::FSGNJ_H_INX;
22517 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22518 RC = &RISCV::GPRF16RegClass;
22519 break;
22520 case RISCV::PseudoFROUND_S:
22521 CmpOpc = RISCV::FLT_S;
22522 F2IOpc = RISCV::FCVT_W_S;
22523 I2FOpc = RISCV::FCVT_S_W;
22524 FSGNJOpc = RISCV::FSGNJ_S;
22525 FSGNJXOpc = RISCV::FSGNJX_S;
22526 RC = &RISCV::FPR32RegClass;
22527 break;
22528 case RISCV::PseudoFROUND_S_INX:
22529 CmpOpc = RISCV::FLT_S_INX;
22530 F2IOpc = RISCV::FCVT_W_S_INX;
22531 I2FOpc = RISCV::FCVT_S_W_INX;
22532 FSGNJOpc = RISCV::FSGNJ_S_INX;
22533 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22534 RC = &RISCV::GPRF32RegClass;
22535 break;
22536 case RISCV::PseudoFROUND_D:
22537 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22538 CmpOpc = RISCV::FLT_D;
22539 F2IOpc = RISCV::FCVT_L_D;
22540 I2FOpc = RISCV::FCVT_D_L;
22541 FSGNJOpc = RISCV::FSGNJ_D;
22542 FSGNJXOpc = RISCV::FSGNJX_D;
22543 RC = &RISCV::FPR64RegClass;
22544 break;
22545 case RISCV::PseudoFROUND_D_INX:
22546 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22547 CmpOpc = RISCV::FLT_D_INX;
22548 F2IOpc = RISCV::FCVT_L_D_INX;
22549 I2FOpc = RISCV::FCVT_D_L_INX;
22550 FSGNJOpc = RISCV::FSGNJ_D_INX;
22551 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22552 RC = &RISCV::GPRRegClass;
22553 break;
22554 }
22555
22556 const BasicBlock *BB = MBB->getBasicBlock();
22557 DebugLoc DL = MI.getDebugLoc();
22558 MachineFunction::iterator I = ++MBB->getIterator();
22559
22560 MachineFunction *F = MBB->getParent();
22561 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22562 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22563
22564 F->insert(I, CvtMBB);
22565 F->insert(I, DoneMBB);
22566 // Move all instructions after the sequence to DoneMBB.
22567 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22568 MBB->end());
22569 // Update machine-CFG edges by transferring all successors of the current
22570 // block to the new block which will contain the Phi nodes for the selects.
22572 // Set the successors for MBB.
22573 MBB->addSuccessor(CvtMBB);
22574 MBB->addSuccessor(DoneMBB);
22575
22576 Register DstReg = MI.getOperand(0).getReg();
22577 Register SrcReg = MI.getOperand(1).getReg();
22578 Register MaxReg = MI.getOperand(2).getReg();
22579 int64_t FRM = MI.getOperand(3).getImm();
22580
22581 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22582 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22583
22584 Register FabsReg = MRI.createVirtualRegister(RC);
22585 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22586
22587 // Compare the FP value to the max value.
22588 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22589 auto MIB =
22590 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22593
22594 // Insert branch.
22595 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22596 .addReg(CmpReg)
22597 .addReg(RISCV::X0)
22598 .addMBB(DoneMBB);
22599
22600 CvtMBB->addSuccessor(DoneMBB);
22601
22602 // Convert to integer.
22603 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22604 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22607
22608 // Convert back to FP.
22609 Register I2FReg = MRI.createVirtualRegister(RC);
22610 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22613
22614 // Restore the sign bit.
22615 Register CvtReg = MRI.createVirtualRegister(RC);
22616 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22617
22618 // Merge the results.
22619 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22620 .addReg(SrcReg)
22621 .addMBB(MBB)
22622 .addReg(CvtReg)
22623 .addMBB(CvtMBB);
22624
22625 MI.eraseFromParent();
22626 return DoneMBB;
22627}
22628
22631 MachineBasicBlock *BB) const {
22632 switch (MI.getOpcode()) {
22633 default:
22634 llvm_unreachable("Unexpected instr type to insert");
22635 case RISCV::ReadCounterWide:
22636 assert(!Subtarget.is64Bit() &&
22637 "ReadCounterWide is only to be used on riscv32");
22638 return emitReadCounterWidePseudo(MI, BB);
22639 case RISCV::Select_GPR_Using_CC_GPR:
22640 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
22641 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22642 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22643 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22644 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22645 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22646 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22647 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22648 case RISCV::Select_FPR16_Using_CC_GPR:
22649 case RISCV::Select_FPR16INX_Using_CC_GPR:
22650 case RISCV::Select_FPR32_Using_CC_GPR:
22651 case RISCV::Select_FPR32INX_Using_CC_GPR:
22652 case RISCV::Select_FPR64_Using_CC_GPR:
22653 case RISCV::Select_FPR64INX_Using_CC_GPR:
22654 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22655 return emitSelectPseudo(MI, BB, Subtarget);
22656 case RISCV::BuildPairF64Pseudo:
22657 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22658 case RISCV::SplitF64Pseudo:
22659 return emitSplitF64Pseudo(MI, BB, Subtarget);
22660 case RISCV::PseudoQuietFLE_H:
22661 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22662 case RISCV::PseudoQuietFLE_H_INX:
22663 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22664 case RISCV::PseudoQuietFLT_H:
22665 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22666 case RISCV::PseudoQuietFLT_H_INX:
22667 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22668 case RISCV::PseudoQuietFLE_S:
22669 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22670 case RISCV::PseudoQuietFLE_S_INX:
22671 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22672 case RISCV::PseudoQuietFLT_S:
22673 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22674 case RISCV::PseudoQuietFLT_S_INX:
22675 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22676 case RISCV::PseudoQuietFLE_D:
22677 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22678 case RISCV::PseudoQuietFLE_D_INX:
22679 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22680 case RISCV::PseudoQuietFLE_D_IN32X:
22681 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22682 Subtarget);
22683 case RISCV::PseudoQuietFLT_D:
22684 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22685 case RISCV::PseudoQuietFLT_D_INX:
22686 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22687 case RISCV::PseudoQuietFLT_D_IN32X:
22688 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22689 Subtarget);
22690
22691 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22692 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22693 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22694 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22695 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22696 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22697 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22698 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22699 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22700 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22701 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22702 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22703 case RISCV::PseudoFROUND_H:
22704 case RISCV::PseudoFROUND_H_INX:
22705 case RISCV::PseudoFROUND_S:
22706 case RISCV::PseudoFROUND_S_INX:
22707 case RISCV::PseudoFROUND_D:
22708 case RISCV::PseudoFROUND_D_INX:
22709 case RISCV::PseudoFROUND_D_IN32X:
22710 return emitFROUND(MI, BB, Subtarget);
22711 case RISCV::PROBED_STACKALLOC_DYN:
22712 return emitDynamicProbedAlloc(MI, BB);
22713 case TargetOpcode::STATEPOINT:
22714     // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
22715     // while the jal call instruction (to which the statepoint is eventually
22716     // lowered) has an implicit def. This def is early-clobber as it is set at
22717     // the moment of the call, before any use is read.
22718     // Add this implicit dead def here as a workaround.
22719 MI.addOperand(*MI.getMF(),
22721 RISCV::X1, /*isDef*/ true,
22722 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22723 /*isUndef*/ false, /*isEarlyClobber*/ true));
22724 [[fallthrough]];
22725 case TargetOpcode::STACKMAP:
22726 case TargetOpcode::PATCHPOINT:
22727 if (!Subtarget.is64Bit())
22728 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22729 "supported on 64-bit targets");
22730 return emitPatchPoint(MI, BB);
22731 }
22732}
22733
22734void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
22735 SDNode *Node) const {
22736 // If instruction defines FRM operand, conservatively set it as non-dead to
22737 // express data dependency with FRM users and prevent incorrect instruction
22738 // reordering.
22739 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22740 FRMDef->setIsDead(false);
22741 return;
22742 }
22743 // Add FRM dependency to any instructions with dynamic rounding mode.
22744 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22745 if (Idx < 0) {
22746 // Vector pseudos have FRM index indicated by TSFlags.
22747 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22748 if (Idx < 0)
22749 return;
22750 }
22751 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22752 return;
22753 // If the instruction already reads FRM, don't add another read.
22754 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22755 return;
22756 MI.addOperand(
22757 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22758}
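// Illustrative sketch (abbreviated MIR, not taken from an actual compilation;
// operand lists vary per pseudo): an FP instruction whose rounding-mode
// operand is DYN (7) picks up an implicit FRM use after this hook runs:
//
//   %2:fpr32 = nofpexcept FADD_S %0, %1, 7
// becomes
//   %2:fpr32 = nofpexcept FADD_S %0, %1, 7, implicit $frm
//
// The extra use keeps later passes from reordering the FADD across writes to
// the frm CSR.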
22759
22760void RISCVTargetLowering::analyzeInputArgs(
22761 MachineFunction &MF, CCState &CCInfo,
22762 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22763 RISCVCCAssignFn Fn) const {
22764 for (const auto &[Idx, In] : enumerate(Ins)) {
22765 MVT ArgVT = In.VT;
22766 ISD::ArgFlagsTy ArgFlags = In.Flags;
22767
22768 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22769 In.OrigTy)) {
22770 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22771 << ArgVT << '\n');
22772 llvm_unreachable(nullptr);
22773 }
22774 }
22775}
22776
22777void RISCVTargetLowering::analyzeOutputArgs(
22778 MachineFunction &MF, CCState &CCInfo,
22779 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22780 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22781 for (const auto &[Idx, Out] : enumerate(Outs)) {
22782 MVT ArgVT = Out.VT;
22783 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22784
22785 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22786 Out.OrigTy)) {
22787 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22788 << ArgVT << "\n");
22789 llvm_unreachable(nullptr);
22790 }
22791 }
22792}
22793
22794// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22795// values.
22796 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
22797 const CCValAssign &VA, const SDLoc &DL,
22798 const RISCVSubtarget &Subtarget) {
22799 if (VA.needsCustom()) {
22800 if (VA.getLocVT().isInteger() &&
22801 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22802 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22803 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22804 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22805 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
22806 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22807 llvm_unreachable("Unexpected Custom handling.");
22808 }
22809
22810 switch (VA.getLocInfo()) {
22811 default:
22812 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22813 case CCValAssign::Full:
22814 break;
22815 case CCValAssign::BCvt:
22816 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22817 break;
22818 }
22819 return Val;
22820}
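// Example of the custom LocVT -> ValVT moves handled above (assembly is a
// sketch and assumes an integer argument ABI such as lp64 with the relevant
// FP extensions enabled; the exact registers depend on the calling
// convention):
//
//   f16/bf16 arriving in a GPR:       fmv.h.x  fa0, a0    (FMV_H_X)
//   f32 arriving in an i64 GPR:       fmv.w.x  fa0, a0    (FMV_W_X_RV64)
//
// The fixed-length-vector case instead re-wraps the value in its scalable
// container type via convertFromScalableVector.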
22821
22822// The caller is responsible for loading the full value if the argument is
22823// passed with CCValAssign::Indirect.
22824 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
22825 const CCValAssign &VA, const SDLoc &DL,
22826 const ISD::InputArg &In,
22827 const RISCVTargetLowering &TLI) {
22828 MachineFunction &MF = DAG.getMachineFunction();
22829 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22830 EVT LocVT = VA.getLocVT();
22831 SDValue Val;
22832 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22833 Register VReg = RegInfo.createVirtualRegister(RC);
22834 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22835 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22836
22837 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22838 if (In.isOrigArg()) {
22839 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22840 if (OrigArg->getType()->isIntegerTy()) {
22841 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22842 // An input zero extended from i31 can also be considered sign extended.
22843 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22844 (BitWidth < 32 && In.Flags.isZExt())) {
22845 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
22846 RVFI->addSExt32Register(VReg);
22847 }
22848 }
22849 }
22850
22851 if (VA.getLocInfo() == CCValAssign::Indirect)
22852 return Val;
22853
22854 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22855}
22856
22857 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
22858 const CCValAssign &VA, const SDLoc &DL,
22859 const RISCVSubtarget &Subtarget) {
22860 EVT LocVT = VA.getLocVT();
22861
22862 if (VA.needsCustom()) {
22863 if (LocVT.isInteger() &&
22864 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22865 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22866 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22867 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22868 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22869 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22870 llvm_unreachable("Unexpected Custom handling.");
22871 }
22872
22873 switch (VA.getLocInfo()) {
22874 default:
22875 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22876 case CCValAssign::Full:
22877 break;
22878 case CCValAssign::BCvt:
22879 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22880 break;
22881 }
22882 return Val;
22883}
22884
22885// The caller is responsible for loading the full value if the argument is
22886// passed with CCValAssign::Indirect.
22887 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
22888 const CCValAssign &VA, const SDLoc &DL) {
22889 MachineFunction &MF = DAG.getMachineFunction();
22890 MachineFrameInfo &MFI = MF.getFrameInfo();
22891 EVT LocVT = VA.getLocVT();
22892 EVT ValVT = VA.getValVT();
22893 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
22894 if (VA.getLocInfo() == CCValAssign::Indirect) {
22895 // When the value is a scalable vector, we save the pointer which points to
22896 // the scalable vector value in the stack. The ValVT will be the pointer
22897 // type, instead of the scalable vector type.
22898 ValVT = LocVT;
22899 }
22900 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22901 /*IsImmutable=*/true);
22902 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22903 SDValue Val;
22904
22905 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
22906 switch (VA.getLocInfo()) {
22907 default:
22908 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22909 case CCValAssign::Full:
22910 case CCValAssign::Indirect:
22911 case CCValAssign::BCvt:
22912 break;
22913 }
22914 Val = DAG.getExtLoad(
22915 ExtType, DL, LocVT, Chain, FIN,
22916 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
22917 return Val;
22918}
22919
22920 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
22921 const CCValAssign &VA,
22922 const CCValAssign &HiVA,
22923 const SDLoc &DL) {
22924 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22925 "Unexpected VA");
22926 MachineFunction &MF = DAG.getMachineFunction();
22927 MachineFrameInfo &MFI = MF.getFrameInfo();
22928 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22929
22930 assert(VA.isRegLoc() && "Expected register VA assignment");
22931
22932 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22933 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
22934 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
22935 SDValue Hi;
22936 if (HiVA.isMemLoc()) {
22937 // Second half of f64 is passed on the stack.
22938 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
22939 /*IsImmutable=*/true);
22940 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
22941 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
22942 MachinePointerInfo::getFixedStack(MF, FI));
22943 } else {
22944 // Second half of f64 is passed in another GPR.
22945 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22946 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
22947 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
22948 }
22949 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
22950}
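// Example (sketch): with -mattr=+d and -target-abi=ilp32, a leading `double`
// argument arrives as two i32 halves, low half in a0 and high half in a1 (or
// on the stack once the argument GPRs run out). The halves gathered above are
// glued back together as
//
//   f64 = RISCVISD::BuildPairF64 lo:i32, hi:i32
//
// which is later expanded by the pseudo-expansion machinery (typically via a
// stack slot, or a single fmvp.d.x when Zfa is available).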
22951
22952// Transform physical registers into virtual registers.
22953SDValue RISCVTargetLowering::LowerFormalArguments(
22954 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
22955 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
22956 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
22957
22958 MachineFunction &MF = DAG.getMachineFunction();
22959
22960 switch (CallConv) {
22961 default:
22962 reportFatalUsageError("Unsupported calling convention");
22963 case CallingConv::C:
22964 case CallingConv::Fast:
22967 case CallingConv::GRAAL:
22968 case CallingConv::RISCV_VectorCall:
22969#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
22970 CC_VLS_CASE(32)
22971 CC_VLS_CASE(64)
22972 CC_VLS_CASE(128)
22973 CC_VLS_CASE(256)
22974 CC_VLS_CASE(512)
22975 CC_VLS_CASE(1024)
22976 CC_VLS_CASE(2048)
22977 CC_VLS_CASE(4096)
22978 CC_VLS_CASE(8192)
22979 CC_VLS_CASE(16384)
22980 CC_VLS_CASE(32768)
22981 CC_VLS_CASE(65536)
22982#undef CC_VLS_CASE
22983 break;
22984 case CallingConv::GHC:
22985 if (Subtarget.hasStdExtE())
22986 reportFatalUsageError("GHC calling convention is not supported on RVE!");
22987 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
22988 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
22989 "(Zdinx/D) instruction set extensions");
22990 }
22991
22992 const Function &Func = MF.getFunction();
22993 if (Func.hasFnAttribute("interrupt")) {
22994 if (!Func.arg_empty())
22996 "Functions with the interrupt attribute cannot have arguments!");
22997
22998 StringRef Kind =
22999 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23000
23001 constexpr StringLiteral SupportedInterruptKinds[] = {
23002 "machine",
23003 "supervisor",
23004 "rnmi",
23005 "qci-nest",
23006 "qci-nonest",
23007 "SiFive-CLIC-preemptible",
23008 "SiFive-CLIC-stack-swap",
23009 "SiFive-CLIC-preemptible-stack-swap",
23010 };
23011 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
23013 "Function interrupt attribute argument not supported!");
23014
23015 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
23017 "'qci-*' interrupt kinds require Xqciint extension");
23018
23019 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
23021 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
23022
23023 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
23024 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
23025 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
23026 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
23027 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
23028 "have a frame pointer");
23029 }
23030
23031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23032 MVT XLenVT = Subtarget.getXLenVT();
23033 unsigned XLenInBytes = Subtarget.getXLen() / 8;
23034 // Used with vargs to accumulate store chains.
23035 std::vector<SDValue> OutChains;
23036
23037 // Assign locations to all of the incoming arguments.
23038 SmallVector<CCValAssign, 16> ArgLocs;
23039 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23040
23041 if (CallConv == CallingConv::GHC)
23042 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
23043 else
23044 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
23045 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23046 : CC_RISCV);
23047
23048 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
23049 CCValAssign &VA = ArgLocs[i];
23050 SDValue ArgValue;
23051 // Passing f64 on RV32D with a soft float ABI must be handled as a special
23052 // case.
23053 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23054 assert(VA.needsCustom());
23055 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
23056 } else if (VA.isRegLoc())
23057 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
23058 else
23059 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
23060
23061 if (VA.getLocInfo() == CCValAssign::Indirect) {
23062 // If the original argument was split and passed by reference (e.g. i128
23063 // on RV32), we need to load all parts of it here (using the same
23064 // address). Vectors may be partly split to registers and partly to the
23065 // stack, in which case the base address is partly offset and subsequent
23066 // stores are relative to that.
23067 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
23068 MachinePointerInfo()));
23069 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
23070 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
23071 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23072 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
23073 CCValAssign &PartVA = ArgLocs[i + 1];
23074 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
23075 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23076 if (PartVA.getValVT().isScalableVector())
23077 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23078 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
23079 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
23080 MachinePointerInfo()));
23081 ++i;
23082 ++InsIdx;
23083 }
23084 continue;
23085 }
23086 InVals.push_back(ArgValue);
23087 }
23088
23089 if (any_of(ArgLocs,
23090 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23091 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23092
23093 if (IsVarArg) {
23094 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
23095 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
23096 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
23097 MachineFrameInfo &MFI = MF.getFrameInfo();
23098 MachineRegisterInfo &RegInfo = MF.getRegInfo();
23099 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
23100
23101 // Size of the vararg save area. For now, the varargs save area is either
23102 // zero or large enough to hold a0-a7.
23103 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
23104 int FI;
23105
23106 // If all registers are allocated, then all varargs must be passed on the
23107 // stack and we don't need to save any argregs.
23108 if (VarArgsSaveSize == 0) {
23109 int VaArgOffset = CCInfo.getStackSize();
23110 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
23111 } else {
23112 int VaArgOffset = -VarArgsSaveSize;
23113 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
23114
23115 // If saving an odd number of registers then create an extra stack slot to
23116 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
23117 // offsets to even-numbered registers remain 2*XLEN-aligned.
23118 if (Idx % 2) {
23119 MFI.CreateFixedObject(
23120 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
23121 VarArgsSaveSize += XLenInBytes;
23122 }
23123
23124 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
23125
23126 // Copy the integer registers that may have been used for passing varargs
23127 // to the vararg save area.
23128 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
23129 const Register Reg = RegInfo.createVirtualRegister(RC);
23130 RegInfo.addLiveIn(ArgRegs[I], Reg);
23131 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
23132 SDValue Store = DAG.getStore(
23133 Chain, DL, ArgValue, FIN,
23134 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
23135 OutChains.push_back(Store);
23136 FIN =
23137 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
23138 }
23139 }
23140
23141 // Record the frame index of the first variable argument,
23142 // which is needed by the VASTART lowering.
23143 RVFI->setVarArgsFrameIndex(FI);
23144 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23145 }
23146
23147 // All stores are grouped in one node to allow the matching between
23148 // the size of Ins and InVals. This only happens for vararg functions.
23149 if (!OutChains.empty()) {
23150 OutChains.push_back(Chain);
23151 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23152 }
23153
23154 return Chain;
23155}
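// Worked example for the vararg save area above (assuming the standard lp64
// ABI, so XLenInBytes == 8 and the argument registers are a0-a7): for
//
//   int sum(int n, ...);
//
// `n` consumes a0, so Idx == 1 and a1-a7 are spilled, giving
// VarArgsSaveSize = 8 * (8 - 1) = 56 bytes. Because Idx is odd, one extra
// 8-byte slot is added so the save area stays 2*XLEN-aligned, and the
// recorded VarArgsSaveSize becomes 64. va_start then points just past the
// named arguments, at the saved copy of a1.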
23156
23157/// isEligibleForTailCallOptimization - Check whether the call is eligible
23158/// for tail call optimization.
23159/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23160bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23161 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23162 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23163
23164 auto CalleeCC = CLI.CallConv;
23165 auto &Outs = CLI.Outs;
23166 auto &Caller = MF.getFunction();
23167 auto CallerCC = Caller.getCallingConv();
23168
23169 // Exception-handling functions need a special set of instructions to
23170 // indicate a return to the hardware. Tail-calling another function would
23171 // probably break this.
23172 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23173 // should be expanded as new function attributes are introduced.
23174 if (Caller.hasFnAttribute("interrupt"))
23175 return false;
23176
23177 // Do not tail call opt if the stack is used to pass parameters.
23178 if (CCInfo.getStackSize() != 0)
23179 return false;
23180
23181 // Do not tail call opt if any parameters need to be passed indirectly.
23182 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23183 // passed indirectly. So the address of the value will be passed in a
23184 // register, or if not available, then the address is put on the stack. In
23185 // order to pass indirectly, space on the stack often needs to be allocated
23186 // in order to store the value. In this case the CCInfo.getStackSize()
23187 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
23188 // are passed CCValAssign::Indirect.
23189 for (auto &VA : ArgLocs)
23190 if (VA.getLocInfo() == CCValAssign::Indirect)
23191 return false;
23192
23193 // Do not tail call opt if either caller or callee uses struct return
23194 // semantics.
23195 auto IsCallerStructRet = Caller.hasStructRetAttr();
23196 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23197 if (IsCallerStructRet || IsCalleeStructRet)
23198 return false;
23199
23200 // The callee has to preserve all registers the caller needs to preserve.
23201 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23202 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23203 if (CalleeCC != CallerCC) {
23204 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23205 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23206 return false;
23207 }
23208
23209 // Byval parameters hand the function a pointer directly into the stack area
23210 // we want to reuse during a tail call. Working around this *is* possible
23211 // but less efficient and uglier in LowerCall.
23212 for (auto &Arg : Outs)
23213 if (Arg.Flags.isByVal())
23214 return false;
23215
23216 return true;
23217}
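// Example of how these rules play out (IR sketch; the `tail` marker is only a
// request, the checks above make the final decision):
//
//   declare void @callee(i32)
//   declare void @callee_byval(ptr byval(i64))
//
//   tail call void @callee(i32 %x)        ; eligible: argument fits in a0
//   tail call void @callee_byval(ptr %p)  ; rejected: byval argument
//
// Calls from a function marked "interrupt", calls that need stack-passed or
// indirectly passed arguments, and struct-return calls are likewise kept as
// ordinary calls.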
23218
23219 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
23220 return DAG.getDataLayout().getPrefTypeAlign(
23221 VT.getTypeForEVT(*DAG.getContext()));
23222}
23223
23224// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23225// and output parameter nodes.
23226SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
23227 SmallVectorImpl<SDValue> &InVals) const {
23228 SelectionDAG &DAG = CLI.DAG;
23229 SDLoc &DL = CLI.DL;
23230 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
23231 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23232 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
23233 SDValue Chain = CLI.Chain;
23234 SDValue Callee = CLI.Callee;
23235 bool &IsTailCall = CLI.IsTailCall;
23236 CallingConv::ID CallConv = CLI.CallConv;
23237 bool IsVarArg = CLI.IsVarArg;
23238 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23239 MVT XLenVT = Subtarget.getXLenVT();
23240 const CallBase *CB = CLI.CB;
23241
23242 MachineFunction &MF = DAG.getMachineFunction();
23243 MachineFunction::CallSiteInfo CSInfo;
23244
23245 // Set type id for call site info.
23246 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23247 CSInfo = MachineFunction::CallSiteInfo(*CB);
23248
23249 // Analyze the operands of the call, assigning locations to each operand.
23250 SmallVector<CCValAssign, 16> ArgLocs;
23251 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23252
23253 if (CallConv == CallingConv::GHC) {
23254 if (Subtarget.hasStdExtE())
23255 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23256 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23257 } else
23258 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23259 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23260 : CC_RISCV);
23261
23262 // Check if it's really possible to do a tail call.
23263 if (IsTailCall)
23264 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23265
23266 if (IsTailCall)
23267 ++NumTailCalls;
23268 else if (CLI.CB && CLI.CB->isMustTailCall())
23269 reportFatalInternalError("failed to perform tail call elimination on a "
23270 "call site marked musttail");
23271
23272 // Get a count of how many bytes are to be pushed on the stack.
23273 unsigned NumBytes = ArgCCInfo.getStackSize();
23274
23275 // Create local copies for byval args
23276 SmallVector<SDValue, 8> ByValArgs;
23277 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23278 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23279 if (!Flags.isByVal())
23280 continue;
23281
23282 SDValue Arg = OutVals[i];
23283 unsigned Size = Flags.getByValSize();
23284 Align Alignment = Flags.getNonZeroByValAlign();
23285
23286 int FI =
23287 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23288 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23289 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23290
23291 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23292 /*IsVolatile=*/false,
23293 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23294 MachinePointerInfo(), MachinePointerInfo());
23295 ByValArgs.push_back(FIPtr);
23296 }
23297
23298 if (!IsTailCall)
23299 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23300
23301 // Copy argument values to their designated locations.
23302 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
23303 SmallVector<SDValue, 8> MemOpChains;
23304 SDValue StackPtr;
23305 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23306 ++i, ++OutIdx) {
23307 CCValAssign &VA = ArgLocs[i];
23308 SDValue ArgValue = OutVals[OutIdx];
23309 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23310
23311 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23312 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23313 assert(VA.isRegLoc() && "Expected register VA assignment");
23314 assert(VA.needsCustom());
23315 SDValue SplitF64 = DAG.getNode(
23316 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23317 SDValue Lo = SplitF64.getValue(0);
23318 SDValue Hi = SplitF64.getValue(1);
23319
23320 Register RegLo = VA.getLocReg();
23321 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23322
23323 // Get the CCValAssign for the Hi part.
23324 CCValAssign &HiVA = ArgLocs[++i];
23325
23326 if (HiVA.isMemLoc()) {
23327 // Second half of f64 is passed on the stack.
23328 if (!StackPtr.getNode())
23329 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23330 SDValue Address =
23331 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23332 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23333 // Emit the store.
23334 MemOpChains.push_back(DAG.getStore(
23335 Chain, DL, Hi, Address,
23336 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
23337 } else {
23338 // Second half of f64 is passed in another GPR.
23339 Register RegHigh = HiVA.getLocReg();
23340 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23341 }
23342 continue;
23343 }
23344
23345 // Promote the value if needed.
23346 // For now, only handle fully promoted and indirect arguments.
23347 if (VA.getLocInfo() == CCValAssign::Indirect) {
23348 // Store the argument in a stack slot and pass its address.
23349 Align StackAlign =
23350 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23351 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23352 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23353 // If the original argument was split (e.g. i128), we need
23354 // to store the required parts of it here (and pass just one address).
23355 // Vectors may be partly split to registers and partly to the stack, in
23356 // which case the base address is partly offset and subsequent stores are
23357 // relative to that.
23358 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23359 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23360 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23361 // Calculate the total size to store. We don't have access to what we're
23362 // actually storing other than performing the loop and collecting the
23363 // info.
23364 SmallVector<std::pair<SDValue, SDValue>> Parts;
23365 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23366 SDValue PartValue = OutVals[OutIdx + 1];
23367 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23368 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23369 EVT PartVT = PartValue.getValueType();
23370 if (PartVT.isScalableVector())
23371 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23372 StoredSize += PartVT.getStoreSize();
23373 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23374 Parts.push_back(std::make_pair(PartValue, Offset));
23375 ++i;
23376 ++OutIdx;
23377 }
23378 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23379 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23380 MemOpChains.push_back(
23381 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23382 MachinePointerInfo::getFixedStack(MF, FI)));
23383 for (const auto &Part : Parts) {
23384 SDValue PartValue = Part.first;
23385 SDValue PartOffset = Part.second;
23386 SDValue Address =
23387 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23388 MemOpChains.push_back(
23389 DAG.getStore(Chain, DL, PartValue, Address,
23390 MachinePointerInfo::getFixedStack(MF, FI)));
23391 }
23392 ArgValue = SpillSlot;
23393 } else {
23394 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23395 }
23396
23397 // Use local copy if it is a byval arg.
23398 if (Flags.isByVal())
23399 ArgValue = ByValArgs[j++];
23400
23401 if (VA.isRegLoc()) {
23402 // Queue up the argument copies and emit them at the end.
23403 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23404
23405 const TargetOptions &Options = DAG.getTarget().Options;
23406 if (Options.EmitCallSiteInfo)
23407 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23408 } else {
23409 assert(VA.isMemLoc() && "Argument not register or memory");
23410 assert(!IsTailCall && "Tail call not allowed if stack is used "
23411 "for passing parameters");
23412
23413 // Work out the address of the stack slot.
23414 if (!StackPtr.getNode())
23415 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23416 SDValue Address =
23417 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23418 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23419
23420 // Emit the store.
23421 MemOpChains.push_back(
23422 DAG.getStore(Chain, DL, ArgValue, Address,
23423 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
23424 }
23425 }
23426
23427 // Join the stores, which are independent of one another.
23428 if (!MemOpChains.empty())
23429 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23430
23431 SDValue Glue;
23432
23433 // Build a sequence of copy-to-reg nodes, chained and glued together.
23434 for (auto &Reg : RegsToPass) {
23435 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23436 Glue = Chain.getValue(1);
23437 }
23438
23439 // Validate that none of the argument registers have been marked as
23440 // reserved, if so report an error. Do the same for the return address if this
23441 // is not a tailcall.
23442 validateCCReservedRegs(RegsToPass, MF);
23443 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23444 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23445 MF.getFunction(),
23446 "Return address register required, but has been reserved."});
23447
23448 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23449 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23450 // split it and then direct call can be matched by PseudoCALL.
23451 bool CalleeIsLargeExternalSymbol = false;
23452 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
23453 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23454 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23455 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23456 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23457 CalleeIsLargeExternalSymbol = true;
23458 }
23459 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23460 const GlobalValue *GV = S->getGlobal();
23461 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23462 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23463 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23464 }
23465
23466 // The first call operand is the chain and the second is the target address.
23467 SmallVector<SDValue, 8> Ops;
23468 Ops.push_back(Chain);
23469 Ops.push_back(Callee);
23470
23471 // Add argument registers to the end of the list so that they are
23472 // known live into the call.
23473 for (auto &Reg : RegsToPass)
23474 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23475
23476 // Add a register mask operand representing the call-preserved registers.
23477 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23478 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23479 assert(Mask && "Missing call preserved mask for calling convention");
23480 Ops.push_back(DAG.getRegisterMask(Mask));
23481
23482 // Glue the call to the argument copies, if any.
23483 if (Glue.getNode())
23484 Ops.push_back(Glue);
23485
23486 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23487 "Unexpected CFI type for a direct call");
23488
23489 // Emit the call.
23490 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23491
23492 // Use a software-guarded branch for large code model non-indirect calls.
23493 // A tail call to an external symbol will have a null CLI.CB, so we need
23494 // another way to determine the call site type.
23495 bool NeedSWGuarded = false;
23496 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
23497 Subtarget.hasStdExtZicfilp() &&
23498 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23499 NeedSWGuarded = true;
23500
23501 if (IsTailCall) {
23502 MF.getFrameInfo().setHasTailCall();
23503 unsigned CallOpc =
23504 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23505 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23506 if (CLI.CFIType)
23507 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23508 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23509 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23510 return Ret;
23511 }
23512
23513 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23514 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23515 if (CLI.CFIType)
23516 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23517
23518 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23519 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23520 Glue = Chain.getValue(1);
23521
23522 // Mark the end of the call, which is glued to the call itself.
23523 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23524 Glue = Chain.getValue(1);
23525
23526 // Assign locations to each value returned by this call.
23527 SmallVector<CCValAssign, 16> RVLocs;
23528 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23529 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23530
23531 // Copy all of the result registers out of their specified physreg.
23532 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23533 auto &VA = RVLocs[i];
23534 // Copy the value out
23535 SDValue RetValue =
23536 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23537 // Glue the RetValue to the end of the call sequence
23538 Chain = RetValue.getValue(1);
23539 Glue = RetValue.getValue(2);
23540
23541 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23542 assert(VA.needsCustom());
23543 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23544 MVT::i32, Glue);
23545 Chain = RetValue2.getValue(1);
23546 Glue = RetValue2.getValue(2);
23547 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23548 RetValue2);
23549 } else
23550 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23551
23552 InVals.push_back(RetValue);
23553 }
23554
23555 return Chain;
23556}
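// Schematic of the node sequence built above for a simple non-tail call
// (operands abbreviated; chains and glue shown informally):
//
//   callseq_start
//     -> CopyToReg $x10, %arg0 ...              (one per register argument)
//     -> RISCVISD::CALL TargetGlobalAddress:@f, $x10, ..., RegisterMask
//     -> callseq_end
//     -> CopyFromReg $x10 ...                   (one per register return value)
//
// Tail calls use RISCVISD::TAIL instead and skip callseq_start/callseq_end,
// and large-code-model calls with Zicfilp use the SW_GUARDED_* variants.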
23557
23558bool RISCVTargetLowering::CanLowerReturn(
23559 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23560 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23561 const Type *RetTy) const {
23562 SmallVector<CCValAssign, 16> RVLocs;
23563 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23564
23565 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23566 MVT VT = Outs[i].VT;
23567 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23568 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23569 /*IsRet=*/true, Outs[i].OrigTy))
23570 return false;
23571 }
23572 return true;
23573}
23574
23575SDValue
23576RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
23577 bool IsVarArg,
23578 const SmallVectorImpl<ISD::OutputArg> &Outs,
23579 const SmallVectorImpl<SDValue> &OutVals,
23580 const SDLoc &DL, SelectionDAG &DAG) const {
23581 MachineFunction &MF = DAG.getMachineFunction();
23582
23583 // Stores the assignment of the return value to a location.
23584 SmallVector<CCValAssign, 16> RVLocs;
23585
23586 // Info about the registers and stack slot.
23587 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23588 *DAG.getContext());
23589
23590 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23591 nullptr, CC_RISCV);
23592
23593 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23594 reportFatalUsageError("GHC functions return void only");
23595
23596 SDValue Glue;
23597 SmallVector<SDValue, 4> RetOps(1, Chain);
23598
23599 // Copy the result values into the output registers.
23600 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23601 SDValue Val = OutVals[OutIdx];
23602 CCValAssign &VA = RVLocs[i];
23603 assert(VA.isRegLoc() && "Can only return in registers!");
23604
23605 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23606 // Handle returning f64 on RV32D with a soft float ABI.
23607 assert(VA.isRegLoc() && "Expected return via registers");
23608 assert(VA.needsCustom());
23609 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23610 DAG.getVTList(MVT::i32, MVT::i32), Val);
23611 SDValue Lo = SplitF64.getValue(0);
23612 SDValue Hi = SplitF64.getValue(1);
23613 Register RegLo = VA.getLocReg();
23614 Register RegHi = RVLocs[++i].getLocReg();
23615
23616 if (Subtarget.isRegisterReservedByUser(RegLo) ||
23617 Subtarget.isRegisterReservedByUser(RegHi))
23618 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23619 MF.getFunction(),
23620 "Return value register required, but has been reserved."});
23621
23622 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23623 Glue = Chain.getValue(1);
23624 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23625 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23626 Glue = Chain.getValue(1);
23627 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23628 } else {
23629 // Handle a 'normal' return.
23630 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23631 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23632
23633 if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
23634 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
23635 MF.getFunction(),
23636 "Return value register required, but has been reserved."});
23637
23638 // Guarantee that all emitted copies are stuck together.
23639 Glue = Chain.getValue(1);
23640 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23641 }
23642 }
23643
23644 RetOps[0] = Chain; // Update chain.
23645
23646 // Add the glue node if we have it.
23647 if (Glue.getNode()) {
23648 RetOps.push_back(Glue);
23649 }
23650
23651 if (any_of(RVLocs,
23652 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23653 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23654
23655 unsigned RetOpc = RISCVISD::RET_GLUE;
23656 // Interrupt service routines use different return instructions.
23657 const Function &Func = DAG.getMachineFunction().getFunction();
23658 if (Func.hasFnAttribute("interrupt")) {
23659 if (!Func.getReturnType()->isVoidTy())
23661 "Functions with the interrupt attribute must have void return type!");
23662
23663 MachineFunction &MF = DAG.getMachineFunction();
23664 StringRef Kind =
23665 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23666
23667 if (Kind == "supervisor")
23668 RetOpc = RISCVISD::SRET_GLUE;
23669 else if (Kind == "rnmi") {
23670 assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23671 "Need Smrnmi extension for rnmi");
23672 RetOpc = RISCVISD::MNRET_GLUE;
23673 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23674 assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
23675 "Need Xqciint for qci-(no)nest");
23676 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23677 } else
23678 RetOpc = RISCVISD::MRET_GLUE;
23679 }
23680
23681 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23682}
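// Summary of the interrupt return selection above (this mirrors the code; the
// actual instruction is picked at instruction selection):
//
//   "machine", "SiFive-CLIC-*"      -> RISCVISD::MRET_GLUE   (mret)
//   "supervisor"                    -> RISCVISD::SRET_GLUE   (sret)
//   "rnmi"                          -> RISCVISD::MNRET_GLUE  (mnret, Smrnmi)
//   "qci-nest" / "qci-nonest"       -> QC_C_MILEAVERET_GLUE  (qc.c.mileaveret)
//
// Ordinary functions return through RISCVISD::RET_GLUE, i.e. a plain ret.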
23683
23684void RISCVTargetLowering::validateCCReservedRegs(
23685 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23686 MachineFunction &MF) const {
23687 const Function &F = MF.getFunction();
23688
23689 if (llvm::any_of(Regs, [this](auto Reg) {
23690 return Subtarget.isRegisterReservedByUser(Reg.first);
23691 }))
23692 F.getContext().diagnose(DiagnosticInfoUnsupported{
23693 F, "Argument register required, but has been reserved."});
23694}
23695
23696// Check if the result of the node is only used as a return value, as
23697// otherwise we can't perform a tail-call.
23698bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
23699 if (N->getNumValues() != 1)
23700 return false;
23701 if (!N->hasNUsesOfValue(1, 0))
23702 return false;
23703
23704 SDNode *Copy = *N->user_begin();
23705
23706 if (Copy->getOpcode() == ISD::BITCAST) {
23707 return isUsedByReturnOnly(Copy, Chain);
23708 }
23709
23710 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23711 // with soft float ABIs.
23712 if (Copy->getOpcode() != ISD::CopyToReg) {
23713 return false;
23714 }
23715
23716 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23717 // isn't safe to perform a tail call.
23718 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23719 return false;
23720
23721 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23722 bool HasRet = false;
23723 for (SDNode *Node : Copy->users()) {
23724 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23725 return false;
23726 HasRet = true;
23727 }
23728 if (!HasRet)
23729 return false;
23730
23731 Chain = Copy->getOperand(0);
23732 return true;
23733}
23734
23735bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
23736 return CI->isTailCall();
23737}
23738
23739/// getConstraintType - Given a constraint letter, return the type of
23740/// constraint it is for this target.
23741RISCVTargetLowering::ConstraintType
23742RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
23743 if (Constraint.size() == 1) {
23744 switch (Constraint[0]) {
23745 default:
23746 break;
23747 case 'f':
23748 case 'R':
23749 return C_RegisterClass;
23750 case 'I':
23751 case 'J':
23752 case 'K':
23753 return C_Immediate;
23754 case 'A':
23755 return C_Memory;
23756 case 's':
23757 case 'S': // A symbolic address
23758 return C_Other;
23759 }
23760 } else {
23761 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23762 return C_RegisterClass;
23763 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23764 return C_RegisterClass;
23765 }
23766 return TargetLowering::getConstraintType(Constraint);
23767}
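// Example of these constraints as used from C inline assembly (illustrative;
// the exact register allocation is up to the compiler):
//
//   int r, x; int *p; float a, b;
//   __asm__("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(42));   // 'I': simm12
//   __asm__("lw %0, %1" : "=r"(r) : "A"(*p));                 // 'A': reg-addressed memory
//   __asm__("fadd.s %0, %1, %2" : "=f"(a) : "f"(a), "f"(b));  // 'f': FP register
//
// The multi-letter constraints "vr"/"vd"/"vm" select vector register classes
// and "cr"/"cR"/"cf" their RVC-compressible counterparts.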
23768
23769std::pair<unsigned, const TargetRegisterClass *>
23770RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
23771 StringRef Constraint,
23772 MVT VT) const {
23773 // First, see if this is a constraint that directly corresponds to a RISC-V
23774 // register class.
23775 if (Constraint.size() == 1) {
23776 switch (Constraint[0]) {
23777 case 'r':
23778 // TODO: Support fixed vectors up to XLen for P extension?
23779 if (VT.isVector())
23780 break;
23781 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23782 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23783 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23784 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23785 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23786 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23787 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23788 case 'f':
23789 if (VT == MVT::f16) {
23790 if (Subtarget.hasStdExtZfhmin())
23791 return std::make_pair(0U, &RISCV::FPR16RegClass);
23792 if (Subtarget.hasStdExtZhinxmin())
23793 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23794 } else if (VT == MVT::f32) {
23795 if (Subtarget.hasStdExtF())
23796 return std::make_pair(0U, &RISCV::FPR32RegClass);
23797 if (Subtarget.hasStdExtZfinx())
23798 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23799 } else if (VT == MVT::f64) {
23800 if (Subtarget.hasStdExtD())
23801 return std::make_pair(0U, &RISCV::FPR64RegClass);
23802 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23803 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23804 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23805 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23806 }
23807 break;
23808 case 'R':
23809 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23810 (VT == MVT::i128 && Subtarget.is64Bit()))
23811 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23812 break;
23813 default:
23814 break;
23815 }
23816 } else if (Constraint == "vr") {
23817 for (const auto *RC :
23818 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23819 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23820 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23821 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23822 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23823 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23824 &RISCV::VRN2M4RegClass}) {
23825 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23826 return std::make_pair(0U, RC);
23827
23828 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23829 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23830 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23831 return std::make_pair(0U, RC);
23832 }
23833 }
23834 } else if (Constraint == "vd") {
23835 for (const auto *RC :
23836 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23837 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23838 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23839 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23840 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23841 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23842 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23843 &RISCV::VRN2M4NoV0RegClass}) {
23844 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23845 return std::make_pair(0U, RC);
23846
23847 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23848 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23849 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23850 return std::make_pair(0U, RC);
23851 }
23852 }
23853 } else if (Constraint == "vm") {
23854 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23855 return std::make_pair(0U, &RISCV::VMV0RegClass);
23856
23857 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23858 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23859 // VT here might be coerced to a vector with i8 elements, so we need to
23860 // check whether this is an M1 register here instead of checking VMV0RegClass.
23861 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23862 return std::make_pair(0U, &RISCV::VMV0RegClass);
23863 }
23864 } else if (Constraint == "cr") {
23865 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23866 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23867 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23868 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23869 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23870 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23871 if (!VT.isVector())
23872 return std::make_pair(0U, &RISCV::GPRCRegClass);
23873 } else if (Constraint == "cR") {
23874 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23875 (VT == MVT::i128 && Subtarget.is64Bit()))
23876 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23877 } else if (Constraint == "cf") {
23878 if (VT == MVT::f16) {
23879 if (Subtarget.hasStdExtZfhmin())
23880 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23881 if (Subtarget.hasStdExtZhinxmin())
23882 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23883 } else if (VT == MVT::f32) {
23884 if (Subtarget.hasStdExtF())
23885 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23886 if (Subtarget.hasStdExtZfinx())
23887 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23888 } else if (VT == MVT::f64) {
23889 if (Subtarget.hasStdExtD())
23890 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23891 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23892 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23893 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23894 return std::make_pair(0U, &RISCV::GPRCRegClass);
23895 }
23896 }
23897
23898 // Clang will correctly decode the usage of register name aliases into their
23899 // official names. However, other frontends like `rustc` do not. This allows
23900 // users of these frontends to use the ABI names for registers in LLVM-style
23901 // register constraints.
23902 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23903 .Case("{zero}", RISCV::X0)
23904 .Case("{ra}", RISCV::X1)
23905 .Case("{sp}", RISCV::X2)
23906 .Case("{gp}", RISCV::X3)
23907 .Case("{tp}", RISCV::X4)
23908 .Case("{t0}", RISCV::X5)
23909 .Case("{t1}", RISCV::X6)
23910 .Case("{t2}", RISCV::X7)
23911 .Cases("{s0}", "{fp}", RISCV::X8)
23912 .Case("{s1}", RISCV::X9)
23913 .Case("{a0}", RISCV::X10)
23914 .Case("{a1}", RISCV::X11)
23915 .Case("{a2}", RISCV::X12)
23916 .Case("{a3}", RISCV::X13)
23917 .Case("{a4}", RISCV::X14)
23918 .Case("{a5}", RISCV::X15)
23919 .Case("{a6}", RISCV::X16)
23920 .Case("{a7}", RISCV::X17)
23921 .Case("{s2}", RISCV::X18)
23922 .Case("{s3}", RISCV::X19)
23923 .Case("{s4}", RISCV::X20)
23924 .Case("{s5}", RISCV::X21)
23925 .Case("{s6}", RISCV::X22)
23926 .Case("{s7}", RISCV::X23)
23927 .Case("{s8}", RISCV::X24)
23928 .Case("{s9}", RISCV::X25)
23929 .Case("{s10}", RISCV::X26)
23930 .Case("{s11}", RISCV::X27)
23931 .Case("{t3}", RISCV::X28)
23932 .Case("{t4}", RISCV::X29)
23933 .Case("{t5}", RISCV::X30)
23934 .Case("{t6}", RISCV::X31)
23935 .Default(RISCV::NoRegister);
23936 if (XRegFromAlias != RISCV::NoRegister)
23937 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
23938
23939 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
23940 // TableGen record rather than the AsmName to choose registers for InlineAsm
23941 // constraints, plus we want to match those names to the widest floating point
23942 // register type available, manually select floating point registers here.
23943 //
23944 // The second case is the ABI name of the register, so that frontends can also
23945 // use the ABI names in register constraint lists.
23946 if (Subtarget.hasStdExtF()) {
23947 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
23948 .Cases("{f0}", "{ft0}", RISCV::F0_F)
23949 .Cases("{f1}", "{ft1}", RISCV::F1_F)
23950 .Cases("{f2}", "{ft2}", RISCV::F2_F)
23951 .Cases("{f3}", "{ft3}", RISCV::F3_F)
23952 .Cases("{f4}", "{ft4}", RISCV::F4_F)
23953 .Cases("{f5}", "{ft5}", RISCV::F5_F)
23954 .Cases("{f6}", "{ft6}", RISCV::F6_F)
23955 .Cases("{f7}", "{ft7}", RISCV::F7_F)
23956 .Cases("{f8}", "{fs0}", RISCV::F8_F)
23957 .Cases("{f9}", "{fs1}", RISCV::F9_F)
23958 .Cases("{f10}", "{fa0}", RISCV::F10_F)
23959 .Cases("{f11}", "{fa1}", RISCV::F11_F)
23960 .Cases("{f12}", "{fa2}", RISCV::F12_F)
23961 .Cases("{f13}", "{fa3}", RISCV::F13_F)
23962 .Cases("{f14}", "{fa4}", RISCV::F14_F)
23963 .Cases("{f15}", "{fa5}", RISCV::F15_F)
23964 .Cases("{f16}", "{fa6}", RISCV::F16_F)
23965 .Cases("{f17}", "{fa7}", RISCV::F17_F)
23966 .Cases("{f18}", "{fs2}", RISCV::F18_F)
23967 .Cases("{f19}", "{fs3}", RISCV::F19_F)
23968 .Cases("{f20}", "{fs4}", RISCV::F20_F)
23969 .Cases("{f21}", "{fs5}", RISCV::F21_F)
23970 .Cases("{f22}", "{fs6}", RISCV::F22_F)
23971 .Cases("{f23}", "{fs7}", RISCV::F23_F)
23972 .Cases("{f24}", "{fs8}", RISCV::F24_F)
23973 .Cases("{f25}", "{fs9}", RISCV::F25_F)
23974 .Cases("{f26}", "{fs10}", RISCV::F26_F)
23975 .Cases("{f27}", "{fs11}", RISCV::F27_F)
23976 .Cases("{f28}", "{ft8}", RISCV::F28_F)
23977 .Cases("{f29}", "{ft9}", RISCV::F29_F)
23978 .Cases("{f30}", "{ft10}", RISCV::F30_F)
23979 .Cases("{f31}", "{ft11}", RISCV::F31_F)
23980 .Default(RISCV::NoRegister);
23981 if (FReg != RISCV::NoRegister) {
23982 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
23983 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
23984 unsigned RegNo = FReg - RISCV::F0_F;
23985 unsigned DReg = RISCV::F0_D + RegNo;
23986 return std::make_pair(DReg, &RISCV::FPR64RegClass);
23987 }
23988 if (VT == MVT::f32 || VT == MVT::Other)
23989 return std::make_pair(FReg, &RISCV::FPR32RegClass);
23990 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
23991 unsigned RegNo = FReg - RISCV::F0_F;
23992 unsigned HReg = RISCV::F0_H + RegNo;
23993 return std::make_pair(HReg, &RISCV::FPR16RegClass);
23994 }
23995 }
23996 }
23997
23998 if (Subtarget.hasVInstructions()) {
23999 Register VReg = StringSwitch<Register>(Constraint.lower())
24000 .Case("{v0}", RISCV::V0)
24001 .Case("{v1}", RISCV::V1)
24002 .Case("{v2}", RISCV::V2)
24003 .Case("{v3}", RISCV::V3)
24004 .Case("{v4}", RISCV::V4)
24005 .Case("{v5}", RISCV::V5)
24006 .Case("{v6}", RISCV::V6)
24007 .Case("{v7}", RISCV::V7)
24008 .Case("{v8}", RISCV::V8)
24009 .Case("{v9}", RISCV::V9)
24010 .Case("{v10}", RISCV::V10)
24011 .Case("{v11}", RISCV::V11)
24012 .Case("{v12}", RISCV::V12)
24013 .Case("{v13}", RISCV::V13)
24014 .Case("{v14}", RISCV::V14)
24015 .Case("{v15}", RISCV::V15)
24016 .Case("{v16}", RISCV::V16)
24017 .Case("{v17}", RISCV::V17)
24018 .Case("{v18}", RISCV::V18)
24019 .Case("{v19}", RISCV::V19)
24020 .Case("{v20}", RISCV::V20)
24021 .Case("{v21}", RISCV::V21)
24022 .Case("{v22}", RISCV::V22)
24023 .Case("{v23}", RISCV::V23)
24024 .Case("{v24}", RISCV::V24)
24025 .Case("{v25}", RISCV::V25)
24026 .Case("{v26}", RISCV::V26)
24027 .Case("{v27}", RISCV::V27)
24028 .Case("{v28}", RISCV::V28)
24029 .Case("{v29}", RISCV::V29)
24030 .Case("{v30}", RISCV::V30)
24031 .Case("{v31}", RISCV::V31)
24032 .Default(RISCV::NoRegister);
24033 if (VReg != RISCV::NoRegister) {
24034 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
24035 return std::make_pair(VReg, &RISCV::VMRegClass);
24036 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
24037 return std::make_pair(VReg, &RISCV::VRRegClass);
24038 for (const auto *RC :
24039 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
24040 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
24041 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
24042 return std::make_pair(VReg, RC);
24043 }
24044 }
24045 }
24046 }
24047
24048 std::pair<Register, const TargetRegisterClass *> Res =
24049 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
24050
24051 // If we picked one of the Zfinx register classes, remap it to the GPR class.
24052 // FIXME: When Zfinx is supported in CodeGen this will need to take the
24053 // Subtarget into account.
24054 if (Res.second == &RISCV::GPRF16RegClass ||
24055 Res.second == &RISCV::GPRF32RegClass ||
24056 Res.second == &RISCV::GPRPairRegClass)
24057 return std::make_pair(Res.first, &RISCV::GPRRegClass);
24058
24059 return Res;
24060}
24061
24062InlineAsm::ConstraintCode
24063RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
24064 // Currently only support length 1 constraints.
24065 if (ConstraintCode.size() == 1) {
24066 switch (ConstraintCode[0]) {
24067 case 'A':
24068 return InlineAsm::ConstraintCode::A;
24069 default:
24070 break;
24071 }
24072 }
24073
24074 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
24075}
24076
24077void RISCVTargetLowering::LowerAsmOperandForConstraint(
24078 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
24079 SelectionDAG &DAG) const {
24080 // Currently only support length 1 constraints.
24081 if (Constraint.size() == 1) {
24082 switch (Constraint[0]) {
24083 case 'I':
24084 // Validate & create a 12-bit signed immediate operand.
24085 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24086 uint64_t CVal = C->getSExtValue();
24087 if (isInt<12>(CVal))
24088 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
24089 Subtarget.getXLenVT()));
24090 }
24091 return;
24092 case 'J':
24093 // Validate & create an integer zero operand.
24094 if (isNullConstant(Op))
24095 Ops.push_back(
24096 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
24097 return;
24098 case 'K':
24099 // Validate & create a 5-bit unsigned immediate operand.
24100 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24101 uint64_t CVal = C->getZExtValue();
24102 if (isUInt<5>(CVal))
24103 Ops.push_back(
24104 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
24105 }
24106 return;
24107 case 'S':
24108 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
24109 return;
24110 default:
24111 break;
24112 }
24113 }
24114 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
24115}
24116
24117Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
24118 Instruction *Inst,
24119 AtomicOrdering Ord) const {
24120 if (Subtarget.hasStdExtZtso()) {
24121 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24122 return Builder.CreateFence(Ord);
24123 return nullptr;
24124 }
24125
24126 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24127 return Builder.CreateFence(Ord);
24128 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
24129 return Builder.CreateFence(AtomicOrdering::Release);
24130 return nullptr;
24131}
24132
24133Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
24134 Instruction *Inst,
24135 AtomicOrdering Ord) const {
24136 if (Subtarget.hasStdExtZtso()) {
24137 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
24138 return Builder.CreateFence(Ord);
24139 return nullptr;
24140 }
24141
24142 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24143 return Builder.CreateFence(AtomicOrdering::Acquire);
24144 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
24145 Ord == AtomicOrdering::SequentiallyConsistent)
24146 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24147 return nullptr;
24148}
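// Net effect of the two fence hooks above under RVWMO (no Ztso), shown as the
// usual mapping (sketch; AtomicExpand places the fences around the plain
// load/store):
//
//   load seq_cst   ->  fence rw,rw ; l{b,h,w,d} ; fence r,rw
//   load acquire   ->                l{b,h,w,d} ; fence r,rw
//   store release  ->  fence rw,w  ; s{b,h,w,d}
//
// With Ztso only the seq_cst cases keep a fence (a leading one for loads, a
// trailing one for stores); all other orderings need no fence at all.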
24149
24150TargetLowering::AtomicExpansionKind
24151RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
24152 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24153 // point operations can't be used in an lr/sc sequence without breaking the
24154 // forward-progress guarantee.
24155 if (AI->isFloatingPointOperation() ||
24156 AI->getOperation() == AtomicRMWInst::UIncWrap ||
24157 AI->getOperation() == AtomicRMWInst::UDecWrap ||
24158 AI->getOperation() == AtomicRMWInst::USubCond ||
24159 AI->getOperation() == AtomicRMWInst::USubSat)
24160 return AtomicExpansionKind::CmpXChg;
24161
24162 // Don't expand forced atomics, we want to have __sync libcalls instead.
24163 if (Subtarget.hasForcedAtomics())
24164 return AtomicExpansionKind::None;
24165
24166 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24167 if (AI->getOperation() == AtomicRMWInst::Nand) {
24168 if (Subtarget.hasStdExtZacas() &&
24169 (Size >= 32 || Subtarget.hasStdExtZabha()))
24170 return AtomicExpansionKind::CmpXChg;
24171 if (Size < 32)
24172 return AtomicExpansionKind::MaskedIntrinsic;
24173 }
24174
24175 if (Size < 32 && !Subtarget.hasStdExtZabha())
24176 return AtomicExpansionKind::MaskedIntrinsic;
24177
24178 return AtomicExpansionKind::None;
24179}
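// Example: on a subtarget without Zabha, an i8/i16 RMW such as
//
//   %old = atomicrmw add ptr %p, i8 1 monotonic
//
// is reported as MaskedIntrinsic, so AtomicExpand rewrites it to operate on
// the containing aligned word: it computes an aligned address, a shift amount
// and a mask, and calls the masked atomicrmw intrinsic (obtained below via
// getIntrinsicForMaskedAtomicRMWBinOp), which is then lowered to an LR/SC
// loop. With Zabha the same IR can select directly to amoadd.b.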
24180
24181static Intrinsic::ID
24182 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
24183 switch (BinOp) {
24184 default:
24185 llvm_unreachable("Unexpected AtomicRMW BinOp");
24186 case AtomicRMWInst::Xchg:
24187 return Intrinsic::riscv_masked_atomicrmw_xchg;
24188 case AtomicRMWInst::Add:
24189 return Intrinsic::riscv_masked_atomicrmw_add;
24190 case AtomicRMWInst::Sub:
24191 return Intrinsic::riscv_masked_atomicrmw_sub;
24192 case AtomicRMWInst::Nand:
24193 return Intrinsic::riscv_masked_atomicrmw_nand;
24194 case AtomicRMWInst::Max:
24195 return Intrinsic::riscv_masked_atomicrmw_max;
24196 case AtomicRMWInst::Min:
24197 return Intrinsic::riscv_masked_atomicrmw_min;
24198 case AtomicRMWInst::UMax:
24199 return Intrinsic::riscv_masked_atomicrmw_umax;
24200 case AtomicRMWInst::UMin:
24201 return Intrinsic::riscv_masked_atomicrmw_umin;
24202 }
24203}
24204
24205Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
24206 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24207 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24208 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24209 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24210 // mask, as this produces better code than the LR/SC loop emitted by
24211 // int_riscv_masked_atomicrmw_xchg.
24212 if (AI->getOperation() == AtomicRMWInst::Xchg &&
24213 isa<ConstantInt>(AI->getValOperand())) {
24214 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
24215 if (CVal->isZero())
24216 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24217 Builder.CreateNot(Mask, "Inv_Mask"),
24218 AI->getAlign(), Ord);
24219 if (CVal->isMinusOne())
24220 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24221 AI->getAlign(), Ord);
24222 }
24223
24224 unsigned XLen = Subtarget.getXLen();
24225 Value *Ordering =
24226 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24227 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24228 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
24229 AI->getModule(),
24230 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
24231
24232 if (XLen == 64) {
24233 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24234 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24235 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24236 }
24237
24238 Value *Result;
24239
24240 // Must pass the shift amount needed to sign extend the loaded value prior
24241 // to performing a signed comparison for min/max. ShiftAmt is the number of
24242 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24243 // is the number of bits to left+right shift the value in order to
24244 // sign-extend.
24245 if (AI->getOperation() == AtomicRMWInst::Min ||
24246 AI->getOperation() == AtomicRMWInst::Max) {
24247 const DataLayout &DL = AI->getDataLayout();
24248 unsigned ValWidth =
24249 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24250 Value *SextShamt =
24251 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24252 Result = Builder.CreateCall(LrwOpScwLoop,
24253 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24254 } else {
24255 Result =
24256 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24257 }
24258
24259 if (XLen == 64)
24260 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24261 return Result;
24262}
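// Worked example for the sign-extension shift above: with XLen == 64, a
// 16-bit min/max whose field starts at bit 16 of the aligned word has
// ValWidth == 16 and ShiftAmt == 16, so SextShamt = (64 - 16) - 16 = 32.
// Inside the LR/SC loop the loaded field is shifted left and then
// arithmetically right by 32 bits before the signed comparison, which
// sign-extends the 16-bit value to XLen as required.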
24263
24264TargetLowering::AtomicExpansionKind
24265RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
24266 AtomicCmpXchgInst *CI) const {
24267 // Don't expand forced atomics, we want to have __sync libcalls instead.
24268 if (Subtarget.hasForcedAtomics())
24269 return AtomicExpansionKind::None;
24270
24271 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
24272 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24273 (Size == 8 || Size == 16))
24274 return AtomicExpansionKind::MaskedIntrinsic;
24275 return AtomicExpansionKind::None;
24276}
24277
24278Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
24279 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24280 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24281 unsigned XLen = Subtarget.getXLen();
24282 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24283 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24284 if (XLen == 64) {
24285 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24286 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24287 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24288 }
24289 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24290 Value *Result = Builder.CreateIntrinsic(
24291 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24292 if (XLen == 64)
24293 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24294 return Result;
24295}
24296
24297bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
24298                                                        EVT DataVT) const {
24299 // We have indexed loads for all supported EEW types. Indices are always
24300 // zero extended.
24301 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24302 isTypeLegal(Extend.getValueType()) &&
24303 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24304 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24305}
24306
24307bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
24308                                               EVT VT) const {
24309 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24310 return false;
24311
24312 switch (FPVT.getSimpleVT().SimpleTy) {
24313 case MVT::f16:
24314 return Subtarget.hasStdExtZfhmin();
24315 case MVT::f32:
24316 return Subtarget.hasStdExtF();
24317 case MVT::f64:
24318 return Subtarget.hasStdExtD();
24319 default:
24320 return false;
24321 }
24322}
24323
24324unsigned RISCVTargetLowering::getJumpTableEncoding() const {
24325  // If we are using the small code model, we can reduce size of jump table
24326 // entry to 4 bytes.
24327  if (Subtarget.is64Bit() && !isPositionIndependent() &&
24328      getTargetMachine().getCodeModel() == CodeModel::Small) {
24329    return MachineJumpTableInfo::EK_Custom32;
24330  }
24331  return TargetLowering::getJumpTableEncoding();
24332}
24333
24334const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
24335    const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24336 unsigned uid, MCContext &Ctx) const {
24337  assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24338         getTargetMachine().getCodeModel() == CodeModel::Small);
24339  return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24340}
24341
24342bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
24343  // We define vscale to be VLEN/RVVBitsPerBlock.  VLEN is always a power
24344 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24345 // a power of two as well.
24346 // FIXME: This doesn't work for zve32, but that's already broken
24347 // elsewhere for the same reason.
24348 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24349 static_assert(RISCV::RVVBitsPerBlock == 64,
24350 "RVVBitsPerBlock changed, audit needed");
24351 return true;
24352}
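// E.g. VLEN=128 gives vscale = 128/64 = 2 and VLEN=256 gives vscale = 4; since
// VLEN is always a power of two >= 64, the result this hook reports is always
// a power of two as well.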
24353
24354bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
24355                                                 SDValue &Offset,
24356                                                 ISD::MemIndexedMode &AM,
24357                                                 SelectionDAG &DAG) const {
24358 // Target does not support indexed loads.
24359 if (!Subtarget.hasVendorXTHeadMemIdx())
24360 return false;
24361
24362 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24363 return false;
24364
24365 Base = Op->getOperand(0);
24366 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24367 int64_t RHSC = RHS->getSExtValue();
24368 if (Op->getOpcode() == ISD::SUB)
24369 RHSC = -(uint64_t)RHSC;
24370
24371 // The constants that can be encoded in the THeadMemIdx instructions
24372 // are of the form (sign_extend(imm5) << imm2).
24373 bool isLegalIndexedOffset = false;
24374 for (unsigned i = 0; i < 4; i++)
24375 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24376 isLegalIndexedOffset = true;
24377 break;
24378 }
24379
24380 if (!isLegalIndexedOffset)
24381 return false;
24382
24383 Offset = Op->getOperand(1);
24384 return true;
24385 }
24386
24387 return false;
24388}
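// For illustration, an offset of 48 is accepted (48 = 12 << 2, and 12 fits a
// signed 5-bit immediate), while an offset of 17 is rejected because no shift
// amount in [0, 3] yields a simm5 with the shifted-out low bits all zero.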
24389
24390bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
24391                                                    SDValue &Offset,
24392                                                    ISD::MemIndexedMode &AM,
24393                                                    SelectionDAG &DAG) const {
24394 EVT VT;
24395 SDValue Ptr;
24396 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24397 VT = LD->getMemoryVT();
24398 Ptr = LD->getBasePtr();
24399 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24400 VT = ST->getMemoryVT();
24401 Ptr = ST->getBasePtr();
24402 } else
24403 return false;
24404
24405 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24406 return false;
24407
24408 AM = ISD::PRE_INC;
24409 return true;
24410}
24411
24412bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
24413                                                     SDValue &Base,
24414                                                     SDValue &Offset,
24415                                                     ISD::MemIndexedMode &AM,
24416                                                     SelectionDAG &DAG) const {
24417 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24418 if (Op->getOpcode() != ISD::ADD)
24419 return false;
24420
24421    if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
24422      Base = LS->getBasePtr();
24423 else
24424 return false;
24425
24426 if (Base == Op->getOperand(0))
24427 Offset = Op->getOperand(1);
24428 else if (Base == Op->getOperand(1))
24429 Offset = Op->getOperand(0);
24430 else
24431 return false;
24432
24433 AM = ISD::POST_INC;
24434 return true;
24435 }
24436
24437 EVT VT;
24438 SDValue Ptr;
24439 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24440 VT = LD->getMemoryVT();
24441 Ptr = LD->getBasePtr();
24442 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24443 VT = ST->getMemoryVT();
24444 Ptr = ST->getBasePtr();
24445 } else
24446 return false;
24447
24448 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24449 return false;
24450 // Post-indexing updates the base, so it's not a valid transform
24451 // if that's not the same as the load's pointer.
24452 if (Ptr != Base)
24453 return false;
24454
24455 AM = ISD::POST_INC;
24456 return true;
24457}
24458
24459bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
24460                                                     EVT VT) const {
24461 EVT SVT = VT.getScalarType();
24462
24463 if (!SVT.isSimple())
24464 return false;
24465
24466 switch (SVT.getSimpleVT().SimpleTy) {
24467 case MVT::f16:
24468 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24469 : Subtarget.hasStdExtZfhOrZhinx();
24470 case MVT::f32:
24471 return Subtarget.hasStdExtFOrZfinx();
24472 case MVT::f64:
24473 return Subtarget.hasStdExtDOrZdinx();
24474 default:
24475 break;
24476 }
24477
24478 return false;
24479}
24480
24481ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
24482  // Zacas will use amocas.w which does not require extension.
24483 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24484}
24485
24486Register RISCVTargetLowering::getExceptionPointerRegister(
24487    const Constant *PersonalityFn) const {
24488 return RISCV::X10;
24489}
24490
24491Register RISCVTargetLowering::getExceptionSelectorRegister(
24492    const Constant *PersonalityFn) const {
24493 return RISCV::X11;
24494}
24495
24496bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
24497  // Return false to suppress the unnecessary extensions if the LibCall
24498 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24499 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24500 Type.getSizeInBits() < Subtarget.getXLen()))
24501 return false;
24502
24503 return true;
24504}
24505
24506bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
24507                                                        bool IsSigned) const {
24508 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24509 return true;
24510
24511 return IsSigned;
24512}
24513
24514bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
24515                                                 SDValue C) const {
24516 // Check integral scalar types.
24517 if (!VT.isScalarInteger())
24518 return false;
24519
24520  // Omit the optimization if the subtarget has the Zmmul extension and the
24521  // data size exceeds XLen.
24522 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24523 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24524 return false;
24525
24526 auto *ConstNode = cast<ConstantSDNode>(C);
24527 const APInt &Imm = ConstNode->getAPIntValue();
24528
24529  // Don't do this if the Xqciac extension is enabled and the Imm is a simm12.
24530 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24531 return false;
24532
24533 // Break the MUL to a SLLI and an ADD/SUB.
24534 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24535 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24536 return true;
24537
24538 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24539 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
24540 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24541 (Imm - 8).isPowerOf2()))
24542 return true;
24543
24544 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24545 // a pair of LUI/ADDI.
24546 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24547 ConstNode->hasOneUse()) {
24548 APInt ImmS = Imm.ashr(Imm.countr_zero());
24549 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24550 (1 - ImmS).isPowerOf2())
24551 return true;
24552 }
24553
24554 return false;
24555}
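// For illustration: Imm = 17 decomposes as (x << 4) + x (one SLLI plus an
// ADD), and with shNadd available (e.g. Zba), Imm = 4100 (not a simm12) can
// become sh2add(x, x << 12), i.e. 4*x + 4096*x.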
24556
24557bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
24558                                                      SDValue ConstNode) const {
24559 // Let the DAGCombiner decide for vectors.
24560 EVT VT = AddNode.getValueType();
24561 if (VT.isVector())
24562 return true;
24563
24564 // Let the DAGCombiner decide for larger types.
24565 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24566 return true;
24567
24568 // It is worse if c1 is simm12 while c1*c2 is not.
24569 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24570 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24571 const APInt &C1 = C1Node->getAPIntValue();
24572 const APInt &C2 = C2Node->getAPIntValue();
24573 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24574 return false;
24575
24576 // Default to true and let the DAGCombiner decide.
24577 return true;
24578}
24579
24580bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
24581    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24582 unsigned *Fast) const {
24583 if (!VT.isVector()) {
24584 if (Fast)
24585 *Fast = Subtarget.enableUnalignedScalarMem();
24586 return Subtarget.enableUnalignedScalarMem();
24587 }
24588
24589 // All vector implementations must support element alignment
24590 EVT ElemVT = VT.getVectorElementType();
24591 if (Alignment >= ElemVT.getStoreSize()) {
24592 if (Fast)
24593 *Fast = 1;
24594 return true;
24595 }
24596
24597 // Note: We lower an unmasked unaligned vector access to an equally sized
24598 // e8 element type access. Given this, we effectively support all unmasked
24599 // misaligned accesses. TODO: Work through the codegen implications of
24600 // allowing such accesses to be formed, and considered fast.
24601 if (Fast)
24602 *Fast = Subtarget.enableUnalignedVectorMem();
24603 return Subtarget.enableUnalignedVectorMem();
24604}
24605
24606EVT RISCVTargetLowering::getOptimalMemOpType(
24607    LLVMContext &Context, const MemOp &Op,
24608 const AttributeList &FuncAttributes) const {
24609 if (!Subtarget.hasVInstructions())
24610 return MVT::Other;
24611
24612 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24613 return MVT::Other;
24614
24615 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24616 // has an expansion threshold, and we want the number of hardware memory
24617 // operations to correspond roughly to that threshold. LMUL>1 operations
24618 // are typically expanded linearly internally, and thus correspond to more
24619 // than one actual memory operation. Note that store merging and load
24620 // combining will typically form larger LMUL operations from the LMUL1
24621 // operations emitted here, and that's okay because combining isn't
24622 // introducing new memory operations; it's just merging existing ones.
24623 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24624 const unsigned MinVLenInBytes =
24625 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24626
24627 if (Op.size() < MinVLenInBytes)
24628 // TODO: Figure out short memops. For the moment, do the default thing
24629 // which ends up using scalar sequences.
24630 return MVT::Other;
24631
24632 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24633 // fixed vectors.
24634 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24635 return MVT::Other;
24636
24637 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24638 // a large scalar constant and instead use vmv.v.x/i to do the
24639 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24640 // maximize the chance we can encode the size in the vsetvli.
24641 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24642 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24643
24644 // Do we have sufficient alignment for our preferred VT? If not, revert
24645 // to largest size allowed by our alignment criteria.
24646 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24647 Align RequiredAlign(PreferredVT.getStoreSize());
24648 if (Op.isFixedDstAlign())
24649 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24650 if (Op.isMemcpy())
24651 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24652 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24653 }
24654 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
24655}
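// Worked example (assuming VLEN >= 128, ELEN = 64, sufficient alignment):
// MinVLenInBytes is at least 16, so a 32-byte memcpy gets an i64-element type
// such as v2i64 (one LMUL1 operation per 16 bytes), while a non-zero 32-byte
// memset gets v16i8 so the fill byte can simply be splatted with vmv.v.x.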
24656
24657bool RISCVTargetLowering::splitValueIntoRegisterParts(
24658    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24659 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24660 bool IsABIRegCopy = CC.has_value();
24661 EVT ValueVT = Val.getValueType();
24662
24663 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24664 if ((ValueVT == PairVT ||
24665 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24666 ValueVT == MVT::f64)) &&
24667 NumParts == 1 && PartVT == MVT::Untyped) {
24668 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24669 MVT XLenVT = Subtarget.getXLenVT();
24670 if (ValueVT == MVT::f64)
24671 Val = DAG.getBitcast(MVT::i64, Val);
24672 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24673 // Always creating an MVT::Untyped part, so always use
24674 // RISCVISD::BuildGPRPair.
24675 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24676 return true;
24677 }
24678
24679 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24680 PartVT == MVT::f32) {
24681 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24682 // nan, and cast to f32.
24683 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24684 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24685 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24686 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24687 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24688 Parts[0] = Val;
24689 return true;
24690 }
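  // For illustration, an f16 value of 1.0 (bit pattern 0x3C00) is passed in an
  // f32 register as 0xFFFF3C00: the upper 16 bits are all ones, which is the
  // NaN-boxed form expected for narrower FP values held in wider FP registers.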
24691
24692 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24693#ifndef NDEBUG
24694 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
24695    [[maybe_unused]] unsigned ValLMUL =
24696        divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
24697                   ValNF * RISCV::RVVBitsPerBlock);
24698    unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
24699    [[maybe_unused]] unsigned PartLMUL =
24700        divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
24701                   PartNF * RISCV::RVVBitsPerBlock);
24702 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24703 "RISC-V vector tuple type only accepts same register class type "
24704 "TUPLE_INSERT");
24705#endif
24706
24707 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24708 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24709 Parts[0] = Val;
24710 return true;
24711 }
24712
24713 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24714 PartVT.isScalableVector()) {
24715 if (ValueVT.isFixedLengthVector()) {
24716 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24717 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24718 }
24719 LLVMContext &Context = *DAG.getContext();
24720 EVT ValueEltVT = ValueVT.getVectorElementType();
24721 EVT PartEltVT = PartVT.getVectorElementType();
24722 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24723 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24724 if (PartVTBitSize % ValueVTBitSize == 0) {
24725 assert(PartVTBitSize >= ValueVTBitSize);
24726 // If the element types are different, bitcast to the same element type of
24727 // PartVT first.
24728      // For example, to copy a <vscale x 1 x i8> value into
24729      // <vscale x 4 x i16>, we first widen <vscale x 1 x i8> to
24730      // <vscale x 8 x i8> with an insert_subvector, and then bitcast the
24731      // result to <vscale x 4 x i16>.
24732 if (ValueEltVT != PartEltVT) {
24733 if (PartVTBitSize > ValueVTBitSize) {
24734 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24735 assert(Count != 0 && "The number of element should not be zero.");
24736 EVT SameEltTypeVT =
24737 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24738 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24739 }
24740 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24741 } else {
24742 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24743 }
24744 Parts[0] = Val;
24745 return true;
24746 }
24747 }
24748
24749 return false;
24750}
24751
24752SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
24753    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24754 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24755 bool IsABIRegCopy = CC.has_value();
24756
24757 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24758 if ((ValueVT == PairVT ||
24759 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24760 ValueVT == MVT::f64)) &&
24761 NumParts == 1 && PartVT == MVT::Untyped) {
24762 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24763 MVT XLenVT = Subtarget.getXLenVT();
24764
24765 SDValue Val = Parts[0];
24766 // Always starting with an MVT::Untyped part, so always use
24767 // RISCVISD::SplitGPRPair
24768 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24769 Val);
24770 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24771 Val.getValue(1));
24772 if (ValueVT == MVT::f64)
24773 Val = DAG.getBitcast(ValueVT, Val);
24774 return Val;
24775 }
24776
24777 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24778 PartVT == MVT::f32) {
24779 SDValue Val = Parts[0];
24780
24781 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24782 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24783 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24784 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24785 return Val;
24786 }
24787
24788 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24789 PartVT.isScalableVector()) {
24790 LLVMContext &Context = *DAG.getContext();
24791 SDValue Val = Parts[0];
24792 EVT ValueEltVT = ValueVT.getVectorElementType();
24793 EVT PartEltVT = PartVT.getVectorElementType();
24794 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24795 if (ValueVT.isFixedLengthVector())
24796 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
24797                           .getSizeInBits()
24798                           .getKnownMinValue();
24799    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24800 if (PartVTBitSize % ValueVTBitSize == 0) {
24801 assert(PartVTBitSize >= ValueVTBitSize);
24802 EVT SameEltTypeVT = ValueVT;
24803 // If the element types are different, convert it to the same element type
24804 // of PartVT.
24805      // For example, to copy a <vscale x 1 x i8> value out of
24806      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
24807      // <vscale x 8 x i8>, and then extract the <vscale x 1 x i8>
24808      // subvector.
24809 if (ValueEltVT != PartEltVT) {
24810 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24811 assert(Count != 0 && "The number of element should not be zero.");
24812 SameEltTypeVT =
24813 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24814 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24815 }
24816 if (ValueVT.isFixedLengthVector())
24817 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24818 else
24819 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24820 return Val;
24821 }
24822 }
24823 return SDValue();
24824}
24825
24826bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24827 // When aggressively optimizing for code size, we prefer to use a div
24828 // instruction, as it is usually smaller than the alternative sequence.
24829 // TODO: Add vector division?
24830 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24831 return OptSize && !VT.isVector();
24832}
24833
24834bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
24835  // Scalarizing zero_ext and sign_ext might prevent a match to a widening
24836  // instruction in some situations.
24837  unsigned Opc = N->getOpcode();
24838  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
24839    return false;
24840 return true;
24841}
24842
24843static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24844 Module *M = IRB.GetInsertBlock()->getModule();
24845 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24846 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24847 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24848 IRB.CreateCall(ThreadPointerFunc), Offset);
24849}
24850
24851Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
24852  // Fuchsia provides a fixed TLS slot for the stack cookie.
24853 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24854 if (Subtarget.isTargetFuchsia())
24855 return useTpOffset(IRB, -0x10);
24856
24857 // Android provides a fixed TLS slot for the stack cookie. See the definition
24858 // of TLS_SLOT_STACK_GUARD in
24859 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24860 if (Subtarget.isTargetAndroid())
24861 return useTpOffset(IRB, -0x18);
24862
24863 Module *M = IRB.GetInsertBlock()->getModule();
24864
24865 if (M->getStackProtectorGuard() == "tls") {
24866 // Users must specify the offset explicitly
24867 int Offset = M->getStackProtectorGuardOffset();
24868 return useTpOffset(IRB, Offset);
24869 }
24870
24871  return TargetLowering::getIRStackGuard(IRB);
24872}
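// For illustration, on Fuchsia the guard load is produced as IR along the
// lines of:
//   %tp   = call ptr @llvm.thread.pointer()
//   %slot = getelementptr i8, ptr %tp, i32 -16
// i.e. a fixed negative offset from the thread pointer rather than a global.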
24873
24874bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
24875                                                  Align Alignment) const {
24876 if (!Subtarget.hasVInstructions())
24877 return false;
24878
24879 // Only support fixed vectors if we know the minimum vector size.
24880 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24881 return false;
24882
24883 EVT ScalarType = DataType.getScalarType();
24884 if (!isLegalElementTypeForRVV(ScalarType))
24885 return false;
24886
24887 if (!Subtarget.enableUnalignedVectorMem() &&
24888 Alignment < ScalarType.getStoreSize())
24889 return false;
24890
24891 return true;
24892}
24893
24894MachineInstr *
24895RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
24896                                   MachineBasicBlock::instr_iterator &MBBI,
24897                                   const TargetInstrInfo *TII) const {
24898 assert(MBBI->isCall() && MBBI->getCFIType() &&
24899 "Invalid call instruction for a KCFI check");
24900 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24901 MBBI->getOpcode()));
24902
24903 MachineOperand &Target = MBBI->getOperand(0);
24904 Target.setIsRenamable(false);
24905
24906 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24907 .addReg(Target.getReg())
24908 .addImm(MBBI->getCFIType())
24909 .getInstr();
24910}
24911
24912#define GET_REGISTER_MATCHER
24913#include "RISCVGenAsmMatcher.inc"
24914
24915Register
24916RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
24917                                       const MachineFunction &MF) const {
24918  Register Reg = MatchRegisterAltName(RegName);
24919  if (!Reg)
24920    Reg = MatchRegisterName(RegName);
24921  if (!Reg)
24922    return Reg;
24923
24924 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
24925 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
24926 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
24927 StringRef(RegName) + "\"."));
24928 return Reg;
24929}
24930
24931MachineMemOperand::Flags
24932RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
24933  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
24934
24935 if (NontemporalInfo == nullptr)
24936    return MachineMemOperand::MONone;
24937
24938  // 1 (default value) -> __RISCV_NTLH_ALL
24939 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
24940 // 3 -> __RISCV_NTLH_ALL_PRIVATE
24941 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
24942 // 5 -> __RISCV_NTLH_ALL
24943 int NontemporalLevel = 5;
24944 const MDNode *RISCVNontemporalInfo =
24945 I.getMetadata("riscv-nontemporal-domain");
24946 if (RISCVNontemporalInfo != nullptr)
24947    NontemporalLevel =
24948        cast<ConstantInt>(
24949            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
24950 ->getValue())
24951 ->getZExtValue();
24952
24953 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
24954 "RISC-V target doesn't support this non-temporal domain.");
24955
24956 NontemporalLevel -= 2;
24957  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
24958  if (NontemporalLevel & 0b1)
24959 Flags |= MONontemporalBit0;
24960 if (NontemporalLevel & 0b10)
24961 Flags |= MONontemporalBit1;
24962
24963 return Flags;
24964}
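// For illustration, a nontemporal load that additionally carries
// !riscv-nontemporal-domain !{i32 3} (__RISCV_NTLH_ALL_PRIVATE) maps to
// 3 - 2 = 1, so only MONontemporalBit0 is set on the resulting memory operand.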
24965
24966MachineMemOperand::Flags
24967RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
24968
24969 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
24970  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
24971  TargetFlags |= (NodeFlags & MONontemporalBit0);
24972 TargetFlags |= (NodeFlags & MONontemporalBit1);
24973 return TargetFlags;
24974}
24975
24976bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
24977    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
24978 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
24979}
24980
24981bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
24982  if (VT.isVector()) {
24983 EVT SVT = VT.getVectorElementType();
24984 // If the element type is legal we can use cpop.v if it is enabled.
24985 if (isLegalElementTypeForRVV(SVT))
24986 return Subtarget.hasStdExtZvbb();
24987 // Don't consider it fast if the type needs to be legalized or scalarized.
24988 return false;
24989 }
24990
24991 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
24992}
24993
24994unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
24995                                                 ISD::CondCode Cond) const {
24996 return isCtpopFast(VT) ? 0 : 1;
24997}
24998
24999bool RISCVTargetLowering::shouldInsertFencesForAtomic(
25000    const Instruction *I) const {
25001 if (Subtarget.hasStdExtZalasr()) {
25002 if (Subtarget.hasStdExtZtso()) {
25003 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
25004 // should be lowered to plain load/store. The easiest way to do this is
25005 // to say we should insert fences for them, and the fence insertion code
25006 // will just not insert any fences
25007 auto *LI = dyn_cast<LoadInst>(I);
25008 auto *SI = dyn_cast<StoreInst>(I);
25009 if ((LI &&
25010 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
25011 (SI &&
25012 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
25013 // Here, this is a load or store which is seq_cst, and needs a .aq or
25014 // .rl therefore we shouldn't try to insert fences
25015 return false;
25016 }
25017 // Here, we are a TSO inst that isn't a seq_cst load/store
25018 return isa<LoadInst>(I) || isa<StoreInst>(I);
25019 }
25020 return false;
25021 }
25022 // Note that one specific case requires fence insertion for an
25023 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
25024 // than this hook due to limitations in the interface here.
25025 return isa<LoadInst>(I) || isa<StoreInst>(I);
25026}
25027
25028bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
25029
25030 // GISel support is in progress or complete for these opcodes.
25031 unsigned Op = Inst.getOpcode();
25032 if (Op == Instruction::Add || Op == Instruction::Sub ||
25033 Op == Instruction::And || Op == Instruction::Or ||
25034 Op == Instruction::Xor || Op == Instruction::InsertElement ||
25035 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
25036 Op == Instruction::Freeze || Op == Instruction::Store)
25037 return false;
25038
25039 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
25040 // Mark RVV intrinsic as supported.
25041 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
25042 // GISel doesn't support tuple types yet.
25043 if (Inst.getType()->isRISCVVectorTupleTy())
25044 return true;
25045
25046 for (unsigned i = 0; i < II->arg_size(); ++i)
25047 if (II->getArgOperand(i)->getType()->isRISCVVectorTupleTy())
25048 return true;
25049
25050 return false;
25051 }
25052 }
25053
25054 if (Inst.getType()->isScalableTy())
25055 return true;
25056
25057 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
25058 if (Inst.getOperand(i)->getType()->isScalableTy() &&
25059 !isa<ReturnInst>(&Inst))
25060 return true;
25061
25062 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
25063 if (AI->getAllocatedType()->isScalableTy())
25064 return true;
25065 }
25066
25067 return false;
25068}
25069
25070SDValue
25071RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
25072 SelectionDAG &DAG,
25073 SmallVectorImpl<SDNode *> &Created) const {
25074 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
25075 if (isIntDivCheap(N->getValueType(0), Attr))
25076 return SDValue(N, 0); // Lower SDIV as SDIV
25077
25078 // Only perform this transform if short forward branch opt is supported.
25079 if (!Subtarget.hasShortForwardBranchOpt())
25080 return SDValue();
25081 EVT VT = N->getValueType(0);
25082 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
25083 return SDValue();
25084
25085 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
25086 if (Divisor.sgt(2048) || Divisor.slt(-2048))
25087 return SDValue();
25088 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
25089}
25090
25091bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
25092 EVT VT, const APInt &AndMask) const {
25093 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
25094 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
25095  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
25096}
25097
25098unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
25099  return Subtarget.getMinimumJumpTableEntries();
25100}
25101
25102SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
25103                                                    SDValue Value, SDValue Addr,
25104 int JTI,
25105 SelectionDAG &DAG) const {
25106 if (Subtarget.hasStdExtZicfilp()) {
25107 // When Zicfilp enabled, we need to use software guarded branch for jump
25108 // table branch.
25109 SDValue Chain = Value;
25110    // Jump table debug info is only needed if CodeView is enabled.
25111    if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
25112      Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
25113 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
25114 }
25115 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
25116}
25117
25118// If an output pattern produces multiple instructions, tablegen may pick an
25119// arbitrary type from an instruction's destination register class to use for the
25120// VT of that MachineSDNode. This VT may be used to look up the representative
25121// register class. If the type isn't legal, the default implementation will
25122// not find a register class.
25123//
25124// Some integer types smaller than XLen are listed in the GPR register class to
25125// support isel patterns for GISel, but are not legal in SelectionDAG. The
25126// arbitrary type tablegen picks may be one of these smaller types.
25127//
25128// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
25129// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
25130std::pair<const TargetRegisterClass *, uint8_t>
25131RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
25132 MVT VT) const {
25133 switch (VT.SimpleTy) {
25134 default:
25135 break;
25136 case MVT::i8:
25137 case MVT::i16:
25138  case MVT::i32:
25139    return std::make_pair(&RISCV::GPRRegClass, 1);
25140  case MVT::bf16:
25141  case MVT::f16:
25142    return std::make_pair(&RISCV::FPR16RegClass, 1);
25143  }
25144
25145  return TargetLowering::findRepresentativeClass(TRI, VT);
25146}
25147
25148namespace llvm::RISCVVIntrinsicsTable {
25149
25150#define GET_RISCVVIntrinsicsTable_IMPL
25151#include "RISCVGenSearchableTables.inc"
25152
25153} // namespace llvm::RISCVVIntrinsicsTable
25154
25155bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
25156
25157 // If the function specifically requests inline stack probes, emit them.
25158 if (MF.getFunction().hasFnAttribute("probe-stack"))
25159 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
25160 "inline-asm";
25161
25162 return false;
25163}
25164
25165unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
25166                                                Align StackAlign) const {
25167 // The default stack probe size is 4096 if the function has no
25168 // stack-probe-size attribute.
25169 const Function &Fn = MF.getFunction();
25170 unsigned StackProbeSize =
25171 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25172 // Round down to the stack alignment.
25173 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25174 return StackProbeSize ? StackProbeSize : StackAlign.value();
25175}
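// Arithmetic example: "stack-probe-size"=5000 with a 16-byte stack alignment
// rounds down to 4992; an attribute value of 0 falls back to the alignment
// itself, so the probe interval is never zero.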
25176
25177SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25178 SelectionDAG &DAG) const {
25179  MachineFunction &MF = DAG.getMachineFunction();
25180  if (!hasInlineStackProbe(MF))
25181 return SDValue();
25182
25183 MVT XLenVT = Subtarget.getXLenVT();
25184 // Get the inputs.
25185 SDValue Chain = Op.getOperand(0);
25186 SDValue Size = Op.getOperand(1);
25187
25188  MaybeAlign Align =
25189      cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25190 SDLoc dl(Op);
25191 EVT VT = Op.getValueType();
25192
25193 // Construct the new SP value in a GPR.
25194 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25195 Chain = SP.getValue(1);
25196 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25197 if (Align)
25198 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25199 DAG.getSignedConstant(-Align->value(), dl, VT));
25200
25201 // Set the real SP to the new value with a probing loop.
25202 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25203 return DAG.getMergeValues({SP, Chain}, dl);
25204}
25205
25206MachineBasicBlock *
25207RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
25208                                            MachineBasicBlock *MBB) const {
25209 MachineFunction &MF = *MBB->getParent();
25210 MachineBasicBlock::iterator MBBI = MI.getIterator();
25211 DebugLoc DL = MBB->findDebugLoc(MBBI);
25212 Register TargetReg = MI.getOperand(0).getReg();
25213
25214 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25215 bool IsRV64 = Subtarget.is64Bit();
25216 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25217 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25218 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25219
25220 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25221 MachineBasicBlock *LoopTestMBB =
25222 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25223 MF.insert(MBBInsertPoint, LoopTestMBB);
25224 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25225 MF.insert(MBBInsertPoint, ExitMBB);
25226 Register SPReg = RISCV::X2;
25227 Register ScratchReg =
25228 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25229
25230 // ScratchReg = ProbeSize
25231 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25232
25233 // LoopTest:
25234 // SUB SP, SP, ProbeSize
25235 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25236 .addReg(SPReg)
25237 .addReg(ScratchReg);
25238
25239 // s[d|w] zero, 0(sp)
25240 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25241 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25242 .addReg(RISCV::X0)
25243 .addReg(SPReg)
25244 .addImm(0);
25245
25246 // BLT TargetReg, SP, LoopTest
25247 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25248 .addReg(TargetReg)
25249 .addReg(SPReg)
25250 .addMBB(LoopTestMBB);
25251
25252 // Adjust with: MV SP, TargetReg.
25253 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25254 .addReg(TargetReg)
25255 .addImm(0);
25256
25257 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25258  ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
25259
25260 LoopTestMBB->addSuccessor(ExitMBB);
25261 LoopTestMBB->addSuccessor(LoopTestMBB);
25262 MBB->addSuccessor(LoopTestMBB);
25263
25264 MI.eraseFromParent();
25265 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25266 return ExitMBB->begin()->getParent();
25267}
25268
25269ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
25270  if (Subtarget.hasStdExtFOrZfinx()) {
25271 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25272 return RCRegs;
25273 }
25274 return {};
25275}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue OrigOp, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(3))
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static void promoteVCIXScalar(SDValue Op, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats th...
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG, unsigned Type)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different number of element...
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1347
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1332
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1109
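A minimal, illustrative sketch (not taken from this file) of how the APFloat helpers listed above are typically used; the function names and variables are placeholders.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;
// Convert a signed APInt to a double-precision APFloat, rounding to nearest-even.
static APFloat apintToDouble(const APInt &Val) {
  APFloat F(APFloat::IEEEdouble());
  F.convertFromAPInt(Val, /*IsSigned=*/true, APFloat::rmNearestTiesToEven);
  return F;
}
// Produce a quiet NaN in the same semantics, e.g. as a fallback value.
static APFloat doubleNaN() { return APFloat::getNaN(APFloat::IEEEdouble()); }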
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1385
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the bit at position BitPosition to 1.
Definition APInt.h:1330
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1201
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:209
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:510
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:488
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:334
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1130
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:239
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1221
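An illustrative-only sketch exercising a few of the APInt constructors and predicates listed above; the values are arbitrary.
#include "llvm/ADT/APInt.h"
using namespace llvm;
static void apintSketch() {
  APInt Low = APInt::getLowBitsSet(32, 8);            // 0x000000FF
  APInt Sign = APInt::getOneBitSet(32, 31);           // only the sign bit set
  APInt Splat = APInt::getSplat(64, APInt(8, 0xAB));  // 0xAB repeated across 64 bits
  bool IsMask = Low.isMask(8);                        // true: low 8 bits form a mask
  bool IsShifted = Sign.isShiftedMask();              // true: one contiguous run of ones
  bool Neg = Sign.isNegative();                       // true: MSB is set
  (void)Splat; (void)IsMask; (void)IsShifted; (void)Neg;
}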
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
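A hypothetical helper (not part of the backend) showing how the AtomicRMWInst accessors and BinOp values above are typically inspected.
#include "llvm/IR/Instructions.h"
using namespace llvm;
// Returns true for the integer min/max atomicrmw flavours.
static bool isIntegerMinMaxRMW(const AtomicRMWInst &RMW) {
  if (RMW.isFloatingPointOperation())
    return false;
  switch (RMW.getOperation()) {
  case AtomicRMWInst::Min:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::UMax:
    return true;
  default:
    return false;
  }
}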
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & set()
Definition BitVector.h:370
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:194
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:237
unsigned size() const
Definition DenseMap.h:110
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:213
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:313
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1939
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
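An illustrative sketch of the MVT queries listed above (the header path reflects recent LLVM releases; older ones used llvm/Support/MachineValueType.h).
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;
static void mvtSketch() {
  MVT V = MVT::getVectorVT(MVT::i32, 4);            // v4i32, a fixed-length vector
  MVT S = MVT::getScalableVectorVT(MVT::i64, 2);    // nxv2i64, a scalable vector
  MVT Elt = V.getVectorElementType();               // i32
  MVT Half = V.getHalfNumVectorElementsVT();        // v2i32
  bool Fixed = V.isFixedLengthVector();             // true
  bool Scalable = S.isScalableVector();             // true
  (void)Elt; (void)Half; (void)Fixed; (void)Scalable;
}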
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
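A hedged sketch of the usual BuildMI idiom behind the MachineInstrBuilder helpers above; Desc, DstReg, SrcReg and Imm are placeholders supplied by the caller.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
// Emit `DstReg = <Desc> SrcReg, Imm` before MBBI, chaining operand adders.
static void emitSketch(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                       const DebugLoc &DL, const MCInstrDesc &Desc,
                       Register DstReg, Register SrcReg, int64_t Imm) {
  BuildMI(MBB, MBBI, DL, Desc, DstReg)
      .addReg(SrcReg)
      .addImm(Imm);
}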
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
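A hypothetical predicate (not the file's actual gating logic) combining a few of the RISCVSubtarget queries listed above, included purely to illustrate how they compose.
#include "RISCVSubtarget.h"
using namespace llvm;
// Sketch: would a fixed-length vector of VecBits fit under the configured
// fixed-length-RVV limits for this subtarget?
static bool fitsFixedLengthRVV(const RISCVSubtarget &ST, unsigned VecBits) {
  if (!ST.hasVInstructions() || !ST.useRVVForFixedLengthVectors())
    return false;
  return VecBits <= ST.getRealMinVLen() * ST.getMaxLMULForFixedLengthVectors();
}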
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a zero-terminated array of rounding control registers that can be attached to a strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
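A hedged sketch of the general shape of a DAG-combine callback as dispatched from PerformDAGCombine above; it only folds (add x, 0) -> x for illustration and does not reproduce any combine from this file.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
static SDValue combineSketch(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  (void)DAG; // a real combine would build replacement nodes with DAG.getNode(...)
  if (N->getOpcode() == ISD::ADD)
    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
      if (C->isZero())
        return N->getOperand(0); // (add x, 0) -> x
  return SDValue();              // SDValue() means "no change"
}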
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a de-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
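The static ShuffleVectorSDNode predicates above classify a plain index mask without needing a DAG node. A short sketch with made-up masks:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

void classifyMasks() {
  SmallVector<int> Rev = {3, 2, 1, 0};
  // All indices come from the first source, in reverse order.
  bool Single = ShuffleVectorSDNode::isSingleSourceMask(Rev, /*NumSrcElts=*/4);
  bool Reverse = ShuffleVectorSDNode::isReverseMask(Rev, /*NumSrcElts=*/4);

  SmallVector<int> Even = {0, 2, 4, 6};
  unsigned Index;
  // Picks every other element: a de-interleave of factor 2 starting at index 0.
  bool DeInt =
      ShuffleVectorSDNode::isDeInterleaveMaskOfFactor(Even, /*Factor=*/2, Index);
  (void)Single; (void)Reverse; (void)DeInt;
}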
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
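A small usage sketch for SmallSet; the element type and inline size below are arbitrary:

#include "llvm/ADT/SmallSet.h"
using namespace llvm;

void dedupe() {
  SmallSet<unsigned, 4> Seen;            // stays allocation-free up to 4 elements
  bool First = Seen.insert(10u).second;  // true: newly inserted
  bool Again = Seen.insert(10u).second;  // false: already present
  unsigned Hits = Seen.count(10u);       // 1 if present, 0 otherwise
  (void)First; (void)Again; (void)Hits;
}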
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
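A sketch of StringSwitch in use; the mnemonic strings and integer codes are hypothetical, not taken from the RISC-V backend:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

static int roundingModeCode(StringRef Name) {
  return StringSwitch<int>(Name)
      .Case("rne", 0)
      .Case("rtz", 1)
      .Cases("rdn", "down", 2)  // two spellings map to the same code
      .Default(-1);             // unknown name
}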
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
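A hedged sketch of how these legalization hooks are typically driven from a TargetLowering subclass constructor; the opcode/type/action combinations and the Subtarget member are illustrative assumptions, not the RISC-V backend's actual choices.

// Assumed to run inside a TargetLowering subclass constructor, where the
// LegalizeAction enumerators (Legal/Promote/Expand/Custom) are visible and
// Subtarget is the enclosing target's subtarget object.
setOperationAction(ISD::SDIV, MVT::i64, Expand);            // expand to a sequence/libcall
setOperationAction(ISD::ROTL, MVT::i64, Custom);            // handled in LowerOperation()
setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Legal);  // sign-extending i8 load is native
setTruncStoreAction(MVT::i64, MVT::i8, Legal);              // truncating i8 store is native
setBooleanContents(ZeroOrOneBooleanContent);                // widened i1 results are 0/1
computeRegisterProperties(Subtarget.getRegisterInfo());     // derive register/type properties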
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren, 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:774
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI bool isRISCVVectorTupleTy() const
Definition Type.cpp:147
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:231
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:257
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
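A self-contained sketch of the fixed/scalable quantity helpers above:

#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

void typeSizeBasics() {
  TypeSize Fixed = TypeSize::getFixed(128);   // exactly 128 bits
  TypeSize Scal = TypeSize::getScalable(64);  // 64 * vscale bits
  assert(Fixed.getFixedValue() == 128);
  assert(Scal.getKnownMinValue() == 64);      // minimum, reached when vscale == 1
  assert(Scal.isKnownMultipleOf(8));          // holds for every vscale
  assert(Scal.divideCoefficientBy(8).getKnownMinValue() == 8);
}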
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:774
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:738
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:706
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:773
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:622
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:682
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:663
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition ISDOpcodes.h:952
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:627
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:701
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:672
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:648
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:941
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on the mask e....
Definition ISDOpcodes.h:690
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:927
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:521
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:611
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:853
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:857
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
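A small sketch of the condition-code helpers, assuming an integer EVT VT is in scope; the results follow directly from the ISD::CondCode definitions.

ISD::CondCode CC = ISD::SETLT;
ISD::CondCode Inv = ISD::getSetCCInverse(CC, VT);      // !(x < y)  =>  SETGE
ISD::CondCode Swp = ISD::getSetCCSwappedOperands(CC);  // (y CC x)  =>  SETGT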
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
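A hedged sketch of the IR-level pattern matchers above; matchShlPlusValue is a hypothetical helper, and the non-commutative m_Add only matches when the shift is the first operand.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise IR of the shape (shl X, C) + Y where the shift has a single use.
static bool matchShlPlusValue(Value *V, Value *&X, Value *&Y) {
  return match(V, m_Add(m_OneUse(m_Shl(m_Value(X), m_ConstantInt())),
                        m_Value(Y)));
}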
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
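A sketch of the integer-materialization helpers, assuming an MCSubtargetInfo describing an RV64 target is available; the in-tree header path shown is the one the RISC-V backend itself uses.

#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/ADT/APInt.h"
#include "llvm/MC/MCSubtargetInfo.h"

// generateInstSeq returns the LUI/ADDI(W)/SLLI/... steps that materialize Val;
// getIntMatCost summarises such a sequence as an integer cost.
void showMatIntCost(const llvm::MCSubtargetInfo &STI) {
  int64_t Val = 0x12345678LL;
  llvm::RISCVMatInt::InstSeq Seq = llvm::RISCVMatInt::generateInstSeq(Val, STI);
  int Cost = llvm::RISCVMatInt::getIntMatCost(llvm::APInt(64, Val), /*Size=*/8,
                                              STI, /*CompressionCost=*/false,
                                              /*FreeZeroes=*/false);
  (void)Seq; (void)Cost;
}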
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
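A sketch of the vtype SEW/LMUL helpers (includes omitted; they live alongside the RISC-V backend's RISCVVType utilities). The encodings follow the V specification, where vsew = log2(SEW / 8).

unsigned SEWEnc = RISCVVType::encodeSEW(32);    // 32-bit elements encode to 2
unsigned SEW = RISCVVType::decodeVSEW(SEWEnc);  // back to 32
auto VLMul = RISCVVType::encodeLMUL(/*LMUL=*/2, /*Fractional=*/false);
auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);  // {2, false}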
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
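A sketch of the SelectionDAG pattern matchers above, assuming an SDNode *N and a SelectionDAG &DAG are in scope (the matchers live in the llvm::SDPatternMatch namespace).

using namespace llvm::SDPatternMatch;
SDValue Src;
// Match (srl Src, constant) where the node produces an i64 value.
bool IsI64ConstShift =
    sd_match(N, &DAG, m_SpecificVT(MVT::i64, m_Srl(m_Value(Src), m_ConstInt())));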
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:557
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1970
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
int isShifted359(T Value, int &Shift)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1963
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1760
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:207
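The MathExtras predicates scattered through this index are small, self-contained bit utilities; a runnable sketch:

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  using namespace llvm;
  assert(isInt<12>(2047) && !isInt<12>(2048));   // signed immediate range check
  assert(isUInt<5>(31) && !isUInt<5>(32));       // unsigned range check
  assert(isPowerOf2_64(64) && Log2_64(64) == 6);
  assert(maskTrailingOnes<uint64_t>(4) == 0xF);  // low four bits set
  assert(SignExtend64<12>(0xFFF) == -1);         // 12-bit all-ones sign-extends to -1
  assert(isShiftedUInt<8, 4>(0xAB0));            // an 8-bit value shifted left by 4
  return 0;
}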
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight), and returns the integer EVT with that number of bits.
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type, which is chosen by the caller.
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
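Since a large part of this index is EVT queries, a compact hedged example may help tie them together. The sketch assumes an LLVMContext is at hand, as it is throughout the lowering code; the helper name and the v4i32 choice are purely illustrative.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

static void evtSketch(llvm::LLVMContext &Ctx) {
  // Build a fixed-length v4i32 and query it with the accessors above.
  llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
  assert(VT.isVector() && VT.isInteger() && VT.isFixedLengthVector());
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getScalarSizeInBits() == 32);
  assert(VT.getFixedSizeInBits() == 128);
  // Swapping the element type keeps the element count: v4i32 -> v4f32.
  llvm::EVT FVT = VT.changeVectorElementType(llvm::MVT::f32);
  assert(FVT.isFloatingPoint() && FVT.getVectorNumElements() == 4);
}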
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.
Definition KnownBits.h:296
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute known bits resulting from the addition of LHS and RHS.
Definition KnownBits.h:347
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
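The KnownBits accessors above are easiest to read with concrete bits. In the hedged sketch below, the 8-bit pattern is invented purely to exercise the queries.

#include "llvm/Support/KnownBits.h"
#include <cassert>

static void knownBitsSketch() {
  // Track an 8-bit value of the form 0b000001x0, i.e. either 4 or 6.
  llvm::KnownBits KB(8);
  KB.Zero = 0b11111001; // bits known to be zero
  KB.One  = 0b00000100; // bits known to be one
  assert(!KB.isUnknown());
  assert(KB.countMaxLeadingZeros() == 5);  // bit 2 is known one
  assert(KB.countMaxTrailingZeros() == 2); // the value could be 0b100
  assert(KB.countMaxActiveBits() == 3);    // fits in three bits
  // Zero-extending keeps the payload and marks the new high bits zero.
  llvm::KnownBits Wide = KB.zext(16);
  assert(Wide.getBitWidth() == 16 && Wide.countMaxActiveBits() == 3);
}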
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
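A hedged sketch of the MachinePointerInfo factories above. It assumes a MachineFunction and a frame index are already available, as they are during call lowering; the offsets are placeholders.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

// Hypothetical helper: MF and FI would come from the surrounding lowering.
static void pointerInfoSketch(llvm::MachineFunction &MF, int FI) {
  // An outgoing argument written at SP + 8.
  llvm::MachinePointerInfo StackPI =
      llvm::MachinePointerInfo::getStack(MF, /*Offset=*/8);
  // A load from a fixed stack object, 4 bytes into the object.
  llvm::MachinePointerInfo FixedPI =
      llvm::MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(4);
  // A constant-pool access, e.g. for a materialised FP immediate.
  llvm::MachinePointerInfo CPPI =
      llvm::MachinePointerInfo::getConstantPool(MF);
  (void)StackPI; (void)FixedPI; (void)CPPI;
}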
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to the caller.